def testOperationTest_test(self):
    """
    Exercise OperationTest on a level-1 linear boundary grid in 1D.

    One data point at 0.25 with class 1.0 is tested against two coefficient
    vectors that differ only in the sign of the third coefficient.
    """
    from pysgpp import Grid, DataVector, DataMatrix

    grid = Grid.createLinearBoundaryGrid(1)
    generator = grid.createGridGenerator()
    generator.regular(1)

    alpha = DataVector(grid.getStorage().size())

    data = DataMatrix(1, 1)
    data.setAll(0.25)

    classes = DataVector(1)
    classes.setAll(1.0)

    testOP = grid.createOperationTest()

    # third coefficient positive: the returned count must be positive
    for idx, coefficient in enumerate((0.0, 0.0, 1.0)):
        alpha[idx] = coefficient
    c = testOP.test(alpha, data, classes)
    self.failUnless(c > 0.0)

    # third coefficient negative: the returned count must be zero
    for idx, coefficient in enumerate((0.0, 0.0, -1.0)):
        alpha[idx] = coefficient
    c = testOP.test(alpha, data, classes)
    self.failUnless(c == 0.0)
def computeMoments(self, ts=None):
    """
    Tabulate mean and variance estimates, one row per time step.

    @param ts: iterable of time steps to evaluate; if None, every time step
               for which samples are stored is used
    @return: dictionary {'data': DataMatrix, 'names': list of column names}
    """
    names = [
        'time',
        'iteration',
        'grid_size',
        'mean',
        'meanDiscretizationError',
        'var',
        'varDiscretizationError',
    ]

    # parameters: the ts argument used to be overwritten unconditionally;
    # fall back to all stored time steps only when none were requested
    if ts is None:
        ts = self.__samples.keys()
    ts = list(ts)

    nrows = len(ts)
    ncols = len(names)
    data = DataMatrix(nrows, ncols)
    v = DataVector(ncols)

    for row, t in enumerate(ts):
        v.setAll(0.0)
        v[0] = t
        v[1] = 0  # the iteration column is always written as 0 here
        v[2] = len(self.__samples[t].values())
        v[3], v[4] = self.mean(ts=[t])
        v[5], v[6] = self.var(ts=[t])

        # write results to matrix
        data.setRow(row, v)

    return {'data': data, 'names': names}
def testOperationB(self):
    """
    Check mult and multTranspose of OperationB on a level-2 linear boundary
    grid in 1D, using a single data point at 0.25.
    """
    from pysgpp import Grid, DataVector, DataMatrix

    grid = Grid.createLinearBoundaryGrid(1)
    generator = grid.createGridGenerator()
    generator.regular(2)

    alpha = DataVector(grid.getStorage().size())
    p = DataMatrix(1, 1)
    beta = DataVector(1)

    alpha.setAll(0.0)
    p.set(0, 0, 0.25)
    beta[0] = 1.0

    opb = grid.createOperationB()
    opb.mult(beta, p, alpha)

    # expected entries of alpha after mult, in storage order
    for idx, expected in enumerate((0.75, 0.25, 0.5, 1.0, 0.0)):
        self.failUnlessAlmostEqual(alpha[idx], expected)

    alpha.setAll(0.0)
    alpha[2] = 1.0
    p.set(0, 0, 0.25)
    beta[0] = 0.0

    opb.multTranspose(alpha, p, beta)
    self.failUnlessAlmostEqual(beta[0], 0.5)
def computeBilinearForm(self, grid):
    """
    Compute bilinear form for the current grid

    The matrix filled by createOperationLTwoDotExplicit is scaled with the
    pdf value self._U.pdf(p), where p holds the coordinates of the row's grid
    point, and every entry is also stored in self._map.

    @param grid: Grid
    @return DataMatrix
    """
    # create bilinear form of the grid (this was previously done twice in a
    # row, with the first result thrown away)
    gs = grid.getStorage()
    A = DataMatrix(gs.size(), gs.size())
    A.setAll(0.)
    createOperationLTwoDotExplicit(A, grid)

    # multiply the entries with the pdf at the center of the support
    p = DataVector(gs.dim())
    q = DataVector(gs.dim())

    for i in xrange(gs.size()):
        gpi = gs.get(i)
        gpi.getCoords(p)
        for j in xrange(gs.size()):
            gpj = gs.get(j)
            # q is filled but not used in the scaling below
            gpj.getCoords(q)
            # NOTE(review): the loops cover the full (i, j) range and both
            # (i, j) and (j, i) are written, so an entry is rescaled again
            # when its mirrored pair is visited -- confirm this is intended
            y = float(A.get(i, j) * self._U.pdf(p))
            A.set(i, j, y)
            A.set(j, i, y)
            self._map[self.getKey(gpi, gpj)] = A.get(i, j)

    return A
def merge(cls, containerList):
    """
    Concatenate the points and values of several containers into one.

    @param containerList: sequence of containers providing getPoints() and
                          getValues()
    @return: new DataContainer holding all rows, or None for an empty list
    """
    if not len(containerList):
        return None

    # total number of entries; the column count is read from each container
    # in turn, so the last one in the list decides
    total = 0
    for entry in containerList:
        total += len(entry.getValues())
        dim = entry.getPoints().getNcols()

    allPoints = DataMatrix(total, dim)
    allValues = DataVector(total)
    rowBuffer = DataVector(dim)

    # copy row by row into the merged structures
    target = 0
    for entry in containerList:
        sourcePoints = entry.getPoints()
        sourceValues = entry.getValues()
        for source in range(len(sourceValues)):
            sourcePoints.getRow(source, rowBuffer)
            allPoints.setRow(target, rowBuffer)
            allValues[target] = sourceValues[source]
            target += 1

    return DataContainer(points=allPoints, values=allValues)
def computeMoments(self, iterations=None, ts=None):
    """
    Tabulate mean and variance estimates for every combination of time step
    and iteration.

    @param iterations: iterations to evaluate; defaults to all iterations
                       available in the knowledge object
    @param ts: time steps to evaluate; defaults to all available time steps
    @return: dictionary {'data': DataMatrix, 'names': list of column names}
    """
    names = ['time', 'iteration', 'grid_size',
             'mean', 'meanDiscretizationError',
             'var', 'varDiscretizationError']

    # parameters
    if ts is None:
        ts = self.__knowledge.getAvailableTimeSteps()
    if iterations is None:
        iterations = self.__knowledge.getAvailableIterations()

    numColumns = len(names)
    data = DataMatrix(len(ts) * len(iterations), numColumns)
    rowVector = DataVector(numColumns)

    rowIndex = 0
    for t in ts:
        for iteration in iterations:
            grid = self.__knowledge.getGrid(qoi=self._qoi,
                                            iteration=iteration)
            gridSize = grid.getSize()

            rowVector.setAll(0.0)
            rowVector[0] = t
            rowVector[1] = iteration
            rowVector[2] = gridSize
            rowVector[3], rowVector[4] = self.mean(ts=[t],
                                                   iterations=[iteration])
            rowVector[5], rowVector[6] = self.var(ts=[t],
                                                  iterations=[iteration])

            # write results to matrix
            data.setRow(rowIndex, rowVector)
            rowIndex += 1

    return {'data': data, 'names': names}
def computeStats(self, dtype):
    """
    Collect the learner's per-iteration statistics for every time step of
    interest.

    @param dtype: knowledge type used to index the learner's statistics
    @return: dictionary {'data': DataMatrix, 'names': list of column names}
    """
    names = ['time', 'iteration', 'level', 'grid_size',
             'trainMSE', 'trainL2Error',
             'testMSE', 'testL2Error',
             'L2ErrorSurpluses']

    learner = self.__learner
    ts = learner.getTimeStepsOfInterest()
    iterations = learner.iteration + 1

    numColumns = len(names)
    data = DataMatrix(len(ts) * iterations, numColumns)

    # the row buffer is zeroed once only; columns that are not rewritten for
    # a row keep whatever the previous row stored there
    v = DataVector(numColumns)
    v.setAll(0.)

    row = 0
    for t in ts:
        for iteration in xrange(iterations):
            v[0] = t
            v[1] = iteration
            v[2] = learner.level[dtype][iteration]
            v[3] = learner.numberPoints[dtype][iteration]

            v[4] = learner.trainAccuracy[dtype][t][iteration]
            n = learner.trainCount[dtype][t][iteration]
            v[5] = float(np.sqrt(v[4] * n))  # == L2 error

            # test columns are filled only if there is one test entry per
            # train entry
            numTest = len(learner.testAccuracy[dtype][t])
            numTrain = len(learner.trainAccuracy[dtype][t])
            if numTest == numTrain:
                v[6] = learner.testAccuracy[dtype][t][iteration]
                n = learner.testCount[dtype][t][iteration]
                v[7] = float(np.sqrt(v[6] * n))  # == L2 error

            v[8] = self.computeL2ErrorSurpluses(self._qoi, t,
                                                dtype, iteration)

            # write results to matrix
            data.setRow(row, v)
            row += 1

    return {'data': data, 'names': names}
def testOperationTest_test(self):
    """
    Run OperationTest on a 1D linear boundary grid of level 1 with a single
    data point (0.25, class 1.0) and check the returned count for a positive
    and a negative third coefficient.
    """
    from pysgpp import Grid, DataVector, DataMatrix

    def runTest(operation, coefficients, lastValue):
        # set the three coefficients and return the result of the test
        coefficients[0] = 0.0
        coefficients[1] = 0.0
        coefficients[2] = lastValue
        return operation.test(coefficients, data, classes)

    grid = Grid.createLinearBoundaryGrid(1)
    generator = grid.createGridGenerator()
    generator.regular(1)

    alpha = DataVector(grid.getStorage().size())

    data = DataMatrix(1, 1)
    data.setAll(0.25)
    classes = DataVector(1)
    classes.setAll(1.0)

    testOP = grid.createOperationTest()

    c = runTest(testOP, alpha, 1.0)
    self.failUnless(c > 0.0)

    c = runTest(testOP, alpha, -1.0)
    self.failUnless(c == 0.0)
def computeMoments(self, ts=None):
    """
    Build a table with the mean and variance estimates per time step.

    @param ts: iterable of time steps to evaluate; if None, all time steps
               with stored samples are used
    @return: dictionary {'data': DataMatrix, 'names': list of column names}
    """
    names = ['time',
             'iteration',
             'grid_size',
             'mean',
             'meanDiscretizationError',
             'var',
             'varDiscretizationError']

    # parameters: respect an explicitly requested selection of time steps
    # instead of always replacing it with every stored time step
    if ts is None:
        ts = self.__samples.keys()
    ts = list(ts)

    nrows = len(ts)
    ncols = len(names)
    data = DataMatrix(nrows, ncols)
    v = DataVector(ncols)

    for row, t in enumerate(ts):
        v.setAll(0.0)
        v[0] = t
        v[1] = 0  # the iteration column is always written as 0 here
        v[2] = len(self.__samples[t].values())
        v[3], v[4] = self.mean(ts=[t])
        v[5], v[6] = self.var(ts=[t])

        # write results to matrix
        data.setRow(row, v)

    return {'data': data, 'names': names}
def checkPositivity(grid, alpha):
    """
    Evaluate the sparse grid function on all points of a full grid with the
    grid's maximum level and report values that are negative beyond 1e-13.

    @param grid: Grid
    @param alpha: coefficient vector passed to evalSGFunctionMulti
    @return: True if no such negative value was found, False otherwise
    """
    # define a full grid of maxlevel of the grid
    gs = grid.getStorage()
    fullGrid = Grid.createLinearGrid(gs.dim())
    fullGrid.createGridGenerator().full(gs.getMaxLevel())
    fullStorage = fullGrid.getStorage()

    # collect the coordinates of all full grid points
    points = DataMatrix(fullStorage.size(), fullStorage.dim())
    coords = DataVector(gs.dim())
    for k in xrange(fullStorage.size()):
        fullStorage.get(k).getCoords(coords)
        points.setRow(k, coords)

    values = evalSGFunctionMulti(grid, alpha, points)

    # the range is tracked over the offending values only
    ymin, ymax, cnt = 0, -1e10, 0
    for k, value in enumerate(values.array()):
        if not (value < 0. and abs(value) > 1e-13):
            continue
        cnt += 1
        ymin = min(ymin, value)
        ymax = max(ymax, value)
        points.getRow(k, coords)
        print("  %s = %g" % (coords, value))

    if cnt > 0:
        print("warning: function is not positive")
        print("%i/%i: [%g, %g]" % (cnt, fullStorage.size(), ymin, ymax))

    return cnt == 0
def __prepareDataContainer(self, data, name):
    """
    Convert the loaded results into DataContainer objects, one per knowledge
    type and time step.

    @param data: dictionary loaded from UQSetting
    @param name: name handed to every created DataContainer
    @return dictionary {dtype: {t: <DataContainer>}}
    """
    params = self.getParameters().activeParams()
    U = params.getIndependentJointDistribution()

    ans = {}
    for dtype in self.getKnowledgeTypes():
        containers = {}
        ans[dtype] = containers
        dim = self.grid.getStorage().getDimension()

        # transform the data depending on the given knowledge type
        transformed = KnowledgeTypes.transformData(data, U, dtype)

        # one container per time step
        for t, values in list(transformed.items()):
            numSamples = len(values)
            points = DataMatrix(numSamples, dim)
            solutions = DataVector(numSamples)
            for row, (sample, res) in enumerate(values.items()):
                points.setRow(row, DataVector(sample.getActiveUnit()))
                solutions[row] = float(res)
            containers[t] = DataContainer(points=points,
                                          values=solutions,
                                          name=name)
    return ans
def __init__(self, trainData, samples=None, testData=None,
             bandwidths=None, transformation=None, surfaceFile=None):
    """
    Set up the distribution from training data.

    @param trainData: array with a shape attribute; wrapped in a DataMatrix
    @param samples: stored as is
    @param testData: stored as is
    @param bandwidths: bandwidths to use; if None they are obtained from
                       getOptKDEbdwth of the KDE operation on the train data
    @param transformation: optional transformation whose per-dimension
                           bounds replace the default bounds [0, 1]
    @param surfaceFile: stored as is
    """
    super(LibAGFDist, self).__init__()

    self.trainData = DataMatrix(trainData)
    self.testData = testData

    # default bounds: unit interval per dimension, flattened to a single
    # pair in the one-dimensional case
    bounds = [[0, 1] for _ in xrange(trainData.shape[1])]
    if len(bounds) == 1:
        bounds = bounds[0]
    if transformation is not None:
        bounds = [trans.getBounds()
                  for trans in transformation.getTransformations()]
    self.bounds = bounds

    self.dim = trainData.shape[1]
    self.samples = samples
    self.transformation = transformation

    if bandwidths is None:
        op = createOperationInverseRosenblattTransformationKDE(
            self.trainData)
        self.bandwidths = DataVector(self.dim)
        op.getOptKDEbdwth(self.bandwidths)
    else:
        self.bandwidths = bandwidths

    self.surfaceFile = surfaceFile
def computeTrilinearFormByList(self, gpsk, basisk, alphak,
                               gpsi, basisi, gpsj, basisj):
    """
    Compute trilinear form for two lists of grid points

    @param gpsk: list of HashGridIndex
    @param basisk: SG++ basis for grid indices gpsk
    @param alphak: coefficients for kth grid
    @param gpsi: list of HashGridIndex
    @param basisi: SG++ basis for grid indices gpsi
    @param gpsj: list of HashGridIndex
    @param basisj: SG++ basis for grid indices gpsj
    @return: tuple (DataMatrix, accumulated error)
    """
    numRows = len(gpsi)
    numK = len(gpsk)
    print("# evals: %i^2 * %i = %i" % (numRows, numK, numRows ** 2 * numK))

    A = DataMatrix(numRows, len(gpsj))
    err = 0.

    for row, gpi in enumerate(gpsi):
        for col, gpj in enumerate(gpsj):
            # contributions of all gpks for this (row, col) pair
            b, erri = self.computeTrilinearFormByRow(gpsk, basisk,
                                                     gpi, basisi,
                                                     gpj, basisj)
            # weight them with the coefficients of the kth grid
            A.set(row, col, alphak.dotProduct(b))

            # error statistics
            err += erri

    return A, err
def merge(cls, containerList):
    """
    Merge several containers into a single new DataContainer.

    @param containerList: sequence of containers providing getPoints() and
                          getValues()
    @return: DataContainer with all points and values, or None if the list
             is empty
    """
    if not len(containerList):
        return None

    # count all entries; dim ends up as the column count of the last
    # container visited
    numEntries = 0
    for item in containerList:
        numEntries += len(item.getValues())
        dim = item.getPoints().getNcols()

    mergedPoints = DataMatrix(numEntries, dim)
    mergedValues = DataVector(numEntries)
    buf = DataVector(dim)

    # copy the data entry by entry
    dst = 0
    for item in containerList:
        pts = item.getPoints()
        vals = item.getValues()
        for src in xrange(len(vals)):
            pts.getRow(src, buf)
            mergedPoints.setRow(dst, buf)
            mergedValues[dst] = vals[src]
            dst += 1

    return DataContainer(points=mergedPoints, values=mergedValues)
def loadData(self, qoi='_', name="train", dtype=KnowledgeTypes.SIMPLE):
    """
    Reads data set from file

    @param qoi: string quantity of interest
    @param name: String for category of data set (train or test),
                 default "train"
    @param dtype: knowledge type
    @return dictionary {dtype: {t: <DataContainer>}}

    WARNING: dtype parameter not supported
    """
    # read from file
    serialized = UQSettingFormatter().deserializeFromFile(self.__filename)
    setting = UQSetting.fromJson(serialized)
    results = setting.getTimeDependentResults(qoi)

    # the number of entries of the first time step sizes all containers
    size = len(next(results.itervalues()))

    # load result in a dictionary
    containers = {}
    for t, values in results.items():
        points = DataMatrix(size, self._dim)
        solutions = DataVector(size)
        for row, (sample, res) in enumerate(values.items()):
            points.setRow(row, DataVector(sample.getActiveUnit()))
            solutions[row] = res
        containers[t] = DataContainer(points, solutions, name)

    return {dtype: containers}
def computeTrilinearFormByList(self, gpsk, basisk, alphak,
                               gpsi, basisi, gpsj, basisj):
    """
    Compute trilinear form for two lists of grid points

    @param gpsk: list of HashGridIndex
    @param basisk: SG++ basis for grid indices gpsk
    @param alphak: coefficients for kth grid
    @param gpsi: list of HashGridIndex
    @param basisi: SG++ basis for grid indices gpsi
    @param gpsj: list of HashGridIndex
    @param basisj: SG++ basis for grid indices gpsj
    @return: tuple (DataMatrix, summed error of all entries)
    """
    evalInfo = (len(gpsi), len(gpsk), len(gpsi)**2 * len(gpsk))
    print("# evals: %i^2 * %i = %i" % evalInfo)

    A = DataMatrix(len(gpsi), len(gpsj))
    totalError = 0.

    # run over all rows
    for i, gpi in enumerate(gpsi):
        # run over all columns
        for j, gpj in enumerate(gpsj):
            # run over all gpks
            rowArgs = (gpsk, basisk, gpi, basisi, gpj, basisj)
            b, entryError = self.computeTrilinearFormByRow(*rowArgs)

            # get the overall contribution in the current dimension
            entry = alphak.dotProduct(b)
            A.set(i, j, entry)

            # error statistics
            totalError += entryError

    return A, totalError
def loadData(self, qoi='_', name="train", dtype=KnowledgeTypes.SIMPLE):
    """
    Reads data set from the stored UQSetting

    @param qoi: string quantity of interest
    @param name: String for category of data set (train or test),
                 default "train"
    @param dtype: knowledge type
    @return dictionary {dtype: {t: <DataContainer>}}

    WARNING: dtype parameter not supported
    """
    data = self.uqSetting.getTimeDependentResults(qoi)

    # load result in a dictionary
    ans = {}
    ans[dtype] = {}
    numDims = self.uqSetting.getDim()

    # number of (sample, result) entries of the first time step; it sizes
    # both the point matrix and the solution vector (the vector used to be
    # created from an undefined name, which raised a NameError)
    numSamples = len(next(iter(data.values())))

    # load data for all time steps
    for t, values in list(data.items()):
        mydata = DataMatrix(numSamples, numDims)
        sol = DataVector(numSamples)
        for i, (sample, res) in enumerate(values.items()):
            p = DataVector(sample.getActiveUnit())
            mydata.setRow(i, p)
            sol[i] = res

        ans[dtype][t] = DataContainer(mydata, sol, name)
    return ans
def computeMoments(self, ts=None):
    """
    Tabulate mean and variance together with their error and bootstrapping
    confidence interval, one row per time step in ascending order.

    @param ts: iterable of time steps to evaluate; if None, all time steps
               with stored samples are used
    @return: dictionary {'data': DataMatrix, 'names': list of column names}
    """
    names = [
        'time',
        'iteration',
        'grid_size',
        'mean',
        'mean_err',
        'meanConfidenceIntervalBootstrapping_lower',
        'meanConfidenceIntervalBootstrapping_upper',
        'var',
        'var_err',
        'varConfidenceIntervalBootstrapping_lower',
        'varConfidenceIntervalBootstrapping_upper',
    ]

    # parameters: use the requested time steps instead of always replacing
    # them with every stored time step
    if ts is None:
        ts = self.__samples.keys()
    ts = list(ts)

    nrows = len(ts)
    ncols = len(names)
    data = DataMatrix(nrows, ncols)
    v = DataVector(ncols)

    for row, t in enumerate(np.sort(ts)):
        v.setAll(0.0)
        mean = self.mean(ts=[t], iterations=[0])
        var = self.var(ts=[t], iterations=[0])
        numSamples = len(list(self.__samples[t].values()))

        v[0] = t
        v[1] = 0
        v[2] = numSamples
        v[3], v[4] = mean["value"], mean["err"]
        v[5], v[6] = mean["confidence_interval"]
        v[7], v[8] = var["value"], var["err"]
        # columns 9 and 10 hold the variance interval; writing it to 8 and 9
        # overwrote 'var_err' and left the upper bound at zero
        v[9], v[10] = var["confidence_interval"]

        # write results to matrix
        data.setRow(row, v)

    return {'data': data, 'names': names}
def __getSamples(self, W, T, n):
    """
    Provide n samples in the unit hypercube as a DataMatrix.

    Without stored samples, n samples are drawn from W and mapped with
    T.probabilisticToUnit. With stored samples, those are used (a random
    selection with replacement if their number differs from n) and, if
    self.__ixs is set, written into the columns self.__ixs of freshly drawn
    and transformed samples.

    @param W: distribution providing rvs(n) and getDim()
    @param T: transformation providing probabilisticToUnit
    @param n: number of samples
    @return: DataMatrix with n rows
    """
    if self.samples is None:
        # draw n samples; keep them in their own variable so that the result
        # matrix does not overwrite them before they are transformed
        drawn = W.rvs(n)

        # transform them to the unit hypercube
        ans = DataMatrix(n, W.getDim())
        for i, sample in enumerate(drawn):
            p = T.probabilisticToUnit(sample)
            ans.setRow(i, DataVector(p))
        return ans

    if self.samples.shape[0] == n:
        dataSamples = self.samples
    else:
        ixs = np.random.randint(0, len(self.samples), n)
        dataSamples = self.samples[ixs, :]

    # check if there are just samples for a subset of the random
    # variables. If so, add the missing ones
    if self.__ixs is not None:
        ans = W.rvs(n)

        # transform them to the unit hypercube
        for i, sample in enumerate(dataSamples):
            ans[i, :] = T.probabilisticToUnit(ans[i, :])
            ans[i, self.__ixs] = sample
    else:
        ans = dataSamples

    return DataMatrix(ans)
def __prepareDataContainer(self, data, name):
    """
    Prepare data for learning

    @param data: dictionary loaded from UQSetting
    @param name: name given to each DataContainer
    @return dictionary {dtype: {t: <DataContainer>}}
    """
    U = self.getParameters().activeParams().getIndependentJointDistribution()

    result = {}
    for dtype in self.getKnowledgeTypes():
        result[dtype] = perTimeStep = {}
        dim = self.grid.getStorage().dim()

        # prepare data container depending on the given knowledge type
        prepared = KnowledgeTypes.transformData(data, U, dtype)

        # load data for all time steps
        for t, values in prepared.items():
            count = len(values)
            matrix = DataMatrix(count, dim)
            solution = DataVector(count)
            for idx, (sample, res) in enumerate(values.items()):
                unitPoint = DataVector(sample.getActiveUnit())
                matrix.setRow(idx, unitPoint)
                solution[idx] = float(res)
            perTimeStep[t] = DataContainer(points=matrix,
                                           values=solution,
                                           name=name)
    return result
def computePiecewiseConstantBF(grid, U, admissibleSet):
    """
    Scale the rows of the L2 dot product matrix that belong to the points of
    the admissible set with the pdf of U at the respective grid point.

    @param grid: Grid
    @param U: distribution providing pdf
    @param admissibleSet: set of grid points providing getSize() and values()
    @return: tuple (B, b); B has one row per admissible point, b[k] is the
             entry of row k in the column of the same grid point
    """
    # create bilinear form of the grid
    gs = grid.getStorage()
    numPoints = gs.size()
    A = DataMatrix(numPoints, numPoints)
    createOperationLTwoDotExplicit(A, grid)

    # coordinate buffers for the row and the column grid point
    rowCoords = DataVector(gs.dim())
    colCoords = DataVector(gs.dim())

    B = DataMatrix(admissibleSet.getSize(), numPoints)
    b = DataVector(admissibleSet.getSize())

    for k, gp in enumerate(admissibleSet.values()):
        seq = gs.seq(gp)
        gp.getCoords(rowCoords)
        for col in xrange(gs.size()):
            # the column coordinates are loaded but do not enter the scaling
            gs.get(col).getCoords(colCoords)
            scaled = float(A.get(seq, col) * U.pdf(rowCoords))
            B.set(k, col, scaled)
            if seq == col:
                b[k] = scaled
    return B, b
def plotGrid(self, learner, suffix):
    """
    Evaluate the learner on a 30 x 30 mesh over [0, 1]^2 and save a
    wireframe plot as "grid3d_<suffix>_<iteration>.png".

    @param learner: object providing applyData and iteration
    @param suffix: string used in the file name
    """
    from mpl_toolkits.mplot3d.axes3d import Axes3D
    import matplotlib.pyplot as plt

    axis = np.linspace(0, 1, 30)
    X, Y = np.meshgrid(axis, np.linspace(0, 1, 30))
    Z = zeros(np.shape(X))
    numRows, numCols = np.shape(Z)[0], np.shape(Z)[1]

    # mesh points in row-major order, one point per matrix row
    points = DataMatrix(numRows * numCols, 2)
    for i in range(numRows):
        for j in range(numCols):
            flat = i * numCols + j
            points.set(flat, 0, X[i, j])
            points.set(flat, 1, Y[i, j])

    result = learner.applyData(points)

    # bring the results back into mesh shape
    for i in range(numRows):
        for j in range(numCols):
            Z[i, j] = result[i * numCols + j]

    fig = plt.figure()
    ax = Axes3D(fig)
    ax.plot_wireframe(X, Y, Z)
    plt.savefig("grid3d_%s_%i.png" % (suffix, learner.iteration))
    fig.clf()
    plt.close(plt.gcf())
def readReferenceMatrix(self, storage, filename):
    """
    Read a whitespace separated reference matrix from a file.

    The file is opened with tools.gzOpen; if that raises IOError or returns
    nothing, the same name is tried below 'tests/'. The matrix is asserted
    to have storage.getSize() rows and as many entries in its first row.

    @param storage: grid storage providing getSize()
    @param filename: name of the file containing the matrix
    @return: DataMatrix with the values of the file
    """
    from pysgpp import DataMatrix

    # read reference matrix
    try:
        fd = tools.gzOpen(filename, 'r')
    except IOError:
        fd = None

    if not fd:
        fd = tools.gzOpen('tests/' + filename, 'r')

    # close the file even if reading fails
    try:
        dat = fd.read().strip()
    finally:
        fd.close()

    rows = [line.strip().split(None) for line in dat.split('\n')]

    # right number of entries?
    self.assertEqual(storage.getSize(), len(rows))
    self.assertEqual(storage.getSize(), len(rows[0]))

    numCols = len(rows[0])
    m_ref = DataMatrix(len(rows), numCols)
    for i, row in enumerate(rows):
        for j in range(numCols):
            m_ref.set(i, j, float(row[j]))

    return m_ref
def writeSurplusesLevelWise(self, filename):
    """
    Write the surpluses of every knowledge type, together with their level,
    to one ARFF file per time step
    ("<filename>.t<time step>.surpluses.arff").

    @param filename: prefix of the files to be written
    """
    # locate all knowledge types available
    dtypes = self.__learner.getKnowledgeTypes()
    names = ['level']
    for dtype in dtypes:
        names.append("surplus_%s" % KnowledgeTypes.toString(dtype))

    ts = self.__knowledge.getAvailableTimeSteps()
    for t in ts:
        # collect all the surpluses classifying them by level
        data = {}
        n = 0
        for dtype in dtypes:
            data[dtype] = self.computeSurplusesLevelWise(t, dtype)
            # size the matrix for the knowledge type with the most
            # surpluses; taking only the last count would be too small if
            # the counts differ
            n = max(n, sum(len(values) for values in data[dtype].values()))

        A = DataMatrix(n, len(names))
        # rows a knowledge type does not fill stay at zero
        A.setAll(0.0)

        # add them to a matrix structure
        for i, dtype in enumerate(dtypes):
            k = 0
            for level, surpluses in data[dtype].items():
                for j, surplus in enumerate(surpluses):
                    A.set(k + j, i + 1, surplus)
                    A.set(k + j, 0, level)
                k += len(surpluses)

        writeDataARFF({'filename': "%s.t%s.surpluses.arff" % (filename, t),
                       'data': A,
                       'names': names})
def plotGrid(self, learner, suffix):
    """
    Plot the learner's output on a 30 x 30 mesh over [0, 1]^2 as a wireframe
    and store it as "grid3d_<suffix>_<iteration>.png".

    @param learner: object providing applyData and iteration
    @param suffix: string used in the file name
    """
    from mpl_toolkits.mplot3d.axes3d import Axes3D
    import matplotlib.pyplot as plt

    X, Y = np.meshgrid(np.linspace(0, 1, 30), np.linspace(0, 1, 30))
    Z = zeros(np.shape(X))
    shape = np.shape(Z)

    # one matrix row per mesh point, row-major
    meshPoints = DataMatrix(shape[0] * shape[1], 2)
    counter = 0
    for i in xrange(shape[0]):
        for j in xrange(shape[1]):
            meshPoints.set(counter, 0, X[i, j])
            meshPoints.set(counter, 1, Y[i, j])
            counter += 1

    result = learner.applyData(meshPoints)

    # copy the results back in the same order
    counter = 0
    for i in xrange(shape[0]):
        for j in xrange(shape[1]):
            Z[i, j] = result[counter]
            counter += 1

    fig = plt.figure()
    ax = Axes3D(fig)
    ax.plot_wireframe(X, Y, Z)
    plt.savefig("grid3d_%s_%i.png" % (suffix, learner.iteration))
    fig.clf()
    plt.close(plt.gcf())
def to_data_matrix(arr):
    """
    Copy a two-dimensional array row by row into a new DataMatrix.

    @param arr: array with a two-element shape whose rows provide tolist()
    @return: DataMatrix of the same shape
    """
    num_rows, num_cols = arr.shape
    matrix = DataMatrix(num_rows, num_cols)
    for row_index, row in enumerate(arr):
        matrix.setRow(row_index, DataVector(row.tolist()))
    return matrix
def general_test(self, d, l, bb, xs):
    """
    Compare OperationMultipleEval on a grid with a bounding box against the
    values computed by calc_exp_value_normal.

    @param d: dimension of the grid
    @param l: level of the regular grid
    @param bb: per-dimension pairs (left boundary, right boundary)
    @param xs: list of evaluation points
    """
    test_desc = "dim=%d, level=%d, len(x)=%s" % (d, l, len(xs))
    print(test_desc)

    self.grid = Grid.createLinearGrid(d)
    self.grid_gen = self.grid.getGenerator()
    self.grid_gen.regular(l)

    alpha = DataVector(
        [self.get_random_alpha() for i in range(self.grid.getSize())])

    bb_ = BoundingBox(d)
    for d_k in range(d):
        dimbb = BoundingBox1D()
        dimbb.leftBoundary = bb[d_k][0]
        dimbb.rightBoundary = bb[d_k][1]
        bb_.setBoundary(d_k, dimbb)

    # Calculate the expected value without the bounding box
    expected_normal = [
        self.calc_exp_value_normal(x, d, bb, alpha) for x in xs
    ]

    # Now set the bounding box
    self.grid.getStorage().setBoundingBox(bb_)

    dm = DataMatrix(len(xs), d)
    for k, x in enumerate(xs):
        dm.setRow(k, DataVector(x))

    multEval = createOperationMultipleEval(self.grid, dm)

    actual_normal = DataVector(len(xs))
    multEval.mult(alpha, actual_normal)
    actual_normal_list = [actual_normal[k] for k in range(len(xs))]

    # assertAlmostEqual cannot subtract two lists: it passes only if they are
    # exactly equal and raises TypeError otherwise, so compare entry by entry
    self.assertEqual(len(actual_normal_list), len(expected_normal))
    for actual, expected in zip(actual_normal_list, expected_normal):
        self.assertAlmostEqual(actual, expected)

    del self.grid
def setUp(self):
    """
    Create a container with 11 one-dimensional points 0..10 whose values
    equal the points, and a SequentialFoldingPolicy of level 10 on it.
    """
    self.size = 11
    self.level = 10

    points = DataMatrix(self.size, 1)
    values = DataVector(self.size)
    for idx in range(self.size):
        points.set(idx, 0, idx)
        values[idx] = idx

    self.dataContainer = DataContainer(points=points, values=values)
    self.policy = SequentialFoldingPolicy(self.dataContainer, self.level)
def dehierarchizeOnNewGrid(gridResult, grid, alpha):
    """
    Evaluate the sparse grid function (grid, alpha) at all grid points of
    gridResult.

    @param gridResult: Grid whose points are the evaluation points
    @param grid: Grid of the function to evaluate
    @param alpha: coefficient vector belonging to grid
    @return: result of evalSGFunctionMulti at the points of gridResult
    """
    storage = gridResult.getStorage()

    # gather the coordinates of all points of the result grid
    evalPoints = DataMatrix(storage.size(), storage.dim())
    coords = DataVector(storage.dim())
    for idx in xrange(storage.size()):
        storage.get(idx).getCoords(coords)
        evalPoints.setRow(idx, coords)

    return evalSGFunctionMulti(grid, alpha, evalPoints)
def setUp(self):
    """
    Create a container with 9 one-dimensional points 0..8, labelled -1 for
    the first four points and 1 for the rest, and a StratifiedFoldingPolicy
    of level 4 on it.
    """
    self.size = 9
    self.level = 4
    points = DataMatrix(self.size, 1)
    values = DataVector(self.size)
    for i in xrange(self.size):
        points.set(i, 0, i)
        # floor division keeps the split at index 4 even if true division is
        # in effect ('/' would give 4.5 and move the split)
        values[i] = -1 if i < self.size // 2 else 1
    self.dataContainer = DataContainer(points=points, values=values)
    self.policy = StratifiedFoldingPolicy(self.dataContainer, self.level)
def dehierarchizeOnNewGrid(gridResult, grid, alpha):
    """
    Compute the values of the sparse grid function (grid, alpha) at the grid
    points of gridResult.

    @param gridResult: Grid providing the evaluation points
    @param grid: Grid of the function to evaluate
    @param alpha: coefficient vector belonging to grid
    @return: result of evalSGFunctionMulti at the points of gridResult
    """
    gsResult = gridResult.getStorage()
    numPoints, numDims = gsResult.size(), gsResult.dim()

    # one row per grid point of the result grid
    ps = DataMatrix(numPoints, numDims)
    buf = DataVector(gsResult.dim())
    for row in xrange(gsResult.size()):
        gridPoint = gsResult.get(row)
        gridPoint.getCoords(buf)
        ps.setRow(row, buf)

    nodalValues = evalSGFunctionMulti(grid, alpha, ps)
    return nodalValues
def setUp(self):
    """
    Create a container with 9 one-dimensional points 0..8, labelled -1 below
    index size // 2 and 1 from there on, and a StratifiedFoldingPolicy of
    level 4 on it.
    """
    self.size = 9
    self.level = 4

    points = DataMatrix(self.size, 1)
    values = DataVector(self.size)
    for idx in range(self.size):
        points.set(idx, 0, idx)
        if idx < self.size // 2:
            values[idx] = -1
        else:
            values[idx] = 1

    self.dataContainer = DataContainer(points=points, values=values)
    self.policy = StratifiedFoldingPolicy(self.dataContainer, self.level)
def setUp(self):
    """
    Build 11 one-dimensional points 0..10 with identical values and a
    SequentialFoldingPolicy of level 10 on the resulting container.
    """
    self.size = 11
    self.level = 10

    points = DataMatrix(self.size, 1)
    values = DataVector(self.size)
    for k in xrange(self.size):
        points.set(k, 0, k)
        values[k] = k

    self.dataContainer = DataContainer(points=points, values=values)
    self.policy = SequentialFoldingPolicy(self.dataContainer, self.level)
def setUp(self):
    """
    Create a regular 2D linear grid of level 2, a 20 x 20 training mesh with
    seeded random integer classes, the errors of a fixed coefficient vector
    on it, and a WeightedErrorRefinementFunctor configured with all of them.
    """
    #
    # Grid
    #
    DIM = 2
    LEVEL = 2

    self.grid = Grid.createLinearGrid(DIM)
    self.grid_gen = self.grid.getGenerator()
    self.grid_gen.regular(LEVEL)

    #
    # trainData, classes, errors
    #
    DELTA = 0.05
    DELTA_RECI = int(1 / DELTA)

    xs = [[DELTA * i, DELTA * j]
          for i in range(DELTA_RECI)
          for j in range(DELTA_RECI)]

    random.seed(1208813)
    ys = [random.randint(-10, 10) for i in range(DELTA_RECI**2)]

    self.trainData = DataMatrix(xs)
    self.classes = DataVector(ys)
    self.alpha = DataVector([3, 6, 7, 9, -1])

    # error = class minus function value at the training point
    self.errors = DataVector(DELTA_RECI**2)
    coord = DataVector(DIM)
    opEval = createOperationEval(self.grid)

    for row in range(self.trainData.getNrows()):
        self.trainData.getRow(row, coord)
        self.errors[row] = self.classes[row] - opEval.eval(self.alpha, coord)

    #
    # Functor
    #
    self.functor = WeightedErrorRefinementFunctor(self.alpha, self.grid)
    self.functor.setTrainDataset(self.trainData)
    self.functor.setClasses(self.classes)
    self.functor.setErrors(self.errors)
def general_test(self, d, l, bb, xs):
    """
    Compare OperationMultipleEval on a grid with a bounding box against the
    values computed by calc_exp_value_normal.

    @param d: dimension of the grid
    @param l: level of the regular grid
    @param bb: per-dimension pairs (left boundary, right boundary)
    @param xs: list of evaluation points
    """
    test_desc = "dim=%d, level=%d, len(x)=%s" % (d, l, len(xs))
    print(test_desc)

    self.grid = Grid.createLinearGrid(d)
    self.grid_gen = self.grid.createGridGenerator()
    self.grid_gen.regular(l)

    alpha = DataVector([self.get_random_alpha()
                        for i in xrange(self.grid.getSize())])

    bb_ = BoundingBox(d)
    for d_k in xrange(d):
        dimbb = DimensionBoundary()
        dimbb.leftBoundary = bb[d_k][0]
        dimbb.rightBoundary = bb[d_k][1]
        bb_.setBoundary(d_k, dimbb)

    # Calculate the expected value without the bounding box
    expected_normal = [self.calc_exp_value_normal(x, d, bb, alpha)
                       for x in xs]

    # Now set the bounding box
    self.grid.getStorage().setBoundingBox(bb_)

    dm = DataMatrix(len(xs), d)
    for k, x in enumerate(xs):
        dm.setRow(k, DataVector(x))

    multEval = createOperationMultipleEval(self.grid, dm)

    actual_normal = DataVector(len(xs))
    multEval.mult(alpha, actual_normal)
    actual_normal_list = [actual_normal[k] for k in xrange(len(xs))]

    # assertAlmostEqual cannot subtract two lists: it passes only if they are
    # exactly equal and raises TypeError otherwise, so compare entry by entry
    self.assertEqual(len(actual_normal_list), len(expected_normal))
    for actual, expected in zip(actual_normal_list, expected_normal):
        self.assertAlmostEqual(actual, expected)

    del self.grid
def buildTrainingVector(data):
    """
    Build a DataMatrix with one row per data point from column-wise data.

    @param data: dictionary whose entry "data" holds one sequence per
                 dimension
    @return: DataMatrix with len(data["data"][0]) rows and one column per
             dimension
    """
    from pysgpp import DataMatrix

    columns = data["data"]
    dim = len(columns)
    numPoints = len(columns[0])
    training = DataMatrix(numPoints, dim)

    # d runs over the dimensions, i over the data points
    for d in xrange(dim):
        for i in xrange(numPoints):
            training.set(i, d, columns[d][i])

    return training
def buildTrainingVector(data):
    """
    Transpose column-wise data into a DataMatrix with one row per point.

    @param data: dictionary whose entry "data" holds one sequence per
                 dimension
    @return: DataMatrix with len(data["data"][0]) rows and one column per
             dimension
    """
    from pysgpp import DataMatrix

    perDimension = data["data"]
    numDims = len(perDimension)
    numRows = len(perDimension[0])
    training = DataMatrix(numRows, numDims)

    # fill the matrix column by column
    for col, series in enumerate(perDimension):
        for row in range(numRows):
            training.set(row, col, series[row])

    return training
def loadData(self, name="train"):
    """
    Read the ARFF file self.filename into a DataContainer.

    Every @attribute whose name starts with "class" marks the last value of
    each data line as the class value; all other attributes become columns
    of the point matrix.

    @param name: String for category of data set, default "train"
    @return: DataContainer with the points and the class values
    """
    data = []
    classes = []
    hasclass = False

    fin = self.__gzOpen(self.filename, "r")
    # make sure the file is closed even if parsing fails
    try:
        # get the different section of ARFF-File
        for line in fin:
            sline = line.strip().lower()
            if sline.startswith(b"%") or len(sline) == 0:
                continue

            if sline.startswith(b"@data"):
                break

            if sline.startswith(b"@attribute"):
                value = sline.split()
                if value[1].startswith(b"class"):
                    hasclass = True
                else:
                    data.append([])

        # read in the data stored in the ARFF file
        for line in fin:
            sline = line.strip()
            if sline.startswith(b"%") or len(sline) == 0:
                continue

            values = sline.split(b",")
            if hasclass:
                classes.append(float(values[-1]))
                values = values[:-1]
            for i in range(len(values)):
                data[i].append(float(values[i]))
    finally:
        fin.close()

    dim = len(data)
    size = len(data[0])
    dataMatrix = DataMatrix(size, dim)
    tempVector = DataVector(dim)
    valuesVector = DataVector(size)
    # without a class attribute there are no class values to copy; the
    # values then stay at zero instead of failing on the empty list
    # NOTE(review): confirm that zero is an acceptable placeholder
    valuesVector.setAll(0.0)

    for rowIndex in range(size):
        for colIndex in range(dim):
            tempVector[colIndex] = data[colIndex][rowIndex]
        dataMatrix.setRow(rowIndex, tempVector)
        if hasclass:
            valuesVector[rowIndex] = classes[rowIndex]

    return DataContainer(points=dataMatrix, values=valuesVector,
                         name=name, filename=self.filename)
def dehierarchize(grid, alpha):
    """
    Evaluate the sparse grid function at all of its own grid points.

    @param grid: Grid
    @param alpha: coefficient vector
    @return: DataVector with one function value per grid point
    """
    storage = grid.getStorage()
    numPoints = storage.size()

    coords = DataVector(storage.dim())
    nodalValues = DataVector(numPoints)

    # coordinates of all grid points, one per row
    gridPoints = DataMatrix(numPoints, storage.dim())
    for idx in xrange(storage.size()):
        storage.get(idx).getCoords(coords)
        gridPoints.setRow(idx, coords)

    createOperationMultipleEval(grid, gridPoints).mult(alpha, nodalValues)
    return nodalValues
def setUp(self):
    """
    Create a container with 11 one-dimensional points 0..10 whose values
    equal the points, and a RandomFoldingPolicy of level 10 with seed 42.
    """
    self.size = 11
    self.level = 10
    self.seed = 42

    points = DataMatrix(self.size, 1)
    values = DataVector(self.size)
    for idx in xrange(self.size):
        points.set(idx, 0, idx)
        values[idx] = idx

    self.dataContainer = DataContainer(points=points, values=values)
    self.policy = RandomFoldingPolicy(self.dataContainer,
                                      self.level,
                                      self.seed)
def sampleGrids(self, filename):
    """
    For every time step of interest, write three ARFF files: the sparse grid
    function sampled on a full grid, the sparse grid points, and the
    coefficients.

    @param filename: prefix of the files to be written
    """
    timeSteps = self.__uqManager.getTimeStepsOfInterest()

    names = self.__params.getNames()
    names.append('f_\\mathcal{I}(x)')

    for t in timeSteps:
        grid, surplus = self.__knowledge.getSparseGridFunction(self._qoi, t)

        # init
        gs = grid.getStorage()
        dim = gs.getDimension()

        # -----------------------------------------
        # do full grid sampling of sparse grid function
        # -----------------------------------------
        samples = eval_fullGrid(4, dim)
        res = evalSGFunctionMulti(grid, surplus, samples)
        # append the function values as an additional column
        samples = np.vstack((samples.T, res)).T

        # write results
        sampleMatrix = DataMatrix(samples)
        writeDataARFF({'filename': "%s.t%f.samples.arff" % (filename, t),
                       'data': sampleMatrix,
                       'names': names})
        del sampleMatrix

        # -----------------------------------------
        # write sparse grid points to file
        # -----------------------------------------
        gridPoints = np.ndarray((gs.getSize(), dim))
        coords = DataVector(dim)
        for idx in range(gs.getSize()):
            gs.getCoordinates(gs.getPoint(idx), coords)
            gridPoints[idx, :] = coords.array()

        # write results
        pointMatrix = DataMatrix(gridPoints)
        writeDataARFF({'filename': "%s.t%f.gridpoints.arff" % (filename, t),
                       'data': pointMatrix,
                       'names': names})
        del pointMatrix

        # -----------------------------------------
        # write alpha
        # -----------------------------------------
        writeAlphaARFF("%s.t%f.alpha.arff" % (filename, t), surplus)
def writeSensitivityValues(self, filename):
    """
    Write total effects and Sobol indices of all available time steps to
    "<filename>.sa.stats.arff", one row per time step.

    @param filename: prefix of the file to be written
    @raise ValueError: if a later time step yields a different number of
                       total effects or Sobol indices than the first one
    """
    def keymap(key):
        # translate a tuple of parameter indices into their names
        names = self.__uqManager.getParameters().activeParams().getNames()
        ans = [names[i] for i in key]
        return ",".join(ans)

    # parameters
    ts = self.__knowledge.getAvailableTimeSteps()
    gs = self.__knowledge.getGrid(self._qoi).getStorage()

    # nominal layout: one total effect per dimension and one Sobol index per
    # non-empty subset of dimensions
    n = len(ts)
    n1 = gs.getDimension()
    n2 = 2**n1 - 1
    data = DataMatrix(n, n1 + n2 + 1)
    names = ['time'] + [None] * (n1 + n2)

    for k, t in enumerate(ts):
        # estimated anova decomposition
        anova = self.getAnovaDecomposition(t=t)
        me = anova.getSobolIndices()
        te = anova.getTotalEffects()

        # A debugger breakpoint used to be triggered here when the sizes
        # deviated from the nominal layout. Instead, adopt the sizes of the
        # first time step and require later ones to match.
        if (len(te), len(me)) != (n1, n2):
            if k > 0:
                raise ValueError(
                    "inconsistent anova decomposition at time step %s: "
                    "expected %i total effects and %i Sobol indices, "
                    "got %i and %i" % (t, n1, n2, len(te), len(me)))
            n1, n2 = len(te), len(me)
            data = DataMatrix(n, n1 + n2 + 1)
            names = ['time'] + [None] * (n1 + n2)

        v = DataVector(n1 + n2 + 1)
        v.setAll(0.0)
        v[0] = t

        # total effects first, then the Sobol indices; column names are
        # derived once from the first time step
        for i, key in enumerate(
                anova.getSortedPermutations(list(te.keys()))):
            v[i + 1] = te[key]
            if k == 0:
                names[i + 1] = '"$T_{' + keymap(key) + '}$"'

        for i, key in enumerate(
                anova.getSortedPermutations(list(me.keys()))):
            v[n1 + i + 1] = me[key]
            if k == 0:
                names[n1 + 1 + i] = '"$S_{' + keymap(key) + '}$"'

        data.setRow(k, v)

    writeDataARFF({
        'filename': filename + ".sa.stats.arff",
        'data': data,
        'names': names
    })
def estimateDiscreteL2Error(grid, alpha, f, n=1000):
    """
    Compare the sparse grid function with f at n uniformly drawn random
    points and return the l2 norm of the difference vector.

    @param grid: Grid
    @param alpha: coefficient vector
    @param f: function evaluated on each sample row
    @param n: number of random control samples, default 1000
    @return: l2 norm of (sparse grid values - f values)
    """
    numDims = grid.getStorage().dim()

    # create control samples
    samples = DataMatrix(np.random.rand(n, numDims))

    difference = evalSGFunctionMulti(grid, alpha, samples)

    reference = DataVector(samples.getNrows())
    for row, point in enumerate(samples.array()):
        reference[row] = f(point)

    # compute the difference in place
    difference.sub(reference)
    return difference.l2Norm()
def setUp(self):
    """
    Create a regular linear grid from DIM and LEVEL, a 20 x 20 training mesh
    with seeded random integer classes, the errors of a fixed coefficient
    vector on it, a PersistentErrorRefinementFunctor configured with all of
    them, and a zeroed accumulator vector.
    """
    #
    # Grid
    #
    self.grid = Grid.createLinearGrid(DIM)
    self.grid_gen = self.grid.createGridGenerator()
    self.grid_gen.regular(LEVEL)

    #
    # trainData, classes, errors
    #
    DELTA = 0.05
    DELTA_RECI = int(1 / DELTA)

    xs = [[DELTA * i, DELTA * j]
          for i in xrange(DELTA_RECI)
          for j in xrange(DELTA_RECI)]

    random.seed(1208813)
    ys = [random.randint(-10, 10) for i in xrange(DELTA_RECI**2)]

    self.trainData = DataMatrix(xs)
    self.classes = DataVector(ys)
    self.alpha = DataVector([3, 6, 7, 9, -1])

    # error = class minus function value at the training point
    self.errors = DataVector(DELTA_RECI**2)
    coord = DataVector(DIM)

    for row in xrange(self.trainData.getNrows()):
        self.trainData.getRow(row, coord)
        self.errors[row] = \
            self.classes[row] - self.grid.eval(self.alpha, coord)

    #
    # Functor
    #
    self.functor = PersistentErrorRefinementFunctor(self.alpha, self.grid)
    self.functor.setTrainDataset(self.trainData)
    self.functor.setClasses(self.classes)
    self.functor.setErrors(self.errors)

    self.accum = DataVector(len(self.alpha))
    self.accum.setAll(0.0)
def setUp(self):
    """
    Create a regular 2D linear grid of level 2, a 20 x 20 training mesh with
    seeded random integer classes, the absolute errors of a fixed
    coefficient vector on it, and an OnlinePredictiveRefinementDimension
    strategy configured with all of them.
    """
    #
    # Grid
    #
    DIM = 2
    LEVEL = 2

    self.grid = Grid.createLinearGrid(DIM)
    self.grid_gen = self.grid.getGenerator()
    self.grid_gen.regular(LEVEL)

    #
    # trainData, classes, errors
    #
    DELTA = 0.05
    DELTA_RECI = int(1 / DELTA)

    xs = [[DELTA * i, DELTA * j]
          for i in range(DELTA_RECI)
          for j in range(DELTA_RECI)]

    random.seed(1208813)
    ys = [random.randint(-10, 10) for i in range(DELTA_RECI**2)]

    self.trainData = DataMatrix(xs)
    self.classes = DataVector(ys)
    self.alpha = DataVector([3, 6, 7, 9, -1])
    self.multEval = createOperationMultipleEval(self.grid, self.trainData)
    opEval = createOperationEval(self.grid)

    # absolute error between class and function value per training point
    self.errors = DataVector(DELTA_RECI**2)
    coord = DataVector(DIM)

    for row in range(self.trainData.getNrows()):
        self.trainData.getRow(row, coord)
        deviation = self.classes[row] - opEval.eval(self.alpha, coord)
        self.errors[row] = abs(deviation)

    #
    # OnlinePredictiveRefinementDimension
    #
    hash_refinement = HashRefinement()
    self.strategy = OnlinePredictiveRefinementDimension(hash_refinement)
    self.strategy.setTrainDataset(self.trainData)
    self.strategy.setClasses(self.classes)
    self.strategy.setErrors(self.errors)
def sampleGrids(self, filename):
    """
    For every time step of interest write three ARFF files: the sparse
    grid function sampled on a full grid, the coordinates of the
    sparse grid points, and the surplus vector.

    @param filename: string, common prefix of all generated file names
    """
    steps = self.__learner.getTimeStepsOfInterest()

    # NOTE(review): append() modifies the list returned by getNames();
    # if that is not a copy, repeated calls keep adding labels -- verify.
    names = self.__params.getNames()
    names.append('f_\\mathcal{I}(x)')

    for t in steps:
        grid, surplus = self.__knowledge.getSparseGridFunction(self._qoi, t)
        gs = grid.getStorage()
        dim = gs.dim()

        # ---------------------------------------------------------
        # sample the sparse grid function on a full grid of level 4
        # ---------------------------------------------------------
        samples = eval_fullGrid(4, dim)
        res = evalSGFunctionMulti(grid, surplus, samples)

        # attach the function values as an additional column: transpose,
        # append them as the last row, transpose back
        samples.transpose()
        samples.appendRow()
        samples.setRow(samples.getNrows() - 1, res)
        samples.transpose()

        writeDataARFF({'filename': "%s.t%f.samples.arff" % (filename, t),
                       'data': samples,
                       'names': names})

        # ---------------------------------------------------------
        # coordinates of the sparse grid points
        # ---------------------------------------------------------
        coords = DataMatrix(gs.size(), dim)
        coords.setAll(0.0)
        for i in xrange(gs.size()):
            gp = gs.get(i)
            row = np.array([gp.getCoord(j) for j in xrange(dim)])
            coords.setRow(i, DataVector(row))

        # NOTE(review): names still carries the appended function label
        # although this matrix has no function column -- TODO confirm.
        writeDataARFF({'filename': "%s.t%f.gridpoints.arff" % (filename, t),
                       'data': coords,
                       'names': names})

        # ---------------------------------------------------------
        # surplus vector
        # ---------------------------------------------------------
        writeAlphaARFF("%s.t%f.alpha.arff" % (filename, t), surplus)
def computeBilinearFormQuad(grid, U):
    """
    Assemble a symmetric matrix over all pairs of grid points using
    numerical quadrature. Entry (i, j) is the product over all
    dimensions d of the integral of
    basis_i(x) * basis_j(x) * U[d].pdf(x) over the common support.

    @param grid: Grid
    @param U: sequence indexed by dimension, each item offering pdf(x)
    @return: DataMatrix
    """
    gs = grid.getStorage()
    basis = getBasis(grid)
    numPoints = gs.size()
    numDims = gs.dim()

    A = DataMatrix(numPoints, numPoints)
    level = DataMatrix(numPoints, numDims)
    index = DataMatrix(numPoints, numDims)
    gs.getLevelIndexArraysForEval(level, index)

    # one factor per dimension, overwritten for every pair (i, j)
    s = np.ndarray(numDims, dtype='float')

    for i in xrange(numPoints):
        gpi = gs.get(i)
        # only the upper triangle is integrated, the rest is mirrored
        for j in xrange(i, numPoints):
            gpj = gs.get(j)
            for d in xrange(numDims):
                # NOTE(review): the level array is used as a divisor, so
                # it presumably stores 2**level -- verify against pysgpp
                lid, iid = level.get(i, d), index.get(i, d)
                ljd, ijd = level.get(j, d), index.get(j, d)

                # intersection of the supports of both basis functions
                lb = max((iid - 1) / lid, (ijd - 1) / ljd)
                ub = min((iid + 1) / lid, (ijd + 1) / ljd)

                sameLevel = lid == ljd
                if (sameLevel and iid != ijd) or \
                        (not sameLevel and lb >= ub):
                    # same level but different index, or empty overlap
                    s[d] = 0.
                    continue

                # the basis is evaluated with the level taken from the
                # grid points and the index cast to int
                li, ii = gpi.getLevel(d), int(iid)
                lj, ij = gpj.getLevel(d), int(ijd)

                def integrand(x):
                    return basis.eval(li, ii, x) * \
                        basis.eval(lj, ij, x) * \
                        U[d].pdf(x)

                s[d], _ = quad(integrand, lb, ub, epsabs=1e-8)

            A.set(i, j, float(np.prod(s)))
            A.set(j, i, A.get(i, j))

    return A
def loadData(self, name = "train"):
    """
    Read the ARFF file self.filename and return its content.

    Every @attribute whose name starts with "class" marks the last
    comma-separated value of each data line as the class value; every
    other @attribute adds one column. Lines that are empty or start
    with "%" are skipped.

    If the file declares no class attribute, the values of the
    returned container are all 0.0.

    @param name: string, name handed to the DataContainer
    @return: DataContainer with the points and values of the file
    """
    columns = []
    classes = []
    hasclass = False

    fin = self.__gzOpen(self.filename, "r")
    # make sure the file handle is released even if parsing fails
    try:
        # header section: collect the attributes up to @data
        for line in fin:
            sline = line.strip().lower()
            if len(sline) == 0 or sline.startswith("%"):
                continue
            if sline.startswith("@data"):
                break
            if sline.startswith("@attribute"):
                value = sline.split()
                if value[1].startswith("class"):
                    hasclass = True
                else:
                    columns.append([])

        # data section: the same iterator continues after @data
        for line in fin:
            sline = line.strip()
            if len(sline) == 0 or sline.startswith("%"):
                continue
            values = sline.split(",")
            if hasclass:
                classes.append(float(values[-1]))
                values = values[:-1]
            for i, token in enumerate(values):
                columns[i].append(float(token))
    finally:
        fin.close()

    dim = len(columns)
    size = len(columns[0])
    dataMatrix = DataMatrix(size, dim)
    tempVector = DataVector(dim)
    valuesVector = DataVector(size)
    if not hasclass:
        # no class column in the file: there is nothing to copy below
        valuesVector.setAll(0.0)

    for rowIndex in xrange(size):
        # the columns were collected per attribute, rebuild the row
        for colIndex in xrange(dim):
            tempVector[colIndex] = columns[colIndex][rowIndex]
        dataMatrix.setRow(rowIndex, tempVector)
        if hasclass:
            valuesVector[rowIndex] = classes[rowIndex]

    return DataContainer(points=dataMatrix, values=valuesVector,
                         name=name, filename=self.filename)
def getDataSubsetByIndexList(self, indices, name="train"):
    """
    Copy the points and values at the given positions into a new
    container, in the order in which the positions are listed.

    @param indices: list of row indices to pick
    @param name: string, name handed to the new DataContainer
    @return: DataContainer holding the selected points and values
    """
    count = len(indices)
    subset_points = DataMatrix(count, self.dim)
    subset_values = DataVector(count)
    buf = DataVector(self.dim)

    points = self.getPoints()
    values = self.getValues()

    for target, source in enumerate(indices):
        # buf is reused as transfer buffer for every row
        points.getRow(source, buf)
        subset_points.setRow(target, buf)
        subset_values[target] = values[source]

    return DataContainer(points=subset_points,
                         values=subset_values,
                         name=name)
def writeSensitivityValues(self, filename):
    """
    Write the total effects and the Sobol indices of all available
    time steps to "<filename>.sa.stats.arff". Each row holds the time
    step, followed by the total effects and then the Sobol indices.

    @param filename: string, prefix of the file to be written
    """
    def keymap(key):
        # translate a tuple of parameter positions into their names
        paramNames = self.getLearner().getParameters().activeParams().getNames()
        return ",".join([paramNames[i] for i in key])

    # parameters
    ts = self.__knowledge.getAvailableTimeSteps()
    gs = self.__knowledge.getGrid(self._qoi).getStorage()

    # expected layout: 1 time column, n1 total effects, n2 Sobol indices
    n = len(ts)
    n1 = gs.dim()
    n2 = 2 ** n1 - 1
    data = DataMatrix(n, n1 + n2 + 1)
    names = ['time'] + [None] * (n1 + n2)

    for k, t in enumerate(ts):
        # estimated anova decomposition
        anova = self.getAnovaDecomposition(t=t)
        me = anova.getSobolIndices()
        te = anova.getTotalEffects()

        # NOTE(review): the row is sized by what the decomposition
        # returns; if that differs from the layout above, the row no
        # longer matches the matrix width -- presumably setRow rejects
        # it, confirm. (A leftover debugger breakpoint used to sit here.)
        n2 = len(me)
        n1 = len(te)

        v = DataVector(n1 + n2 + 1)
        v.setAll(0.0)
        v[0] = t

        for i, key in enumerate(anova.getSortedPermutations(te.keys())):
            v[i + 1] = te[key]
            if k == 0:
                # column labels are taken from the first time step only
                names[i + 1] = '"$T_{' + keymap(key) + '}$"'

        for i, key in enumerate(anova.getSortedPermutations(me.keys())):
            v[n1 + i + 1] = me[key]
            if k == 0:
                names[n1 + 1 + i] = '"$S_{' + keymap(key) + '}$"'

        data.setRow(k, v)

    writeDataARFF({'filename': filename + ".sa.stats.arff",
                   'data': data,
                   'names': names})
def computeBFQuad(grid, U, admissibleSet, n=100):
    """
    Assemble a matrix with one row per point of the admissible set and
    one column per grid point using numerical quadrature. Entry (i, j)
    is the product over all dimensions d of the integral of
    basis_i(x) * basis_j(x) * U[d].pdf(x) over the common support.
    b[i] receives the entry of row i whose column equals the sequence
    number of the admissible point in the grid storage; other entries
    of b are not written here.

    @param grid: Grid
    @param U: list of distributions
    @param admissibleSet: AdmissibleSet
    @param n: int, number of MC samples (not referenced in the body)
    @return: tuple (DataMatrix, DataVector), A and b
    """
    gs = grid.getStorage()
    basis = getBasis(grid)
    A = DataMatrix(admissibleSet.getSize(), gs.size())
    b = DataVector(admissibleSet.getSize())

    # one factor per dimension, overwritten for every pair (i, j)
    s = np.ndarray(gs.dim(), dtype='float')

    for i, gpi in enumerate(admissibleSet.values()):
        for j in xrange(gs.size()):
            gpj = gs.get(j)
            for d in xrange(gs.dim()):
                lid, iid = gpi.getLevel(d), gpi.getIndex(d)
                ljd, ijd = gpj.getLevel(d), gpj.getIndex(d)

                # intersection of the supports of both basis functions
                xlow = max((iid - 1) * 2 ** -lid, (ijd - 1) * 2 ** -ljd)
                xhigh = min((iid + 1) * 2 ** -lid, (ijd + 1) * 2 ** -ljd)

                sameLevel = lid == ljd
                if (sameLevel and iid != ijd) or \
                        (not sameLevel and xlow >= xhigh):
                    # same level but different index, or empty overlap
                    s[d] = 0.
                    continue

                # integrate the product with scipy
                def integrand(x):
                    return basis.eval(lid, iid, x) * \
                        basis.eval(ljd, ijd, x) * \
                        U[d].pdf(x)

                s[d], _ = quad(integrand, xlow, xhigh, epsabs=1e-8)

            A.set(i, j, float(np.prod(s)))
            if gs.seq(gpi) == j:
                b[i] = A.get(i, j)

    return A, b
def dehierarchizeList(grid, alpha, gps):
    """
    Evaluate the sparse grid function at the coordinates of the given
    grid points.

    @param grid: Grid
    @param alpha: DataVector, surpluses
    @param gps: list of HashGridIndex
    @return: DataVector, one function value per entry of gps
    """
    dim = grid.getStorage().dim()
    coords = DataVector(dim)
    nodalValues = DataVector(len(gps))
    pointMatrix = DataMatrix(len(gps), dim)

    # collect the coordinates of all points row by row
    for row, gp in enumerate(gps):
        gp.getCoords(coords)
        pointMatrix.setRow(row, coords)

    # evaluate at all rows in one call
    opEval = createOperationMultipleEval(grid, pointMatrix)
    opEval.mult(alpha, nodalValues)
    return nodalValues
def __init__(self, trainData, samples=None, testData=None, bandwidths=None, transformation=None, surfaceFile=None):
    """
    Store the training data and derive bounds and bandwidths.

    @param trainData: array with a shape attribute, one column per
                      dimension; converted to a DataMatrix
    @param samples: stored as is
    @param testData: stored as is
    @param bandwidths: bandwidths to use; if None they are obtained
                       from getOptKDEbdwth on the training data
    @param transformation: if given, the bounds are taken from its
                           transformations instead of [0, 1]
    @param surfaceFile: stored as is
    """
    super(LibAGFDist, self).__init__()

    self.trainData = DataMatrix(trainData)
    self.testData = testData

    if transformation is not None:
        self.bounds = [trans.getBounds()
                       for trans in transformation.getTransformations()]
    else:
        # unit interval per dimension; a single dimension is stored
        # as one flat [0, 1] pair instead of a list of pairs
        unit = [[0, 1] for _ in xrange(trainData.shape[1])]
        if len(unit) == 1:
            self.bounds = unit[0]
        else:
            self.bounds = unit

    self.dim = trainData.shape[1]
    self.samples = samples
    self.transformation = transformation

    if bandwidths is None:
        # no bandwidths given: let the KDE operation fill them in
        op = createOperationInverseRosenblattTransformationKDE(self.trainData)
        self.bandwidths = DataVector(self.dim)
        op.getOptKDEbdwth(self.bandwidths)
    else:
        self.bandwidths = bandwidths

    self.surfaceFile = surfaceFile
def eval_fullGrid(level, dim, border=True):
    """
    Return the coordinates of all points of a full grid.

    @param level: int, level handed to the generator's full()
    @param dim: int, dimensionality of the grid
    @param border: bool, use a linear boundary grid if True,
                   a linear grid otherwise
    @return: DataMatrix, one row per grid point
    """
    createGrid = Grid.createLinearBoundaryGrid if border \
        else Grid.createLinearGrid
    grid = createGrid(dim)
    grid.createGridGenerator().full(level)

    gs = grid.getStorage()
    ans = DataMatrix(gs.size(), dim)
    coords = DataVector(dim)
    for row in xrange(gs.size()):
        gs.get(row).getCoords(coords)
        ans.setRow(row, coords)

    return ans
def testSave(self):
    """
    Save points and values through the ARFFAdapter, load them again
    and compare them with the original data. The written file is
    removed afterwards, also when an assertion fails.
    """
    filename = pathlocal + '/datasets/saving.arff.gz'
    testPoints = [[0.307143, 0.130137, 0.050000],
                  [0.365584, 0.105479, 0.050000],
                  [0.178571, 0.201027, 0.050000],
                  [0.272078, 0.145548, 0.050000],
                  [0.318831, 0.065411, 0.050000],
                  [0.190260, 0.086986, 0.050000],
                  [0.190260, 0.062329, 0.072500],
                  [0.120130, 0.068493, 0.072500],
                  [0.225325, 0.056164, 0.072500],
                  [0.213636, 0.050000, 0.072500]
                  ]
    testValues = [-1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
                  1.000000, -1.000000, -1.000000, -1.000000, -1.000000]
    attributes = {
        "x0": "NUMERIC",
        "x1": "NUMERIC",
        "x2": "NUMERIC",
        "class": "NUMERIC",
    }

    size = len(testPoints)
    dim = len(testPoints[0])

    # copy the reference points into a DataMatrix row by row
    point = DataVector(dim)
    points = DataMatrix(size, dim)
    for row in xrange(size):
        for col in xrange(dim):
            point[col] = testPoints[row][col]
        points.setRow(row, point)

    adapter = ARFFAdapter(filename)
    try:
        adapter.save(points, testValues, attributes)

        (points, values) = adapter.loadData().getPointsValues()
        testVector = DataVector(dim)
        for rowIdx in xrange(size):
            points.getRow(rowIdx, testVector)
            for colIdx in xrange(dim):
                # exact comparison only when built with double precision
                if cvar.USING_DOUBLE_PRECISION:
                    self.assertEqual(testVector[colIdx],
                                     testPoints[rowIdx][colIdx])
                else:
                    self.assertAlmostEqual(testVector[colIdx],
                                           testPoints[rowIdx][colIdx])
            self.assertEqual(values[rowIdx], testValues[rowIdx])
    finally:
        # do not leave the written file behind when the test fails; the
        # existence check keeps a failed save from being masked by an
        # error raised in os.remove
        if os.path.exists(filename):
            os.remove(filename)
def computeErrors(jgrid, jalpha, grid1, alpha1, grid2, alpha2, n=200):
    """
    Compute some errors to estimate the quality of the interpolation.
    The new sparse grid function is compared with the product of the
    two old ones at n uniformly drawn random samples.

    @param jgrid: Grid, new discretization
    @param jalpha: DataVector, new surpluses
    @param grid1: Grid, old discretization
    @param alpha1: DataVector, old surpluses
    @param grid2: Grid, old discretization
    @param alpha2: DataVector, old surpluses
    @param n: int, number of random control samples
    @return: tuple(<float>, <float>), maxdrift, l2norm
    """
    jgs = jgrid.getStorage()

    # create control samples
    samples = DataMatrix(np.random.rand(n, jgs.dim()))

    # evaluate the sparse grid functions
    jnodalValues = evalSGFunctionMulti(jgrid, jalpha, samples)

    # eval grids
    nodalValues1 = evalSGFunctionMulti(grid1, alpha1, samples)
    nodalValues2 = evalSGFunctionMulti(grid2, alpha2, samples)

    # compute errors; the sample coordinates themselves are not needed
    err = DataVector(n)
    for i in xrange(n):
        y = nodalValues1[i] * nodalValues2[i]
        if abs(jnodalValues[i]) > 1e100:
            # values beyond 1e100 are excluded from the statistics
            err[i] = 0.0
        else:
            err[i] = abs(y - jnodalValues[i])

    # get error statistics
    # l2
    l2norm = err.l2Norm()
    # maxdrift
    err.abs()
    maxdrift = err.max()

    return maxdrift, l2norm