def estimateDiscreteL2Error(grid, alpha, f, n=1000):
    """
    Estimate the discrete L2 error of a sparse grid function with respect
    to a reference function on uniformly drawn random control samples.

    @param grid: sparse grid the approximation lives on
    @param alpha: coefficient vector of the sparse grid function
    @param f: callable, evaluated once per sample row
    @param n: number of control samples drawn with np.random.rand
    @return: l2 norm of the vector of pointwise differences; the value is
        not divided by the number of samples
    """
    numDims = grid.getStorage().dim()

    # draw the control samples
    samples = DataMatrix(np.random.rand(n, numDims))

    # sparse grid function at the samples
    approximation = evalSGFunctionMulti(grid, alpha, samples)

    # reference function at the same samples
    reference = DataVector(samples.getNrows())
    for row, point in enumerate(samples.array()):
        reference[row] = f(point)

    # the difference is computed in place on the approximation vector
    approximation.sub(reference)
    return approximation.l2Norm()
def ppf(self, x, shuffle=False):
    """
    Apply the inverse Rosenblatt transformation of the KDE stored in
    self.dist to the given evaluation point(s).

    @param x: evaluation point(s); converted by self._convertEvalPoint
    @param shuffle: if true, the shuffled variant of the transformation
        is used
    @return: the single value if the result is a 1x1 matrix, otherwise
        the complete result array
    """
    points = DataMatrix(self._convertEvalPoint(x))
    transformed = DataMatrix(points.getNrows(), points.getNcols())
    transformed.setAll(0.0)

    # pick the requested variant of the transformation and run it
    operation = createOperationInverseRosenblattTransformationKDE(self.dist)
    if shuffle:
        transform = operation.doShuffledTransformation
    else:
        transform = operation.doTransformation
    transform(points, transformed)

    # a 1x1 result is unpacked to a scalar
    values = transformed.array()
    isScalar = values.shape[0] == 1 and values.shape[1] == 1
    return values[0, 0] if isScalar else values
class TestOnlinePredictiveRefinementDimension(unittest.TestCase):
    """
    Compares the refinement indicators collected by
    OnlinePredictiveRefinementDimension with hand-computed values and with
    a naive reference implementation on a small one-dimensional grid.
    """

    def test_manual(self):
        """Check the collected and the naive indicators against `result`."""
        print("#" * 20)

        # expected indicator per (grid point index, dimension)
        result = {(1, 0): 5, (2, 0): 25}

        #
        # Grid
        #

        DIM = 1
        LEVEL = 2

        self.grid = Grid.createLinearGrid(DIM)
        self.grid_gen = self.grid.createGridGenerator()
        self.grid_gen.regular(LEVEL)

        #
        # trainData, classes, errors
        #

        xs = [[0.1], [0.4], [0.6], [0.8]]
        errs = [1, 2, 3, 4]

        self.trainData = DataMatrix(xs)
        self.errors = DataVector(errs)
        self.multEval = createOperationMultipleEval(self.grid, self.trainData)
        self.dim = DIM
        self.storage = self.grid.getStorage()
        self.gridSize = self.grid.getSize()

        #
        # OnlinePredictiveRefinementDimension
        #

        print("OnlineRefinementDim")

        hash_refinement = HashRefinement()
        online = OnlinePredictiveRefinementDimension(hash_refinement)
        online.setTrainDataset(self.trainData)
        online.setErrors(self.errors)

        online_result = refinement_map({})
        online.collectRefinablePoints(self.grid.getStorage(), 10,
                                      online_result)

        # print everything first so the full map is visible on failure
        for k, v in online_result.items():
            print(k, v)

        for k, v in online_result.items():
            self.assertAlmostEqual(v, result[k])

        #
        # Naive
        #

        print()
        print("Naive")

        naive_result = self.naive_calc()

        for k, v in naive_result.items():
            print(k, v)

        for k, v in naive_result.items():
            self.assertAlmostEqual(v, result[k])

    def naive_calc(self):
        """
        Compute the indicator of every refinable (point, dimension) pair
        by temporarily inserting both children into the storage.

        @return: dict mapping (point index, dimension) to the sum of the
            indicators of the left and the right child
        """
        result = {}

        for j in range(self.gridSize):
            # the local must not be called HashGridIndex: that would
            # shadow the class and break the copy construction below
            point = self.storage.get(j)
            point.setLeaf(False)

            print("Point: ", j, " (", point.toString(), ")")

            for d in range(self.dim):
                print("Dimension: ", d)

                #
                # Get left and right child
                #

                leftChild = HashGridIndex(point)
                rightChild = HashGridIndex(point)

                self.storage.left_child(leftChild, d)
                self.storage.right_child(rightChild, d)

                #
                # Check if point is refinable
                #

                if self.storage.has_key(leftChild) or \
                        self.storage.has_key(rightChild):
                    continue

                #
                # Insert children temporarily
                #

                self.storage.insert(leftChild)
                self.storage.insert(rightChild)

                val1 = self.naive_calc_single(leftChild)
                print("Left Child: ", val1)

                val2 = self.naive_calc_single(rightChild)
                print("Right Child: ", val2)

                # remove the two temporary children again
                self.storage.deleteLast()
                self.storage.deleteLast()

                result[(j, d)] = val1 + val2

            print("")

        return result

    def naive_calc_single(self, index):
        """
        Compute the indicator of a single grid point.

        The numerator is the squared entry of multTranspose(errors) at the
        point, the denominator the sum of squares of mult(unit vector).

        @param index: grid point that is contained in the storage
        @return: numerator / denominator, or 0 if the denominator is zero
        """
        numData = self.trainData.getNrows()
        numCoeff = self.grid.getSize()
        seq = self.grid.getStorage().seq(index)

        tmp = DataVector(numCoeff)
        self.multEval.multTranspose(self.errors, tmp)

        num = tmp[seq] ** 2

        # unit coefficient vector selecting the basis function of `index`
        alpha = DataVector(numCoeff)
        alpha.setAll(0.0)
        alpha[seq] = 1.0

        col = DataVector(numData)
        self.multEval.mult(alpha, col)

        print(col)

        col.sqr()
        denom = col.sum()

        print(num)
        print(denom)

        if denom == 0:
            print("Denominator is zero")
            value = 0
        else:
            value = num / denom

        return value

    def tearDown(self):
        del self.grid
class LibAGFDist(Dist):
    """
    Kernel density estimate built from training samples, optionally
    produced by an external density estimation run (see computeDensity).

    pdf() evaluates a product of one-dimensional Gaussian kernels with one
    bandwidth per dimension; cdf() and ppf() use the (inverse) Rosenblatt
    transformation operations for KDEs.
    """

    def __init__(self, trainData, samples=None, testData=None,
                 bandwidths=None, transformation=None, surfaceFile=None):
        """
        @param trainData: two-dimensional numpy array, one sample per row
        @param samples: optional numpy array of samples used by rvs()
        @param testData: optional dict mapping sample tuples to pdf values
        @param bandwidths: optional kernel bandwidths (DataVector or
            array-like); estimated from trainData if omitted
        @param transformation: optional object providing
            getTransformations(); its bounds replace the default ones
        @param surfaceFile: optional path of a surface file for gnuplot()
        """
        super(LibAGFDist, self).__init__()

        self.trainData = DataMatrix(trainData)
        self.testData = testData
        self.dim = trainData.shape[1]

        # default bounds are the unit interval in every dimension; a
        # one-dimensional distribution stores the interval itself
        self.bounds = [[0, 1] for _ in range(self.dim)]
        if len(self.bounds) == 1:
            self.bounds = self.bounds[0]

        if transformation is not None:
            self.bounds = [trans.getBounds()
                           for trans in transformation.getTransformations()]

        self.samples = samples
        self.transformation = transformation

        if bandwidths is not None:
            self.bandwidths = bandwidths
        else:
            op = createOperationInverseRosenblattTransformationKDE(
                self.trainData)
            self.bandwidths = DataVector(self.dim)
            op.getOptKDEbdwth(self.bandwidths)

        self.surfaceFile = surfaceFile

    @classmethod
    def byConfig(cls, config):
        """
        Run the density estimation for a config file and load its results.

        @param config: path of the config file
        @return: the distribution, or None if config is None or missing
        """
        if config is not None and os.path.exists(config):
            # init density function
            traindatafile, samplefile, testFile, testOutFile, \
                bandwidthFile, surfaceFile = cls.computeDensity(config)
            return cls.byFiles(traindatafile, samplefile, testFile,
                               testOutFile, bandwidthFile, surfaceFile)

    @staticmethod
    def _loadColumns(filename):
        """Load a text file; anything below 2d becomes a single column."""
        data = np.loadtxt(filename)
        if data.ndim < 2:
            data = np.atleast_1d(data).reshape(-1, 1)
        return data

    @classmethod
    def byFiles(cls, trainDataFile, samplesFile=None, testFile=None,
                testOutFile=None, bandwidthFile=None, surfaceFile=None):
        """
        Create the distribution from files written by the estimation run.

        Optional files that are None or do not exist are ignored.

        @param trainDataFile: training samples, one per row (required)
        @param samplesFile: samples for rvs()
        @param testFile: test samples
        @param testOutFile: pdf values for the test samples
        @param bandwidthFile: kernel bandwidths
        @param surfaceFile: surface file for gnuplot()
        @return: the distribution
        @raise Exception: if the training data file does not exist
        """
        # load training file
        if not os.path.exists(trainDataFile):
            raise Exception('The training data file "%s" does not exist' %
                            trainDataFile)
        trainData = cls._loadColumns(trainDataFile)

        # load samples for quadrature
        samples = None
        if samplesFile is not None and os.path.exists(samplesFile):
            samples = cls._loadColumns(samplesFile)

        # load test file for evaluating pdf values
        testData = None
        if testFile is not None and os.path.exists(testFile):
            testData = cls._loadColumns(testFile)

        # load bandwidths file for evaluating pdf values
        bandwidths = None
        if bandwidthFile is not None and os.path.exists(bandwidthFile):
            bandwidths = np.loadtxt(bandwidthFile)

        # pdf values of the test samples, stored in a hash map; stays None
        # when they are not available (this used to be an unbound name)
        testDataEval = None
        if testData is not None and testOutFile is not None and \
                os.path.exists(testOutFile):
            # atleast_1d: a single value is loaded as a 0-d array
            testLikelihood = np.atleast_1d(np.loadtxt(testOutFile))
            testDataEval = {tuple(sample): testLikelihood[i]
                            for i, sample in enumerate(testData)}

        if surfaceFile is not None and not os.path.exists(surfaceFile):
            surfaceFile = None

        return cls(trainData,
                   samples=samples,
                   testData=testDataEval,
                   bandwidths=bandwidths,
                   surfaceFile=surfaceFile)

    @classmethod
    def computeDensity(cls, config,
                       pathsgpp='/home/franzefn/workspace/SGppUQ/lib/sgpp',
                       cluster='/home/franzefn/Promotion/UQ/benjamin/clustc/cluster'):
        """
        Run the external density estimation and read the file names of its
        results from the config file.

        @param config: path of the config file
        @param pathsgpp: value assigned to LD_LIBRARY_PATH for the run
        @param cluster: path of the executable, called as `cluster -c config`
        @return: tuple (train data file, samples file, test file, test
            output file, bandwidths file, surface file); entries that are
            not configured are None
        @raise Exception: if the config file does not exist or the run
            fails
        """
        import subprocess

        if not os.path.exists(config):
            raise Exception('the config file "%s" does not exist' % config)

        os.environ['LD_LIBRARY_PATH'] = pathsgpp

        # argument list instead of a shell string, so paths containing
        # spaces or shell metacharacters are passed through unchanged
        with open("out_libagf.log", "w") as log:
            try:
                ret = subprocess.call([cluster, "-c", config], stdout=log)
            except OSError as e:
                raise Exception('The density estimation could not be '
                                'started: %s' % e)
        if ret != 0:
            raise Exception('The density estimation exited unexpectedly')

        # extract the file names from the config
        s = cp.ConfigParser()
        s.optionxform = str
        s.read(config)

        traindatafile = s.get('files', 'inFileTrain')
        denest = s.options('denest')
        files = s.options('files')

        # the option is read as a number: comparing the raw string with 0
        # is always true in Python 2 and a TypeError in Python 3
        samplesfile = None
        if 'samplesNumberSamples' in denest and \
                s.getfloat('denest', 'samplesNumberSamples') > 0 and \
                'samplesOutput' in denest:
            samplesfile = s.get('denest', 'samplesOutput')

        testFile = None
        if 'inFileTest' in files:
            testFile = s.get('files', 'inFileTest')

        testOutFile = None
        if 'outFileTest' in files and 'inFileTest' in files:
            testOutFile = s.get('files', 'outFileTest')

        bandwidthsfile = None
        if 'printBandwidthsFile' in denest:
            bandwidthsfile = s.get('denest', 'printBandwidthsFile')

        surfacefile = None
        if 'printSurfaceFile' in denest:
            surfacefile = s.get('denest', 'printSurfaceFile')

        return traindatafile, samplesfile, testFile, testOutFile, \
            bandwidthsfile, surfacefile

    def pdf_libagf(self, x):
        """
        Look up the precomputed pdf value of a test sample.

        @param x: number or sequence; used as tuple key into testData
        @return: stored pdf value
        @raise AttributeError: if no value is stored for x
        """
        if isNumerical(x):
            x = [x]
        x = tuple(x)

        if self.testData is not None and x in self.testData:
            return self.testData[x]
        raise AttributeError("No pdf value for '%s' available" % (x, ))

    def _bandwidthArray(self):
        """Return the bandwidths as numpy array, whatever their type."""
        if isinstance(self.bandwidths, DataVector):
            return self.bandwidths.array()
        # bandwidths loaded with np.loadtxt have no array() method and are
        # 0-d for one-dimensional data
        return np.atleast_1d(np.asarray(self.bandwidths, dtype=float))

    def pdf(self, x):
        """
        Evaluate the Gaussian product kernel density estimate at x.

        @param x: evaluation point, broadcast against the training data
        @return: density value
        """
        n = self.trainData.getNrows()
        sigma = self._bandwidthArray()
        # normalization coefficient
        norm = 1. / (sigma * np.sqrt(2. * np.pi))

        # normalized distances to all training samples
        kernels = (x - self.trainData.array()) / sigma
        kernels = norm * np.exp(-kernels ** 2 / 2.)

        # scale the result by the number of samples
        return np.sum(np.prod(kernels, axis=1)) / n

    @staticmethod
    def _toMatrices(x):
        """
        Convert an evaluation point to the matrices the Rosenblatt
        operations work on.

        @return: tuple (converted x, input matrix, zeroed output matrix)
        @raise TypeError: if x has an unsupported type
        """
        if isList(x):
            x = DataVector(x)
        elif isNumerical(x):
            x = DataVector([x])

        if isinstance(x, DataMatrix):
            A = x
            B = DataMatrix(A.getNrows(), A.getNcols())
        elif isinstance(x, DataVector):
            A = DataMatrix(1, len(x))
            A.setRow(0, x)
            B = DataMatrix(1, len(x))
        else:
            raise TypeError("unsupported type of evaluation point: %s" %
                            type(x).__name__)
        B.setAll(0.0)
        return x, A, B

    @staticmethod
    def _unpackResult(x, B):
        """Return the array for matrix input, otherwise entry (0, 0)."""
        # NOTE(review): for vector input with more than one entry only the
        # first component is returned, as before - confirm this is intended
        if isinstance(x, DataMatrix):
            return B.array()
        return B.get(0, 0)

    def cdf(self, x):
        """
        Apply the Rosenblatt transformation of the KDE to x.

        @param x: number, list, DataVector or DataMatrix
        @return: array for DataMatrix input, otherwise a single value
        """
        x, A, B = self._toMatrices(x)

        # do the transformation
        op = createOperationRosenblattTransformationKDE(self.trainData)
        op.doTransformation(A, B)

        return self._unpackResult(x, B)

    def ppf(self, x):
        """
        Apply the inverse Rosenblatt transformation of the KDE to x.

        @param x: number, list, DataVector or DataMatrix
        @return: array for DataMatrix input, otherwise a single value
        """
        x, A, B = self._toMatrices(x)

        # do the transformation
        assert A.getNcols() == B.getNcols() == self.trainData.getNcols()
        op = createOperationInverseRosenblattTransformationKDE(self.trainData)
        op.doTransformation(A, B)

        return self._unpackResult(x, B)

    def rvs(self, n=1):
        """Draw n rows, with replacement, from the stored samples."""
        ixs = np.random.randint(0, len(self.samples), n)
        return self.samples[ixs, :]

    def mean(self, n=1e4):
        """
        Average of the coordinate products of the test samples.

        @param n: unused, kept for interface compatibility
        """
        moment = 0.
        for sample in self.testData:
            moment += np.prod(sample)
        return moment / len(self.testData)

    def var(self):
        """Sample variance of the coordinate products of the test samples."""
        mean = self.mean()
        moment = 0.
        for sample in self.testData:
            moment += (np.prod(sample) - mean) ** 2

        return moment / (len(self.testData) - 1)

    def getBounds(self):
        return self.bounds

    def getDim(self):
        return self.dim

    def getDistributions(self):
        return [self]

    def gnuplot(self, jpegFile, gnuplotConfig=None):
        """
        Write a gnuplot script that renders the surface file to a jpeg and
        run gnuplot on it.

        @param jpegFile: output file of the plot
        @param gnuplotConfig: path of the script; 'gnuplot.config' if None
        @raise Exception: if no surface file is available
        """
        if self.surfaceFile is None or not os.path.exists(self.surfaceFile):
            raise Exception(
                'surface file not found. specify "printSurfaceFile" in [denest] section of config'
            )

        gnuplot = """
            set terminal jpeg
            set output "%s"

            set view map
            set size ratio .9

            set object 1 rect from graph 0, graph 0 to graph 1, graph 1 back
            set object 1 rect fc rgb "black" fillstyle solid 1.0

            splot '%s' using 1:2:3 with points pointtype 5 pointsize 1 palette linewidth 0
            """

        if gnuplotConfig is None:
            gnuplotConfig = 'gnuplot.config'

        # the context manager closes the script even if writing fails
        with open(gnuplotConfig, "w") as fd:
            fd.write(gnuplot % (jpegFile, self.surfaceFile))

        os.system("gnuplot %s" % gnuplotConfig)

    def __str__(self):
        return "libAGF"
class TestWeightedRefinementOperator(unittest.TestCase):
    """
    Checks WeightedErrorRefinementFunctor against a reference indicator
    that is computed point by point in Python.
    """

    def setUp(self):
        #
        # Grid
        #

        DIM = 2
        LEVEL = 2

        self.grid = Grid.createLinearGrid(DIM)
        self.grid_gen = self.grid.getGenerator()
        self.grid_gen.regular(LEVEL)

        #
        # trainData, classes, errors
        #

        DELTA = 0.05
        DELTA_RECI = int(1 / DELTA)

        # equidistant points with spacing DELTA in both dimensions
        xs = [[DELTA * i, DELTA * j]
              for i in range(DELTA_RECI)
              for j in range(DELTA_RECI)]

        # fixed seed keeps the class labels reproducible
        random.seed(1208813)
        ys = [random.randint(-10, 10) for _ in range(DELTA_RECI ** 2)]

        self.trainData = DataMatrix(xs)
        self.classes = DataVector(ys)
        self.alpha = DataVector([3, 6, 7, 9, -1])

        # error = class label - value of the sparse grid function
        self.errors = DataVector(DELTA_RECI ** 2)
        coord = DataVector(DIM)
        opEval = createOperationEval(self.grid)

        for i in range(self.trainData.getNrows()):
            self.trainData.getRow(i, coord)
            self.errors[i] = self.classes[i] - opEval.eval(self.alpha, coord)

        #
        # Functor
        #

        self.functor = WeightedErrorRefinementFunctor(self.alpha, self.grid)
        self.functor.setTrainDataset(self.trainData)
        self.functor.setClasses(self.classes)
        self.functor.setErrors(self.errors)

    def test_1(self):
        """The functor value of every point matches the reference sum."""
        storage = self.grid.getStorage()
        coord = DataVector(storage.getDimension())
        num_coeff = len(self.alpha)

        values = [self.functor(storage, i) for i in range(storage.getSize())]

        expect = []
        opEval = createOperationEval(self.grid)

        for i in range(num_coeff):
            # coefficient vector containing only the i-th coefficient;
            # zeroed explicitly instead of relying on the constructor
            single = DataVector(num_coeff)
            single.setAll(0.0)
            single[i] = self.alpha[i]

            val = 0
            for j in range(self.trainData.getNrows()):
                self.trainData.getRow(j, coord)
                val += abs(opEval.eval(single, coord) * (self.errors[j] ** 2))

            expect.append(val)

        # floating point sums are compared with a tolerance
        self.assertEqual(len(values), len(expect))
        for actual, expected in zip(values, expect):
            self.assertAlmostEqual(actual, expected)
class TestPersistentRefinementOperator(unittest.TestCase):
    """
    Checks PersistentErrorRefinementFunctor, whose indicator accumulates
    over successive calls, against a reference computed in Python.

    DIM, LEVEL and BETA are read from module level.
    """

    def setUp(self):
        #
        # Grid
        #

        self.grid = Grid.createLinearGrid(DIM)
        self.grid_gen = self.grid.createGridGenerator()
        self.grid_gen.regular(LEVEL)

        #
        # trainData, classes, errors
        #

        DELTA = 0.05
        DELTA_RECI = int(1 / DELTA)

        # equidistant points with spacing DELTA in both dimensions
        xs = [[DELTA * i, DELTA * j]
              for i in range(DELTA_RECI)
              for j in range(DELTA_RECI)]

        # fixed seed keeps the class labels reproducible
        random.seed(1208813)
        ys = [random.randint(-10, 10) for _ in range(DELTA_RECI ** 2)]

        self.trainData = DataMatrix(xs)
        self.classes = DataVector(ys)
        self.alpha = DataVector([3, 6, 7, 9, -1])

        # error = class label - value of the sparse grid function
        self.errors = DataVector(DELTA_RECI ** 2)
        coord = DataVector(DIM)

        for i in range(self.trainData.getNrows()):
            self.trainData.getRow(i, coord)
            self.errors[i] = self.classes[i] - \
                self.grid.eval(self.alpha, coord)

        #
        # Functor
        #

        self.functor = PersistentErrorRefinementFunctor(self.alpha, self.grid)
        self.functor.setTrainDataset(self.trainData)
        self.functor.setClasses(self.classes)
        self.functor.setErrors(self.errors)

        # reference accumulator, updated once per round in test_1
        self.accum = DataVector(len(self.alpha))
        self.accum.setAll(0.0)

    def _update_expected(self, num_coeff):
        """
        Advance the reference accumulator by one round.

        @param num_coeff: number of coefficients
        @return: list with the accumulated indicator of every coefficient
        """
        expect = []

        for j in range(num_coeff):
            row = DataVector(DIM)

            # unit coefficient vector selecting the j-th basis function
            tmp_alpha = DataVector(len(self.alpha))
            tmp_alpha.setAll(0.0)
            tmp_alpha[j] = 1.0

            current = 0
            for i in range(self.trainData.getNrows()):
                self.trainData.getRow(i, row)
                current += (self.errors[i] *
                            self.grid.eval(tmp_alpha, row)) ** 2

            # exponential smoothing of the weighted indicator
            self.accum[j] = self.accum[j] * (1 - BETA) + \
                BETA * current * abs(self.alpha[j])
            expect.append(self.accum[j])

        return expect

    def test_1(self):
        """Two successive rounds of functor calls match the reference."""
        storage = self.grid.getStorage()
        num_coeff = len(self.alpha)

        # first and second part: the second round starts from the
        # accumulated values of the first one
        for _ in range(2):
            values = [self.functor(storage, i)
                      for i in range(storage.size())]
            expect = self._update_expected(num_coeff)

            # floating point sums are compared with a tolerance
            self.assertEqual(len(values), len(expect))
            for actual, expected in zip(values, expect):
                self.assertAlmostEqual(actual, expected)
def var(self, grid, alpha, U, T, mean):
    r"""
    Estimate the variance of the given sparse grid function as second
    moment minus squared mean,

    vol * alpha^T A alpha - mean^2,

    where A is the componentwise product of the (bi-/tri-)linear forms
    computed for each group of dimensions.

    @param grid: sparse grid
    @param alpha: coefficient vector of the sparse grid function
    @param U: distribution, passed to _extractPDFforMomentEstimation
    @param T: transformation, provides getTransformations()
    @param mean: mean of the sparse grid function
    @return: tuple (variance estimate, accumulated quadrature error)
    """
    # extract correct pdf for moment estimation
    vol, W = self._extractPDFforMomentEstimation(U, T)
    D = T.getTransformations()

    numPoints = grid.getStorage().size()

    # the forms of all dimension groups are multiplied into this matrix
    product = DataMatrix(numPoints, numPoints)
    product.setAll(1.)

    err = 0
    for i, dims in enumerate(W.getTupleIndices()):
        dist = W[i]
        trans = D[i]

        # objects needed for integrating the current dimensions
        gpsi, basisi = project(grid, dims)

        if not isinstance(dist, SGDEdist):
            # bilinear form of the grid with itself
            if len(dims) == 1:
                dist = [dist]
                trans = [trans]

            strategy = BilinearGaussQuadratureStrategy(dist, trans)
            A, erri = strategy.computeBilinearFormByList(gpsi, basisi,
                                                         gpsi, basisi)
        else:
            # the density is a sparse grid function itself: project it on
            # the desired dimensions and compute the trilinear form
            gpsk, basisk = project(dist.grid, range(len(dims)))

            strategy = TrilinearGaussQuadratureStrategy([dist], trans)
            A, erri = strategy.computeTrilinearFormByList(gpsk, basisk,
                                                          dist.alpha,
                                                          gpsi, basisi,
                                                          gpsi, basisi)

        # accumulate the results
        product.componentwise_mult(A)

        # accumulate the error
        numEntries = product.getNrows() * product.getNcols()
        err += product.sum() / numEntries * erri

    # compute the variance
    tmp = DataVector(product.getNrows())
    self.mult(product, alpha, tmp)
    secondMoment = vol * alpha.dotProduct(tmp)

    return secondMoment - mean ** 2, err
class LibAGFDist(Dist):
    """
    Kernel density estimate built from training samples, optionally
    produced by an external density estimation run (see computeDensity).

    pdf() evaluates a product of one-dimensional Gaussian kernels with one
    bandwidth per dimension; cdf() and ppf() use the (inverse) Rosenblatt
    transformation operations for KDEs.
    """

    def __init__(self, trainData, samples=None, testData=None,
                 bandwidths=None, transformation=None, surfaceFile=None):
        """
        @param trainData: two-dimensional numpy array, one sample per row
        @param samples: optional numpy array of samples used by rvs()
        @param testData: optional dict mapping sample tuples to pdf values
        @param bandwidths: optional kernel bandwidths (DataVector or
            array-like); estimated from trainData if omitted
        @param transformation: optional object providing
            getTransformations(); its bounds replace the default ones
        @param surfaceFile: optional path of a surface file for gnuplot()
        """
        super(LibAGFDist, self).__init__()

        self.trainData = DataMatrix(trainData)
        self.testData = testData
        self.dim = trainData.shape[1]

        # default bounds are the unit interval in every dimension; a
        # one-dimensional distribution stores the interval itself
        self.bounds = [[0, 1] for _ in range(self.dim)]
        if len(self.bounds) == 1:
            self.bounds = self.bounds[0]

        if transformation is not None:
            self.bounds = [trans.getBounds()
                           for trans in transformation.getTransformations()]

        self.samples = samples
        self.transformation = transformation

        if bandwidths is not None:
            self.bandwidths = bandwidths
        else:
            op = createOperationInverseRosenblattTransformationKDE(
                self.trainData)
            self.bandwidths = DataVector(self.dim)
            op.getOptKDEbdwth(self.bandwidths)

        self.surfaceFile = surfaceFile

    @classmethod
    def byConfig(cls, config):
        """
        Run the density estimation for a config file and load its results.

        @param config: path of the config file
        @return: the distribution, or None if config is None or missing
        """
        if config is not None and os.path.exists(config):
            # init density function
            traindatafile, samplefile, testFile, testOutFile, \
                bandwidthFile, surfaceFile = cls.computeDensity(config)
            return cls.byFiles(traindatafile, samplefile, testFile,
                               testOutFile, bandwidthFile, surfaceFile)

    @staticmethod
    def _loadColumns(filename):
        """Load a text file; anything below 2d becomes a single column."""
        data = np.loadtxt(filename)
        if data.ndim < 2:
            data = np.atleast_1d(data).reshape(-1, 1)
        return data

    @classmethod
    def byFiles(cls, trainDataFile, samplesFile=None, testFile=None,
                testOutFile=None, bandwidthFile=None, surfaceFile=None):
        """
        Create the distribution from files written by the estimation run.

        Optional files that are None or do not exist are ignored.

        @param trainDataFile: training samples, one per row (required)
        @param samplesFile: samples for rvs()
        @param testFile: test samples
        @param testOutFile: pdf values for the test samples
        @param bandwidthFile: kernel bandwidths
        @param surfaceFile: surface file for gnuplot()
        @return: the distribution
        @raise Exception: if the training data file does not exist
        """
        # load training file
        if not os.path.exists(trainDataFile):
            raise Exception('The training data file "%s" does not exist' %
                            trainDataFile)
        trainData = cls._loadColumns(trainDataFile)

        # load samples for quadrature
        samples = None
        if samplesFile is not None and os.path.exists(samplesFile):
            samples = cls._loadColumns(samplesFile)

        # load test file for evaluating pdf values
        testData = None
        if testFile is not None and os.path.exists(testFile):
            testData = cls._loadColumns(testFile)

        # load bandwidths file for evaluating pdf values
        bandwidths = None
        if bandwidthFile is not None and os.path.exists(bandwidthFile):
            bandwidths = np.loadtxt(bandwidthFile)

        # pdf values of the test samples, stored in a hash map; stays None
        # when they are not available (this used to be an unbound name)
        testDataEval = None
        if testData is not None and testOutFile is not None and \
                os.path.exists(testOutFile):
            # atleast_1d: a single value is loaded as a 0-d array
            testLikelihood = np.atleast_1d(np.loadtxt(testOutFile))
            testDataEval = {tuple(sample): testLikelihood[i]
                            for i, sample in enumerate(testData)}

        if surfaceFile is not None and not os.path.exists(surfaceFile):
            surfaceFile = None

        return cls(trainData,
                   samples=samples,
                   testData=testDataEval,
                   bandwidths=bandwidths,
                   surfaceFile=surfaceFile)

    @classmethod
    def computeDensity(cls, config,
                       pathsgpp='/home/franzefn/workspace/SGppUQ/lib/sgpp',
                       cluster='/home/franzefn/Promotion/UQ/benjamin/clustc/cluster'):
        """
        Run the external density estimation and read the file names of its
        results from the config file.

        @param config: path of the config file
        @param pathsgpp: value assigned to LD_LIBRARY_PATH for the run
        @param cluster: path of the executable, called as `cluster -c config`
        @return: tuple (train data file, samples file, test file, test
            output file, bandwidths file, surface file); entries that are
            not configured are None
        @raise Exception: if the config file does not exist or the run
            fails
        """
        import subprocess

        if not os.path.exists(config):
            raise Exception('the config file "%s" does not exist' % config)

        os.environ['LD_LIBRARY_PATH'] = pathsgpp

        # argument list instead of a shell string, so paths containing
        # spaces or shell metacharacters are passed through unchanged
        with open("out_libagf.log", "w") as log:
            try:
                ret = subprocess.call([cluster, "-c", config], stdout=log)
            except OSError as e:
                raise Exception('The density estimation could not be '
                                'started: %s' % e)
        if ret != 0:
            raise Exception('The density estimation exited unexpectedly')

        # extract the file names from the config
        s = cp.ConfigParser()
        s.optionxform = str
        s.read(config)

        traindatafile = s.get('files', 'inFileTrain')
        denest = s.options('denest')
        files = s.options('files')

        # the option is read as a number: comparing the raw string with 0
        # is always true in Python 2 and a TypeError in Python 3
        samplesfile = None
        if 'samplesNumberSamples' in denest and \
                s.getfloat('denest', 'samplesNumberSamples') > 0 and \
                'samplesOutput' in denest:
            samplesfile = s.get('denest', 'samplesOutput')

        testFile = None
        if 'inFileTest' in files:
            testFile = s.get('files', 'inFileTest')

        testOutFile = None
        if 'outFileTest' in files and 'inFileTest' in files:
            testOutFile = s.get('files', 'outFileTest')

        bandwidthsfile = None
        if 'printBandwidthsFile' in denest:
            bandwidthsfile = s.get('denest', 'printBandwidthsFile')

        surfacefile = None
        if 'printSurfaceFile' in denest:
            surfacefile = s.get('denest', 'printSurfaceFile')

        return traindatafile, samplesfile, testFile, testOutFile, \
            bandwidthsfile, surfacefile

    def pdf_libagf(self, x):
        """
        Look up the precomputed pdf value of a test sample.

        @param x: number or sequence; used as tuple key into testData
        @return: stored pdf value
        @raise AttributeError: if no value is stored for x
        """
        if isNumerical(x):
            x = [x]
        x = tuple(x)

        if self.testData is not None and x in self.testData:
            return self.testData[x]
        raise AttributeError("No pdf value for '%s' available" % (x,))

    def _bandwidthArray(self):
        """Return the bandwidths as numpy array, whatever their type."""
        if isinstance(self.bandwidths, DataVector):
            return self.bandwidths.array()
        # bandwidths loaded with np.loadtxt have no array() method and are
        # 0-d for one-dimensional data
        return np.atleast_1d(np.asarray(self.bandwidths, dtype=float))

    def pdf(self, x):
        """
        Evaluate the Gaussian product kernel density estimate at x.

        @param x: evaluation point, broadcast against the training data
        @return: density value
        """
        n = self.trainData.getNrows()
        sigma = self._bandwidthArray()
        # normalization coefficient
        norm = 1. / (sigma * np.sqrt(2. * np.pi))

        # normalized distances to all training samples
        kernels = (x - self.trainData.array()) / sigma
        kernels = norm * np.exp(-kernels ** 2 / 2.)

        # scale the result by the number of samples
        return np.sum(np.prod(kernels, axis=1)) / n

    @staticmethod
    def _toMatrices(x):
        """
        Convert an evaluation point to the matrices the Rosenblatt
        operations work on.

        @return: tuple (converted x, input matrix, zeroed output matrix)
        @raise TypeError: if x has an unsupported type
        """
        if isList(x):
            x = DataVector(x)
        elif isNumerical(x):
            x = DataVector([x])

        if isinstance(x, DataMatrix):
            A = x
            B = DataMatrix(A.getNrows(), A.getNcols())
        elif isinstance(x, DataVector):
            A = DataMatrix(1, len(x))
            A.setRow(0, x)
            B = DataMatrix(1, len(x))
        else:
            raise TypeError("unsupported type of evaluation point: %s" %
                            type(x).__name__)
        B.setAll(0.0)
        return x, A, B

    @staticmethod
    def _unpackResult(x, B):
        """Return the array for matrix input, otherwise entry (0, 0)."""
        # NOTE(review): for vector input with more than one entry only the
        # first component is returned, as before - confirm this is intended
        if isinstance(x, DataMatrix):
            return B.array()
        return B.get(0, 0)

    def cdf(self, x):
        """
        Apply the Rosenblatt transformation of the KDE to x.

        @param x: number, list, DataVector or DataMatrix
        @return: array for DataMatrix input, otherwise a single value
        """
        x, A, B = self._toMatrices(x)

        # do the transformation
        op = createOperationRosenblattTransformationKDE(self.trainData)
        op.doTransformation(A, B)

        return self._unpackResult(x, B)

    def ppf(self, x):
        """
        Apply the inverse Rosenblatt transformation of the KDE to x.

        @param x: number, list, DataVector or DataMatrix
        @return: array for DataMatrix input, otherwise a single value
        """
        x, A, B = self._toMatrices(x)

        # do the transformation
        assert A.getNcols() == B.getNcols() == self.trainData.getNcols()
        op = createOperationInverseRosenblattTransformationKDE(self.trainData)
        op.doTransformation(A, B)

        return self._unpackResult(x, B)

    def rvs(self, n=1):
        """Draw n rows, with replacement, from the stored samples."""
        ixs = np.random.randint(0, len(self.samples), n)
        return self.samples[ixs, :]

    def mean(self, n=1e4):
        """
        Average of the coordinate products of the test samples.

        @param n: unused, kept for interface compatibility
        """
        moment = 0.
        for sample in self.testData:
            moment += np.prod(sample)
        return moment / len(self.testData)

    def var(self):
        """Sample variance of the coordinate products of the test samples."""
        mean = self.mean()
        moment = 0.
        for sample in self.testData:
            moment += (np.prod(sample) - mean) ** 2

        return moment / (len(self.testData) - 1)

    def getBounds(self):
        return self.bounds

    def getDim(self):
        return self.dim

    def getDistributions(self):
        return [self]

    def gnuplot(self, jpegFile, gnuplotConfig=None):
        """
        Write a gnuplot script that renders the surface file to a jpeg and
        run gnuplot on it.

        @param jpegFile: output file of the plot
        @param gnuplotConfig: path of the script; 'gnuplot.config' if None
        @raise Exception: if no surface file is available
        """
        if self.surfaceFile is None or not os.path.exists(self.surfaceFile):
            raise Exception('surface file not found. specify "printSurfaceFile" in [denest] section of config')

        gnuplot = """
            set terminal jpeg
            set output "%s"

            set view map
            set size ratio .9

            set object 1 rect from graph 0, graph 0 to graph 1, graph 1 back
            set object 1 rect fc rgb "black" fillstyle solid 1.0

            splot '%s' using 1:2:3 with points pointtype 5 pointsize 1 palette linewidth 0
            """

        if gnuplotConfig is None:
            gnuplotConfig = 'gnuplot.config'

        # the context manager closes the script even if writing fails
        with open(gnuplotConfig, "w") as fd:
            fd.write(gnuplot % (jpegFile, self.surfaceFile))

        os.system("gnuplot %s" % gnuplotConfig)

    def __str__(self):
        return "libAGF"
class TestOnlinePredictiveRefinementDimension(unittest.TestCase):
    """
    Compares the refinement indicators collected by
    OnlinePredictiveRefinementDimension with a reference that is computed
    by temporarily inserting the children of every grid point.
    """

    def setUp(self):
        #
        # Grid
        #

        DIM = 2
        LEVEL = 2

        self.grid = Grid.createLinearGrid(DIM)
        self.grid_gen = self.grid.getGenerator()
        self.grid_gen.regular(LEVEL)

        #
        # trainData, classes, errors
        #

        DELTA = 0.05
        DELTA_RECI = int(1 / DELTA)

        # equidistant points with spacing DELTA in both dimensions
        xs = [[DELTA * i, DELTA * j]
              for i in range(DELTA_RECI)
              for j in range(DELTA_RECI)]

        # fixed seed keeps the class labels reproducible
        random.seed(1208813)
        ys = [random.randint(-10, 10) for _ in range(DELTA_RECI ** 2)]

        self.trainData = DataMatrix(xs)
        self.classes = DataVector(ys)
        self.alpha = DataVector([3, 6, 7, 9, -1])
        self.multEval = createOperationMultipleEval(self.grid, self.trainData)
        opEval = createOperationEval(self.grid)

        # error = |class label - value of the sparse grid function|
        self.errors = DataVector(DELTA_RECI ** 2)
        coord = DataVector(DIM)

        for i in range(self.trainData.getNrows()):
            self.trainData.getRow(i, coord)
            self.errors[i] = abs(self.classes[i] -
                                 opEval.eval(self.alpha, coord))

        #
        # OnlinePredictiveRefinementDimension
        #

        hash_refinement = HashRefinement()
        self.strategy = OnlinePredictiveRefinementDimension(hash_refinement)
        self.strategy.setTrainDataset(self.trainData)
        self.strategy.setClasses(self.classes)
        self.strategy.setErrors(self.errors)

    def test_1(self):
        """Every expected (point, dimension) indicator is reproduced."""
        storage = self.grid.getStorage()
        gridSize = self.grid.getSize()
        numDim = storage.getDimension()

        print("######")
        print("Expected result:")
        print("######")

        expected = {}

        for j in range(gridSize):
            # the local must not be called HashGridPoint: that would
            # shadow the class and break the copy construction below
            point = storage.getPoint(j)
            point.setLeaf(False)

            print("Point: ", j, " (", point.toString(), ")")

            for d in range(numDim):
                #
                # Get left and right child
                #

                leftChild = HashGridPoint(point)
                rightChild = HashGridPoint(point)

                storage.left_child(leftChild, d)
                storage.right_child(rightChild, d)

                #
                # Check if point is refinable
                #

                if storage.isContaining(leftChild) or \
                        storage.isContaining(rightChild):
                    continue

                #
                # Insert children temporarily
                #

                storage.insert(leftChild)
                storage.insert(rightChild)

                val1 = self.calc_indicator_value(leftChild)
                val2 = self.calc_indicator_value(rightChild)

                # remove the two temporary children again
                storage.deleteLast()
                storage.deleteLast()

                print("Dimension: ", d)
                print("Left Child: ", val1)
                print("Right Child: ", val2)
                print("")

                expected[(j, d)] = val1 + val2

            print("")

        for k, v in expected.items():
            print((k, v))

        print("######")
        print("Actual result:")
        print("######")

        actual = refinement_map({})
        self.strategy.collectRefinablePoints(storage, 10, actual)

        for k, v in list(actual.items()):
            print((k, v))

        #
        # Assertions
        #

        # both sides are floating point sums, compare with a tolerance
        for k, v in expected.items():
            self.assertAlmostEqual(actual[k], v)

    def calc_indicator_value(self, index):
        """
        Compute the indicator of a single grid point.

        The numerator is the squared entry of multTranspose(errors) at the
        point, the denominator the sum of squares of mult(unit vector).

        @param index: grid point that is contained in the storage
        @return: numerator / denominator, or 0 if the denominator is zero
        """
        numData = self.trainData.getNrows()
        numCoeff = self.grid.getSize()
        seq = self.grid.getStorage().seq(index)

        tmp = DataVector(numCoeff)
        self.multEval.multTranspose(self.errors, tmp)

        num = tmp[seq] ** 2

        # unit coefficient vector selecting the basis function of `index`;
        # zeroed explicitly instead of relying on the constructor
        alpha = DataVector(numCoeff)
        alpha.setAll(0.0)
        alpha[seq] = 1.0

        col = DataVector(numData)
        self.multEval.mult(alpha, col)

        col.sqr()
        denom = col.sum()

        if denom == 0:
            print("Denominator is zero")
            value = 0
        else:
            value = num / denom

        return value
class TestWeightedRefinementOperator(unittest.TestCase):
    """
    Checks WeightedErrorRefinementFunctor against a reference indicator
    that is computed point by point in Python.
    """

    def setUp(self):
        #
        # Grid
        #

        DIM = 2
        LEVEL = 2

        self.grid = Grid.createLinearGrid(DIM)
        self.grid_gen = self.grid.createGridGenerator()
        self.grid_gen.regular(LEVEL)

        #
        # trainData, classes, errors
        #

        DELTA = 0.05
        DELTA_RECI = int(1 / DELTA)

        # equidistant points with spacing DELTA in both dimensions
        xs = [[DELTA * i, DELTA * j]
              for i in range(DELTA_RECI)
              for j in range(DELTA_RECI)]

        # fixed seed keeps the class labels reproducible
        random.seed(1208813)
        ys = [random.randint(-10, 10) for _ in range(DELTA_RECI ** 2)]

        self.trainData = DataMatrix(xs)
        self.classes = DataVector(ys)
        self.alpha = DataVector([3, 6, 7, 9, -1])

        # error = class label - value of the sparse grid function
        self.errors = DataVector(DELTA_RECI ** 2)
        coord = DataVector(DIM)

        for i in range(self.trainData.getNrows()):
            self.trainData.getRow(i, coord)
            self.errors[i] = self.classes[i] - \
                self.grid.eval(self.alpha, coord)

        #
        # Functor
        #

        self.functor = WeightedErrorRefinementFunctor(self.alpha, self.grid)
        self.functor.setTrainDataset(self.trainData)
        self.functor.setClasses(self.classes)
        self.functor.setErrors(self.errors)

    def test_1(self):
        """The functor value of every point matches the reference sum."""
        storage = self.grid.getStorage()
        coord = DataVector(storage.dim())
        num_coeff = len(self.alpha)

        values = [self.functor(storage, i) for i in range(storage.size())]

        expect = []

        for i in range(num_coeff):
            # coefficient vector containing only the i-th coefficient;
            # zeroed explicitly instead of relying on the constructor
            single = DataVector(num_coeff)
            single.setAll(0.0)
            single[i] = self.alpha[i]

            val = 0
            for j in range(self.trainData.getNrows()):
                self.trainData.getRow(j, coord)
                val += abs(self.grid.eval(single, coord) *
                           (self.errors[j] ** 2))

            expect.append(val)

        # floating point sums are compared with a tolerance
        self.assertEqual(len(values), len(expect))
        for actual, expected in zip(values, expect):
            self.assertAlmostEqual(actual, expected)
class TestOnlinePredictiveRefinementDimension(unittest.TestCase):
    """
    Compares the refinement indicators collected by
    OnlinePredictiveRefinementDimension with hand-computed values and with
    a naive reference implementation on a small one-dimensional grid.
    """

    def test_manual(self):
        """Check the collected and the naive indicators against `result`."""
        print("#" * 20)

        # expected indicator per (grid point index, dimension)
        result = {(1, 0): 5, (2, 0): 25}

        #
        # Grid
        #

        DIM = 1
        LEVEL = 2

        self.grid = Grid.createLinearGrid(DIM)
        self.grid_gen = self.grid.createGridGenerator()
        self.grid_gen.regular(LEVEL)

        #
        # trainData, classes, errors
        #

        xs = [[0.1], [0.4], [0.6], [0.8]]
        errs = [1, 2, 3, 4]

        self.trainData = DataMatrix(xs)
        self.errors = DataVector(errs)
        self.multEval = createOperationMultipleEval(self.grid, self.trainData)
        self.dim = DIM
        self.storage = self.grid.getStorage()
        self.gridSize = self.grid.getSize()

        #
        # OnlinePredictiveRefinementDimension
        #

        print("OnlineRefinementDim")

        hash_refinement = HashRefinement()
        online = OnlinePredictiveRefinementDimension(hash_refinement)
        online.setTrainDataset(self.trainData)
        online.setErrors(self.errors)

        online_result = refinement_map({})
        online.collectRefinablePoints(self.grid.getStorage(), 10,
                                      online_result)

        # print everything first so the full map is visible on failure
        for k, v in online_result.items():
            print(k, v)

        for k, v in online_result.items():
            self.assertAlmostEqual(v, result[k])

        #
        # Naive
        #

        print()
        print("Naive")

        naive_result = self.naive_calc()

        for k, v in naive_result.items():
            print(k, v)

        for k, v in naive_result.items():
            self.assertAlmostEqual(v, result[k])

    def naive_calc(self):
        """
        Compute the indicator of every refinable (point, dimension) pair
        by temporarily inserting both children into the storage.

        @return: dict mapping (point index, dimension) to the sum of the
            indicators of the left and the right child
        """
        result = {}

        for j in range(self.gridSize):
            # the local must not be called HashGridIndex: that would
            # shadow the class and break the copy construction below
            point = self.storage.get(j)
            point.setLeaf(False)

            print("Point: ", j, " (", point.toString(), ")")

            for d in range(self.dim):
                print("Dimension: ", d)

                #
                # Get left and right child
                #

                leftChild = HashGridIndex(point)
                rightChild = HashGridIndex(point)

                self.storage.left_child(leftChild, d)
                self.storage.right_child(rightChild, d)

                #
                # Check if point is refinable
                #

                if self.storage.has_key(leftChild) or \
                        self.storage.has_key(rightChild):
                    continue

                #
                # Insert children temporarily
                #

                self.storage.insert(leftChild)
                self.storage.insert(rightChild)

                val1 = self.naive_calc_single(leftChild)
                print("Left Child: ", val1)

                val2 = self.naive_calc_single(rightChild)
                print("Right Child: ", val2)

                # remove the two temporary children again
                self.storage.deleteLast()
                self.storage.deleteLast()

                result[(j, d)] = val1 + val2

            print("")

        return result

    def naive_calc_single(self, index):
        """
        Compute the indicator of a single grid point.

        The numerator is the squared entry of multTranspose(errors) at the
        point, the denominator the sum of squares of mult(unit vector).

        @param index: grid point that is contained in the storage
        @return: numerator / denominator, or 0 if the denominator is zero
        """
        numData = self.trainData.getNrows()
        numCoeff = self.grid.getSize()
        seq = self.grid.getStorage().seq(index)

        tmp = DataVector(numCoeff)
        self.multEval.multTranspose(self.errors, tmp)

        num = tmp[seq] ** 2

        # unit coefficient vector selecting the basis function of `index`
        alpha = DataVector(numCoeff)
        alpha.setAll(0.0)
        alpha[seq] = 1.0

        col = DataVector(numData)
        self.multEval.mult(alpha, col)

        print(col)

        col.sqr()
        denom = col.sum()

        print(num)
        print(denom)

        if denom == 0:
            print("Denominator is zero")
            value = 0
        else:
            value = num / denom

        return value

    def tearDown(self):
        del self.grid
class TestOnlinePredictiveRefinementDimension(unittest.TestCase): def setUp(self): # # Grid # DIM = 2 LEVEL = 2 self.grid = Grid.createLinearGrid(DIM) self.grid_gen = self.grid.createGridGenerator() self.grid_gen.regular(LEVEL) # # trainData, classes, errors # xs = [] DELTA = 0.05 DELTA_RECI = int(1/DELTA) for i in xrange(DELTA_RECI): for j in xrange(DELTA_RECI): xs.append([DELTA*i, DELTA*j]) random.seed(1208813) ys = [ random.randint(-10, 10) for i in xrange(DELTA_RECI**2)] self.trainData = DataMatrix(xs) self.classes = DataVector(ys) self.alpha = DataVector([3, 6, 7, 9, -1]) self.multEval = createOperationMultipleEval(self.grid, self.trainData) self.errors = DataVector(DELTA_RECI**2) coord = DataVector(DIM) for i in xrange(self.trainData.getNrows()): self.trainData.getRow(i, coord) self.errors.__setitem__ (i, abs(self.classes[i] - self.grid.eval(self.alpha, coord))) # # OnlinePredictiveRefinementDimension # hash_refinement = HashRefinement(); self.strategy = OnlinePredictiveRefinementDimension(hash_refinement) self.strategy.setTrainDataset(self.trainData) self.strategy.setClasses(self.classes) self.strategy.setErrors(self.errors) def test_1(self): storage = self.grid.getStorage() gridSize = self.grid.getSize() numDim = storage.dim() print "######" print "Expected result:" print "######" expected = {} for j in xrange(gridSize): HashGridIndex = storage.get(j) HashGridIndex.setLeaf(False) print "Point: ", j, " (", HashGridIndex.toString(), ")" for d in xrange(numDim): # # Get left and right child # leftChild = HashGridIndex(HashGridIndex) rightChild = HashGridIndex(HashGridIndex) storage.left_child(leftChild, d) storage.right_child(rightChild, d) # # Check if point is refinable # if storage.has_key(leftChild) or storage.has_key(rightChild): continue # # Insert children temporarily # storage.insert(leftChild) storage.insert(rightChild) val1 = self.calc_indicator_value(leftChild) val2 = self.calc_indicator_value(rightChild) storage.deleteLast() storage.deleteLast() 
print "Dimension: ", d print "Left Child: ", val1 print "Right Child: ", val2 print "" expected[(j, d)] = val1 + val2 print "" for k, v in expected.iteritems(): print(k, v) print "######" print "Actual result:" print "######" actual = refinement_map({}) self.strategy.collectRefinablePoints(storage, 10, actual) for k, v in actual.iteritems(): print(k, v) # # Assertions # for k, v in expected.iteritems(): self.assertEqual(actual[k], v) def calc_indicator_value(self, index): numData = self.trainData.getNrows() numCoeff = self.grid.getSize() seq = self.grid.getStorage().seq(index) num = 0 denom = 0 tmp = DataVector(numCoeff) self.multEval.multTranspose(self.errors, tmp) num = tmp.__getitem__(seq) num **= 2 alpha = DataVector(numCoeff) col = DataVector(numData) alpha.__setitem__(seq, 1.0) self.multEval.mult(alpha, col) col.sqr() denom = col.sum() if denom == 0: print "Denominator is zero" value = 0 else: value = num/denom return value
def var(self, grid, alpha, U, T, mean):
    r"""
    Estimate the variance of the sparse grid function given by grid and
    alpha with respect to the pdf extracted from U and T,

    \int\limits_{[0, 1]^d} (f(x) - E(f))^2 * pdf(x) dx

    It is evaluated as vol * alpha^T A alpha - mean^2, where A is the
    component-wise product of the matrices computed for each group of
    dimensions.

    @param grid: sparse grid
    @param alpha: coefficient vector belonging to grid
    @param U: passed on to _extractPDFforMomentEstimation
    @param T: passed on to _extractPDFforMomentEstimation; also provides
              the transformations via getTransformations()
    @param mean: expectation value that is subtracted (squared)
    @return: tuple (variance estimate, accumulated error estimate)
    """
    # pdf (and volume factor) used for the moment estimation
    vol, W = self._extractPDFforMomentEstimation(U, T)
    transformations = T.getTransformations()

    # NOTE: a variant that copies the grid with a trapezoidal boundary and
    # hierarchizes f - mean on it is disabled; grid and alpha are used as
    # they are.
    ngrid, nalpha = grid, alpha
    size = ngrid.getStorage().size()

    # A: component-wise product over all groups of dimensions
    A = DataMatrix(size, size)
    A.setAll(1.)

    err = 0
    for k, dims in enumerate(W.getTupleIndices()):
        dist_k = W[k]
        trans_k = transformations[k]

        # objects needed for integrating over the current dimensions
        gps, basis = project(ngrid, dims)

        if not isinstance(dist_k, SGDEdist):
            # bilinear form of the grid with itself; the strategy is given
            # lists of distributions / transformations, so wrap them in the
            # one-dimensional case
            if len(dims) == 1:
                dist_k, trans_k = [dist_k], [trans_k]
            strategy = BilinearGaussQuadratureStrategy(dist_k, trans_k)
            A_k, err_k = strategy.computeBilinearFormByList(gps, basis,
                                                            gps, basis)
        else:
            # the density is a sparse grid function itself: project it on
            # the desired dimensions and compute the trilinear form
            gps_pdf, basis_pdf = project(dist_k.grid, range(len(dims)))
            strategy = TrilinearGaussQuadratureStrategy([dist_k], trans_k)
            A_k, err_k = strategy.computeTrilinearFormByList(
                gps_pdf, basis_pdf, dist_k.alpha,
                gps, basis,
                gps, basis)

        # accumulate the results
        A.componentwise_mult(A_k)

        # accumulate the error
        err += A.sum() / (A.getNrows() * A.getNcols()) * err_k

    # compute the variance
    A_alpha = DataVector(A.getNrows())
    self.mult(A, nalpha, A_alpha)

    return vol * nalpha.dotProduct(A_alpha) - mean**2, err
class TestOnlinePredictiveRefinementDimension(unittest.TestCase): def test_automatic(self): d = [2,3,4] l = [1,2] num_points = [2,3,4] num_tests = 1 for i in xrange(num_tests): d_k = random.choice(d) l_k = random.choice(l) n_k = random.choice(num_points) print d_k, "dim,", l_k, "level,", n_k, "num data points" self.general_test(d_k, l_k, n_k) def _test_fail(self): # For l >= 3, the naive algorithm does not evaluate some points (thus, the result is 0) # E.g. for d = 2, the value of all grid points (1, 1, X, Y) with seq numbers 9-12 are 0 num_tests = 1000 for i in xrange(num_tests): d_k = 2 l_k = 3 n_k = 250 print d_k, "dim,", l_k, "level,", n_k, "num data points" self.general_test(d_k, l_k, n_k) def general_test(self, d, l, num): # print "#"*20 # print xs = [self.get_random_x(d) for i in xrange(num)] dupl = True while dupl: dupl_tmp = False for x in xs: for y in xs: if x == y: dupl = True break if dupl: break dupl = dupl_tmp xs = [self.get_random_x(d) for i in xrange(num)] errs = [self.get_random_err() for i in xrange(num)] self.grid = Grid.createLinearGrid(d) self.grid_gen = self.grid.createGridGenerator() self.grid_gen.regular(l) self.trainData = DataMatrix(xs) self.errors = DataVector(errs) self.multEval = createOperationMultipleEval(self.grid, self.trainData) self.dim = d self.storage = self.grid.getStorage() self.gridSize = self.grid.getSize() # # OnlinePredictiveRefinementDimension # # print "OnlineRefinementDim" hash_refinement = HashRefinement(); online = OnlinePredictiveRefinementDimension(hash_refinement) online.setTrainDataset(self.trainData) online.setErrors(self.errors) online_result = refinement_map({}) online.collectRefinablePoints(self.storage, 5, online_result) # for k,v in online_result.iteritems(): # print k, v # # Naive # # print # print "Naive" naive_result = self.naive_calc() # for k,v in naive_result.iteritems(): # print k, v # # OnlinePredictiveRefinementDimensionOld # hash_refinement = HashRefinement(); online_old = 
OnlinePredictiveRefinementDimensionOld(hash_refinement) # # Assertions # for k,v in online_result.iteritems(): if abs(online_result[k] - naive_result[k]) >= 0.1: #print "Error in:", k #print online_result[k] #print naive_result[k] #print naive_result #print "Datapoints" #print xs #print "Errors" #print errs #print "All values:" #print "Key: Online result, naive result" #for k,v in online_result.iteritems(): # print("{} ({}): {}, {}".format(k, self.storage.get(k[0]).toString(), v, naive_result[k])) self.assertTrue(False) # self.assertAlmostEqual(online_result[k], naive_result[k]) del self.grid del self.grid_gen del self.trainData del self.errors del self.multEval del self.storage def naive_calc(self): result = {} for j in xrange(self.gridSize): HashGridIndex = self.storage.get(j) HashGridIndex.setLeaf(False) # print "Point: ", j, " (", HashGridIndex.toString(), ")" for d in xrange(self.dim): # print "Dimension: ", d # # Get left and right child # leftChild = HashGridIndex(HashGridIndex) rightChild = HashGridIndex(HashGridIndex) self.storage.left_child(leftChild, d) self.storage.right_child(rightChild, d) # # Check if point is refinable # if self.storage.has_key(leftChild) or self.storage.has_key(rightChild): continue # # Insert children temporarily # self.storage.insert(leftChild) self.storage.insert(rightChild) val1 = self.naive_calc_single(leftChild) # print "Left Child: ", val1 val2 = self.naive_calc_single(rightChild) # print "Right Child: ", val2 self.storage.deleteLast() self.storage.deleteLast() result[(j, d)] = val1 + val2 # print "" return result def naive_calc_single(self, index): numData = self.trainData.getNrows() numCoeff = self.grid.getSize() seq = self.grid.getStorage().seq(index) num = 0 denom = 0 tmp = DataVector(numCoeff) self.multEval.multTranspose(self.errors, tmp) num = tmp.__getitem__(seq) num **= 2 alpha = DataVector(numCoeff) alpha.setAll(0.0) alpha.__setitem__(seq, 1.0) col = DataVector(numData) self.multEval.mult(alpha, col) col.sqr() denom 
= col.sum() if denom == 0: # print "Denominator is zero" value = 0 else: value = num/denom return value def get_random_x(self, d): DELTA = 0.10 x = [] for i in xrange(d): x.append(random.choice(numpy.arange(0, 1.01, DELTA))) return x def get_random_x_wo_boundary(self, d): DELTA = 0.10 x = [] for i in xrange(d): x.append(random.choice(numpy.arange(0.1, 0.91, DELTA))) return x def get_random_err(self): DELTA = 0.1 return random.choice(numpy.arange(-3, 3.01, DELTA)) def get_random_err_pos(self): DELTA = 0.1 return random.choice(numpy.arange(0, 3.01, DELTA))