def compareBTMatrices(m1, m2):
    """
    Print the sorted row sums and sorted column sums of two matrices.

    The sums of m1 and of the reference m2 are sorted independently and
    printed pairwise as (reference_sum, sum) tuples, first for all rows and
    then for all columns. Nothing is asserted and nothing is returned.

    @param m1: matrix under test; getSize() is used as the number of rows
               and getDim() as the number of columns
    @param m2: reference matrix, read through getRow() / getColumn()
    """
    num_rows = m1.getSize()
    num_cols = m1.getDim()

    def sorted_sums(matrix, by_row, count, buf):
        # Load each row/column into the shared buffer and collect its sum.
        sums = []
        for k in range(count):
            if by_row:
                matrix.getRow(k, buf)
            else:
                matrix.getColumn(k, buf)
            sums.append(buf.sum())
        sums.sort()
        return sums

    # Row sums: one buffer entry per column.
    row_buf = DataVector(num_cols)
    row_sums = sorted_sums(m1, True, num_rows, row_buf)
    row_sums_ref = sorted_sums(m2, True, num_rows, row_buf)
    for pair in zip(row_sums_ref, row_sums):
        print(pair)

    # Column sums: one buffer entry per row.
    col_buf = DataVector(num_rows)
    col_sums = sorted_sums(m1, False, num_cols, col_buf)
    col_sums_ref = sorted_sums(m2, False, num_cols, col_buf)
    for pair in zip(col_sums_ref, col_sums):
        print(pair)
def compareBBTMatrices(testCaseClass, m1, m2):
    """
    Assert that two square matrices agree in diagonal, row sums and column sums.

    Each of the three value lists is sorted independently for both matrices
    before the pairwise comparison, so the order of rows/columns does not
    matter. Comparison precision is 5 places if
    cvar.USING_DOUBLE_PRECISION is true, otherwise 3.

    @param testCaseClass: object providing assertEqual / assertAlmostEqual
    @param m1: matrix under test (getNrows, getNcols, getSize, get, getRow, getColumn)
    @param m2: reference matrix with the same interface
    """
    from pysgpp import DataVector, cvar

    places = 3
    if cvar.USING_DOUBLE_PRECISION:
        places = 5

    # Dimensions: m1 must be square and both matrices equally large.
    testCaseClass.assertEqual(m1.getNrows(), m1.getNcols())
    testCaseClass.assertEqual(m1.getNrows(), m2.getNrows())
    testCaseClass.assertEqual(m1.getSize(), m2.getSize())

    dim = m1.getNrows()

    def sorted_sums(matrix, by_row, buf):
        # Load each row/column into the shared buffer and collect its sum.
        sums = []
        for k in range(dim):
            if by_row:
                matrix.getRow(k, buf)
            else:
                matrix.getColumn(k, buf)
            sums.append(buf.sum())
        return sorted(sums)

    def assert_pairs(actual, expected, template):
        for got, want in zip(actual, expected):
            testCaseClass.assertAlmostEqual(got, want, places=places,
                                            msg=template % (got, want))

    # Diagonal
    diag = sorted([m1.get(k, k) for k in range(dim)])
    diag_ref = sorted([m2.get(k, k) for k in range(dim)])
    assert_pairs(diag, diag_ref, "Diagonal %f != %f")

    # Row sums
    row_buf = DataVector(dim)
    rows = sorted_sums(m1, True, row_buf)
    rows_ref = sorted_sums(m2, True, row_buf)
    assert_pairs(rows, rows_ref, "Row sum %f != %f")

    # Column sums
    col_buf = DataVector(dim)
    cols = sorted_sums(m1, False, col_buf)
    cols_ref = sorted_sums(m2, False, col_buf)
    assert_pairs(cols, cols_ref, "Col sum %f != %f")
def compareBTMatrices(testCaseClass, m1, m2):
    """
    Assert that two (possibly rectangular) matrices agree in row and column sums.

    Row sums and column sums are sorted independently for both matrices
    and compared pairwise to 5 places, so the order of rows/columns does
    not matter.

    @param testCaseClass: object providing assertEqual / assertAlmostEqual
    @param m1: matrix under test (getNrows, getNcols, getRow, getColumn)
    @param m2: reference matrix with the same interface
    """
    from pysgpp import DataVector

    places = 5

    # Both matrices must have the same shape.
    testCaseClass.assertEqual(m1.getNrows(), m2.getNrows())
    testCaseClass.assertEqual(m1.getNcols(), m2.getNcols())

    num_rows = m1.getNrows()
    num_cols = m1.getNcols()

    def sorted_sums(matrix, by_row, count, buf):
        # Load each row/column into the shared buffer and collect its sum.
        sums = []
        for k in range(count):
            if by_row:
                matrix.getRow(k, buf)
            else:
                matrix.getColumn(k, buf)
            sums.append(buf.sum())
        return sorted(sums)

    def assert_pairs(actual, expected, template):
        for got, want in zip(actual, expected):
            testCaseClass.assertAlmostEqual(got, want, places=places,
                                            msg=template % (got, want))

    # Row sums: one buffer entry per column.
    row_buf = DataVector(num_cols)
    rows = sorted_sums(m1, True, num_rows, row_buf)
    rows_ref = sorted_sums(m2, True, num_rows, row_buf)
    assert_pairs(rows, rows_ref, "Row sum %f != %f")

    # Column sums: one buffer entry per row.
    col_buf = DataVector(num_rows)
    cols = sorted_sums(m1, False, num_cols, col_buf)
    cols_ref = sorted_sums(m2, False, num_cols, col_buf)
    assert_pairs(cols, cols_ref, "Col sum %f != %f")
def compareStiffnessMatrices(testCaseClass, m1, m2):
    """
    Assert that two square matrices agree in diagonal, row sums and column sums.

    The matrices are indexed flat (entry (i, i) is read as m[i*n + i]).
    Every value list is sorted independently for both matrices and then
    compared pairwise with assertAlmostEqual at its default precision.

    @param testCaseClass: object providing assertEqual / assertAlmostEqual
    @param m1: matrix under test (getSize, getDim, flat indexing, getRow, getColumn)
    @param m2: reference matrix with the same interface
    """
    from pysgpp import DataVector

    # Dimensions: m1 must be square and both matrices equally shaped.
    testCaseClass.assertEqual(m1.getSize(), m1.getDim())
    testCaseClass.assertEqual(m1.getSize(), m2.getSize())
    testCaseClass.assertEqual(m1.getDim(), m2.getDim())

    dim = m1.getSize()

    def sorted_sums(matrix, by_row, buf):
        # Load each row/column into the shared buffer and collect its sum.
        sums = []
        for k in range(dim):
            if by_row:
                matrix.getRow(k, buf)
            else:
                matrix.getColumn(k, buf)
            sums.append(buf.sum())
        return sorted(sums)

    def assert_pairs(actual, expected, template):
        for got, want in zip(actual, expected):
            testCaseClass.assertAlmostEqual(got, want,
                                            msg=template % (got, want))

    # Diagonal
    diag = sorted([m1[k * dim + k] for k in range(dim)])
    diag_ref = sorted([m2[k * dim + k] for k in range(dim)])
    assert_pairs(diag, diag_ref, "Diagonal %f != %f")

    # Row sums
    row_buf = DataVector(dim)
    rows = sorted_sums(m1, True, row_buf)
    rows_ref = sorted_sums(m2, True, row_buf)
    assert_pairs(rows, rows_ref, "Row sum %f != %f")

    # Column sums
    col_buf = DataVector(dim)
    cols = sorted_sums(m1, False, col_buf)
    cols_ref = sorted_sums(m2, False, col_buf)
    assert_pairs(cols, cols_ref, "Col sum %f != %f")
def generateLaplaceMatrix(factory, level, verbose=False):
    """
    Build the dense matrix of the Laplace operation on a regular grid.

    A regular grid of the given level is generated through the factory,
    then the Laplace operation is applied to every unit vector and each
    result is stored as one column of the returned matrix.

    @param factory: grid object providing getStorage(), createGridGenerator()
                    and createOperationLaplace()
    @param level: level passed to the grid generator's regular()
    @param verbose: if True, print each result vector and its sum
    @return: DataVector(size, size) holding the assembled matrix
    """
    from pysgpp import DataVector

    storage = factory.getStorage()
    gen = factory.createGridGenerator()
    gen.regular(level)

    laplace = factory.createOperationLaplace()

    # The grid size is only known after the regular grid has been generated.
    size = storage.size()

    # work vectors
    alpha = DataVector(size)
    erg = DataVector(size)

    # stiffness matrix
    m = DataVector(size, size)
    m.setAll(0)

    # range() instead of xrange(): works on Python 2 and Python 3 alike
    for i in range(size):
        # apply the i-th unit vector
        alpha.setAll(0)
        alpha[i] = 1
        laplace.mult(alpha, erg)
        if verbose:
            # Explicit formatting yields "<erg> <sum>" on both Python 2 and
            # Python 3; the old `print erg, erg.sum()` statement is a
            # SyntaxError on Python 3.
            print("%s %s" % (erg, erg.sum()))
        m.setColumn(i, erg)

    return m
def calc_indicator_value(self, index):
    """
    Compute the indicator value for one grid point.

    The numerator is the squared entry, at the point's sequence number, of
    multEval.multTranspose applied to self.errors. The denominator is the
    sum of squared entries of multEval.mult applied to the unit vector of
    that sequence number.

    @param index: grid index whose sequence number is looked up in the
                  grid storage
    @return: numerator / denominator, or 0 if the denominator is zero
             (a message is printed in that case)
    """
    numData = self.trainData.getNrows()
    numCoeff = self.grid.getSize()
    seq = self.grid.getStorage().seq(index)

    # numerator: squared seq-th entry of the transposed product
    tmp = DataVector(numCoeff)
    self.multEval.multTranspose(self.errors, tmp)
    num = tmp[seq] ** 2

    # denominator: squared norm of the product with the seq-th unit vector
    alpha = DataVector(numCoeff)
    # Zero explicitly so alpha really is a unit vector; this does not rely
    # on DataVector(size) zero-initialising its entries (naive_calc_single
    # does the same).
    alpha.setAll(0.0)
    alpha[seq] = 1.0

    col = DataVector(numData)
    self.multEval.mult(alpha, col)
    col.sqr()
    denom = col.sum()

    if denom == 0:
        print("Denominator is zero")
        return 0
    return num / denom
def calc_indicator_value(self, index):
    """
    Compute the indicator value for one grid point.

    The numerator is the squared entry, at the point's sequence number, of
    multEval.multTranspose applied to self.errors. The denominator is the
    sum of squared entries of multEval.mult applied to the unit vector of
    that sequence number.

    @param index: grid index whose sequence number is looked up in the
                  grid storage
    @return: numerator / denominator, or 0 if the denominator is zero
             (a message is printed in that case)
    """
    numData = self.trainData.getNrows()
    numCoeff = self.grid.getSize()
    seq = self.grid.getStorage().seq(index)

    # numerator: squared seq-th entry of the transposed product
    tmp = DataVector(numCoeff)
    self.multEval.multTranspose(self.errors, tmp)
    num = tmp[seq] ** 2

    # denominator: squared norm of the product with the seq-th unit vector
    alpha = DataVector(numCoeff)
    # Zero explicitly so alpha really is a unit vector; this does not rely
    # on DataVector(size) zero-initialising its entries (naive_calc_single
    # does the same).
    alpha.setAll(0.0)
    alpha[seq] = 1.0

    col = DataVector(numData)
    self.multEval.mult(alpha, col)
    col.sqr()
    denom = col.sum()

    if denom == 0:
        # Parenthesised single argument: same output on Python 2, and no
        # longer a SyntaxError on Python 3.
        print("Denominator is zero")
        return 0
    return num / denom
def compareBBTMatrices(m1, m2):
    """
    Print sorted diagonal entries, row sums and column sums of two square matrices.

    The matrices are indexed flat (entry (i, i) is read as m[i*n + i]).
    For each of the three quantities the values of m1 and of the reference
    m2 are sorted independently and printed pairwise as
    (reference_value, value) tuples. Nothing is asserted or returned.

    @param m1: matrix under test; getSize() is used as the dimension n
    @param m2: reference matrix, read through flat indexing, getRow() and
               getColumn()
    """
    dim = m1.getSize()

    def sorted_sums(matrix, by_row, buf):
        # Load each row/column into the shared buffer and collect its sum.
        sums = []
        for k in range(dim):
            if by_row:
                matrix.getRow(k, buf)
            else:
                matrix.getColumn(k, buf)
            sums.append(buf.sum())
        sums.sort()
        return sums

    def print_pairs(reference, actual):
        for pair in zip(reference, actual):
            print(pair)

    # Diagonal
    diag = sorted([m1[k * dim + k] for k in range(dim)])
    diag_ref = sorted([m2[k * dim + k] for k in range(dim)])
    print_pairs(diag_ref, diag)

    # Row sums
    row_buf = DataVector(dim)
    rows = sorted_sums(m1, True, row_buf)
    rows_ref = sorted_sums(m2, True, row_buf)
    print_pairs(rows_ref, rows)

    # Column sums
    col_buf = DataVector(dim)
    cols = sorted_sums(m1, False, col_buf)
    cols_ref = sorted_sums(m2, False, col_buf)
    print_pairs(cols_ref, cols)
def compareBTMatrices(testCaseClass, m1, m2):
    """
    Assert that two (possibly rectangular) matrices agree in row and column sums.

    Row sums and column sums are sorted independently for both matrices
    and compared pairwise; 5 is passed as the third positional argument of
    assertAlmostEqual.

    @param testCaseClass: object providing assertEqual / assertAlmostEqual
    @param m1: matrix under test (getNrows, getNcols, getRow, getColumn)
    @param m2: reference matrix with the same interface
    """
    from pysgpp import DataVector

    # Both matrices must have the same shape.
    testCaseClass.assertEqual(m1.getNrows(), m2.getNrows())
    testCaseClass.assertEqual(m1.getNcols(), m2.getNcols())

    num_rows = m1.getNrows()
    num_cols = m1.getNcols()

    def sorted_sums(matrix, by_row, count, buf):
        # Load each row/column into the shared buffer and collect its sum.
        sums = []
        for k in range(count):
            if by_row:
                matrix.getRow(k, buf)
            else:
                matrix.getColumn(k, buf)
            sums.append(buf.sum())
        return sorted(sums)

    def assert_pairs(actual, expected, template):
        for got, want in zip(actual, expected):
            testCaseClass.assertAlmostEqual(got, want, 5,
                                            msg=template % (got, want))

    # Row sums: one buffer entry per column.
    row_buf = DataVector(num_cols)
    rows = sorted_sums(m1, True, num_rows, row_buf)
    rows_ref = sorted_sums(m2, True, num_rows, row_buf)
    assert_pairs(rows, rows_ref, "Row sum %f != %f")

    # Column sums: one buffer entry per row.
    col_buf = DataVector(num_rows)
    cols = sorted_sums(m1, False, num_cols, col_buf)
    cols_ref = sorted_sums(m2, False, num_cols, col_buf)
    assert_pairs(cols, cols_ref, "Col sum %f != %f")
def naive_calc_single(self, index):
    """
    Compute the indicator value for one grid point, printing intermediates.

    The numerator is the squared entry, at the point's sequence number, of
    multEval.multTranspose applied to self.errors. The denominator is the
    sum of squared entries of multEval.mult applied to the unit vector of
    that sequence number. The product vector, the numerator and the
    denominator are printed along the way.

    @param index: grid index whose sequence number is looked up in the
                  grid storage
    @return: numerator / denominator, or 0 if the denominator is zero
             (a message is printed in that case)
    """
    numData = self.trainData.getNrows()
    numCoeff = self.grid.getSize()
    seq = self.grid.getStorage().seq(index)

    # numerator: squared seq-th entry of the transposed product
    tmp = DataVector(numCoeff)
    self.multEval.multTranspose(self.errors, tmp)
    num = tmp[seq] ** 2

    # unit vector selecting the basis function with sequence number seq
    alpha = DataVector(numCoeff)
    alpha.setAll(0.0)
    alpha[seq] = 1.0

    col = DataVector(numData)
    self.multEval.mult(alpha, col)

    # Each print takes a single parenthesised argument: same output as the
    # former Python 2 print statements, and valid syntax on Python 3.
    print(col)

    col.sqr()
    denom = col.sum()

    print(num)
    print(denom)

    if denom == 0:
        print("Denominator is zero")
        return 0
    return num / denom
## Learner specialisation that evaluates regression errors and refines the
# grid based on them.
class Regressor(Learner):

    ## Errors per basis function (set by evalError)
    errors = None

    ## Error vector; after evalError it holds the squared residuals
    error = None

    ## Constructor; only delegates to the Learner constructor.
    def __init__(self):
        super(Regressor, self).__init__()

    ## Calculate the L2-norm of the error.
    # Requires a previous evalError() call so that self.error is set.
    # @return: square root of the sum of the stored squared errors
    def getL2NormError(self):
        return sqrt(self.error.sum())

    ## Calculate the maximal error.
    # @return: square root of the largest stored squared error
    def getMaxError(self):
        return sqrt(self.error.max())

    ## Calculate the minimal error.
    # @return: square root of the smallest stored squared error
    def getMinError(self):
        return sqrt(self.error.min())

    ## Evaluate the regression MSE.
    #
    # Stores the squared residuals in self.error and the per-basis-function
    # errors in self.errors as a side effect.
    #
    # @param data: DataContainer dataset
    # @param alpha: DataVector alpha-vector
    # @return: mean square error, or 0 for an empty dataset (in which case
    #          self.error and self.errors are left untouched)
    def evalError(self, data, alpha):
        size = data.getPoints().getNrows()
        if size == 0:
            return 0

        self.error = DataVector(size)
        self.specification.getBOperator(data.getName()).mult(alpha, self.error)
        self.error.sub(data.getValues())  # error vector
        self.error.sqr()                  # entries squared
        errorsum = self.error.sum()
        mse = errorsum / size             # MSE

        # calculate error per basis function
        self.errors = DataVector(len(alpha))
        self.specification.getBOperator(data.getName()).multTranspose(
            self.error, self.errors)
        self.errors.componentwise_mult(alpha)

        return mse

    ## Update different statistics about training progress.
    #
    # NOTE(review): the indentation of this method was reconstructed; the
    # testingOverall update is assumed to belong to the testSubset branch
    # because it averages testAccuracy - confirm against the original file.
    #
    # @param alpha: DataVector alpha-vector
    # @param trainSubset: DataContainer with training data
    # @param testSubset: DataContainer with validation data, default value: None
    def updateResults(self, alpha, trainSubset, testSubset=None):
        self.knowledge.update(alpha)

        # eval error for training data and append it to the others of this run
        self.trainAccuracy.append(self.evalError(trainSubset, alpha))

        i = float(len(self.trainAccuracy))

        # eval error for test data and append it to the others of this run;
        # identity test instead of `!= None`, which would invoke a
        # user-defined comparison on the container
        if testSubset is not None:
            self.testAccuracy.append(self.evalError(testSubset, alpha))
            self.testingOverall.append(sum(self.testAccuracy) / i)

        self.trainingOverall.append(sum(self.trainAccuracy) / i)

        self.numberPoints.append(self.grid.getSize())

    ## Refine the grid with the number of points as specified in the
    # corresponding TrainingSpecification object, using the errors per
    # basis function from the most recent evalError() call.
    def refineGrid(self):
        self.notifyEventControllers(LearnerEvents.REFINING_GRID)

        pointsNum = self.specification.getNumOfPointsToRefine(
            self.grid.getGenerator().getNumberOfRefinablePoints())

        self.grid.getGenerator().refine(
            SurplusRefinementFunctor(self.errors, pointsNum,
                                     self.specification.getAdaptThreshold()))
class Regressor(Learner):
    """
    Subclass of Learner, responsible for regression.
    The methods specific for regression are implemented here.
    """

    def __init__(self):
        """
        Constructor
        """
        # Name the class explicitly: super(self.__class__, self) resolves to
        # the subclass as soon as Regressor is inherited from and then calls
        # this very __init__ again without end.
        super(Regressor, self).__init__()
        # Errors per basis function
        self.errors = None
        # Error vector (squared residuals after evalError)
        self.error = None

    def __getattr__(self, attr):
        """
        Fallback attribute lookup: anything not found on the learner itself
        is looked up on the training specification.
        @param attr: string attribute/method name
        @return: the attribute of self.specification with that name
        """
        # Without this guard a lookup made while `specification` is not set
        # yet would re-enter __getattr__ for 'specification' indefinitely.
        if attr == "specification":
            raise AttributeError(attr)
        return getattr(self.specification, attr)

    def learnDataWithTest(self, dataset=None):
        """
        Learn data from the training data set and use the validation data
        set to prevent overfitting
        @param dataset: DataContainer object with data sets, default value
                        None (the initialized data set is used)
        @return: DataVector of alpha
        """
        self.notifyEventControllers(
            LearnerEvents.LEARNING_WITH_TESTING_STARTED)

        B = createOperationMultipleEval(
            self.grid,
            self.dataContainer.getPoints(DataContainer.TRAIN_CATEGORY))
        self.specification.setBOperator(B)

        if dataset is None:
            dataset = self.dataContainer

        # learning step
        trainSubset = dataset.getTrainDataset()
        testSubset = dataset.getTestDataset()

        while True:  # repeat until policy says "stop"
            self.notifyEventControllers(
                LearnerEvents.LEARNING_WITH_TESTING_STEP_STARTED)

            self.alpha = self.doLearningIteration(trainSubset)

            # calculate avg. error for training and test data and avg. for
            # refine alpha
            self.updateResults(self.alpha, trainSubset, testSubset)

            self.notifyEventControllers(
                LearnerEvents.LEARNING_WITH_TESTING_STEP_COMPLETE)

            self.iteration += 1

            if self.stopPolicy.isTrainingComplete(self):
                break

            # refine grid
            self.refineGrid()

        self.notifyEventControllers(
            LearnerEvents.LEARNING_WITH_TESTING_COMPLETE)
        return self.alpha

    def learnData(self):
        """
        Simple data learning on the learner's own data container; progress
        is printed for every iteration
        @return: DataVector of alpha
        """
        self.notifyEventControllers(LearnerEvents.LEARNING_STARTED)
        self.specification.setBOperator(
            createOperationMultipleEval(
                self.grid,
                self.dataContainer.getPoints(DataContainer.TRAIN_CATEGORY)))

        print(self.getL())

        while True:  # repeat until policy says "stop"
            print(
                "Learning %i/%i" %
                (self.iteration, self.stopPolicy.getAdaptiveIterationLimit()))
            self.notifyEventControllers(LearnerEvents.LEARNING_STEP_STARTED)

            # learning step
            self.alpha = self.doLearningIteration(self.dataContainer)

            # calculate avg. error for training and test data and avg. for
            # refine alpha
            self.updateResults(self.alpha, self.dataContainer)

            self.notifyEventControllers(LearnerEvents.LEARNING_STEP_COMPLETE)

            self.iteration += 1

            if self.stopPolicy.isTrainingComplete(self):
                break

            # refine grid
            self.refineGrid()

        self.notifyEventControllers(LearnerEvents.LEARNING_COMPLETE)
        return self.alpha

    def learnDataWithFolding(self):
        """
        Learn data with cross-fold validation
        @return: list of DataVector alpha in different folds
        """
        self.notifyEventControllers(
            LearnerEvents.LEARNING_WITH_FOLDING_STARTED)
        self.specification.setBOperator(
            createOperationMultipleEval(
                self.grid,
                self.dataContainer.getPoints(DataContainer.TRAIN_CATEGORY)))

        # update folding
        self.updateFoldingPolicy()

        alphas = [self.learnDataWithTest(dataset)
                  for dataset in self.foldingPolicy]

        self.notifyEventControllers(
            LearnerEvents.LEARNING_WITH_FOLDING_COMPLETE)
        return alphas

    def doLearningIteration(self, set):
        """
        Perform one learning step: set up the linear system for the given
        data and solve it for alpha
        @param set: DataContainer training data set
        @return: DataVector alpha vector
        """
        # initialize values
        self.linearSystem = DMSystemMatrix(self.grid,
                                           set.getPoints(),
                                           self.specification.getCOperator(),
                                           self.specification.getL())
        size = self.grid.getSize()

        if self.solver.getReuse() and self.alpha is not None:
            # Reuse data from old alpha vector increasing its dimension
            alpha = DataVector(self.alpha)
            alpha.resize(size)
        else:
            # Use new alpha vector
            alpha = DataVector(size)
            alpha.setAll(0.0)

        b = DataVector(size)
        self.linearSystem.generateb(set.getValues(), b)

        # calculates alphas
        self.solver.solve(self.linearSystem, alpha, b,
                          self.solver.getReuse(), False,
                          self.solver.getThreshold())
        return alpha

    def getL2NormError(self):
        """
        calculate L2-norm of error
        @return: last L2-norm of error
        """
        return np.sqrt(self.error.sum())

    def getMaxError(self):
        """
        calculate max error
        @return: max error
        """
        return np.sqrt(self.error.max())

    def getMinError(self):
        """
        calculate min error
        @return: min error
        """
        return np.sqrt(self.error.min())

    def evalError(self, data, alpha):
        """
        Evaluate regression MSE; stores the squared residuals in self.error
        and the errors per basis function in self.errors
        @param data: DataContainer data set
        @param alpha: DataVector alpha-vector
        @return: mean square error, 0 for an empty data set
        """
        size = data.getPoints().getNrows()
        if size == 0:
            return 0

        self.error = DataVector(size)
        self.getBOperator().mult(alpha, self.error)
        # error vector
        self.error.sub(data.getValues())
        # entries squared
        self.error.sqr()
        errorsum = self.error.sum()
        # MSE
        mse = errorsum / size

        # calculate error per basis function
        self.errors = DataVector(len(alpha))
        self.getBOperator().multTranspose(self.error, self.errors)
        self.errors.componentwise_mult(alpha)

        return mse

    def updateResults(self, alpha, trainSubset, testSubset=None):
        """
        Update different statistics about training progress
        @param alpha: DataVector alpha-vector
        @param trainSubset: DataContainer with training data
        @param testSubset: DataContainer with validation data
        """
        # NOTE(review): indentation was reconstructed; the testingOverall
        # update is assumed to belong to the testSubset branch because it
        # averages testAccuracy - confirm against the original file.

        # eval error for training data and append it to the others of this run
        self.trainAccuracy.append(self.evalError(trainSubset, alpha))

        i = float(len(self.trainAccuracy))

        # eval error for test data and append it to the others of this run
        if testSubset is not None:
            self.testAccuracy.append(self.evalError(testSubset, alpha))
            self.testingOverall.append(sum(self.testAccuracy) / i)

        self.trainingOverall.append(sum(self.trainAccuracy) / i)

        self.numberPoints.append(self.grid.getSize())

    def refineGrid(self):
        """
        Refines grid with the number of points as specified in
        corresponding TrainingSpecification object
        """
        self.notifyEventControllers(LearnerEvents.REFINING_GRID)
        refinableNum = self.grid.getGenerator().getNumberOfRefinablePoints()
        pointsNum = self.getNumOfPointsToRefine(refinableNum)
        functor = SurplusRefinementFunctor(self.errors, pointsNum,
                                           self.getAdaptThreshold())
        self.grid.getGenerator().refine(functor)
class TestDataVector(unittest.TestCase): ## # Set up, create random DataVector and corresponding Python data structures. # @test DataVector::get(), DataVector::set() def setUp(self): from pysgpp import DataVector import random ## number of rows self.nrows = 5 ## number of columns self.ncols = 4 ## number of entries self.N = self.nrows*self.ncols ## random list of lists self.l_rand = [[2*(random.random()-0.5) for j in xrange(self.ncols)] for i in xrange(self.nrows)] ## same as l_rand, but flattened self.l_rand_total = [] for li in self.l_rand: self.l_rand_total.extend(li) # ## Data Vector, corresponding to l_rand # self.d_rand = DataVector(self.nrows,self.ncols) # for i in xrange(self.N): # self.d_rand[i] = self.l_rand_total[i] # # for i in xrange(self.N): # self.assertEqual(self.d_rand[i], self.l_rand_total[i]) ## Data Vector, corresponding to l_rand self.d_rand = DataVector(self.N) for i in xrange(self.N): self.d_rand[i] = self.l_rand_total[i] for i in xrange(self.N): self.assertEqual(self.d_rand[i], self.l_rand_total[i]) ## # Constructors4. # @test DataVector::DataVector(size_t size), DataVector::DataVector(size_t size, size_t dim), DataVector::DataVector(DataVectorDefinition &DataVectorDef), DataVector::getSize(), DataVector::getDim(), DataVector::getSize() # @todo (pflueged) DataVector::DataVector(double *input, size_t size, size_t dim) def testConstructor(self): from pysgpp import DataVector d = DataVector(2) self.assertEqual(len(d), 2) # getSize() # d = DataVector(2,3) # self.assertEqual(d.getSize(), 2) # self.assertEqual(d.getDim(), 3) # self.assertEqual(len(d), 2*3) # getSize() # # d2 = DataVector(self.d_rand) # for i in xrange(self.N): # self.assertEqual(d2[i], self.d_rand[i]) # self.assertEqual(d2.getSize(), self.nrows) # self.assertEqual(d2.getDim(), self.ncols) # self.assertEqual(len(d2), self.N) # d2[self.ncols] = -4.0 # self.assertNotEqual(d2[self.ncols], self.d_rand[self.ncols]) ## # Min, Max operations. 
# @test DataVector::min(int d), DataVector::max(int d), DataVector::minmax(int d, double *min, double *max), DataVector::min(), DataVector::max() def testMinMax(self): # # test dimension-dependent min, max # for j in xrange(self.ncols): # minj = min([self.l_rand[i][j] for i in xrange(self.nrows)]) # maxj = max([self.l_rand[i][j] for i in xrange(self.nrows)]) # self.assertEqual(self.d_rand.min(j), minj) # self.assertEqual(self.d_rand.max(j), maxj) # mi, ma = self.d_rand.minmax(j) # self.assertEqual(mi, minj) # self.assertEqual(ma, maxj) # test global min, max self.assertEqual(self.d_rand.min(), min(self.l_rand_total)) self.assertEqual(self.d_rand.max(), max(self.l_rand_total)) ## # Operations on DataVectors. # @test DataVector::sum(), DataVector::sqr(), DataVector::abs(), DataVector::componentwise_mult(), DataVector::componentwise_div() def testOps(self): from pysgpp import DataVector # sum self.assertAlmostEqual(self.d_rand.sum(), sum(self.l_rand_total)) # sqr d = DataVector(self.d_rand) d.sqr() for i in xrange(self.N): self.assertEqual(self.d_rand[i]**2, d[i]) # abs d = DataVector(self.d_rand) d.abs() for i in xrange(self.N): self.assertEqual(abs(self.d_rand[i]), d[i]) # componentwise_mult d = DataVector(self.d_rand) # d2 = DataVector(self.nrows, self.ncols) d2 = DataVector(self.N) for i in xrange(self.N): d2[i] = i d.componentwise_mult(d2) for i in xrange(self.N): self.assertEqual(self.d_rand[i]*i, d[i]) # componentwise_div d = DataVector(self.d_rand) for i in xrange(self.N): d2[i] = i+1 d.componentwise_div(d2) for i in xrange(self.N): self.assertEqual(self.d_rand[i]/(i+1), d[i]) ## # Vector-Operations # @test DataVector::dotProduct(DataVector &vec) def testDotProduct(self): from pysgpp import DataVector x = 0 d = DataVector(3) for i in xrange(len(d)): d[i] = i + 1 x += d[i] * d[i] self.assertEqual(d.dotProduct(d), x)
## Learner specialisation that evaluates regression errors and refines the
# grid based on them.
class Regressor(Learner):

    ## Errors per basis function (set by evalError)
    errors = None

    ## Error vector; after evalError it holds the squared residuals
    error = None

    ## Constructor; only delegates to the Learner constructor.
    def __init__(self):
        super(Regressor, self).__init__()

    ## Calculate the L2-norm of the error.
    # Requires a previous evalError() call so that self.error is set.
    # @return: square root of the sum of the stored squared errors
    def getL2NormError(self):
        return sqrt(self.error.sum())

    ## Calculate the maximal error.
    # @return: square root of the largest stored squared error
    def getMaxError(self):
        return sqrt(self.error.max())

    ## Calculate the minimal error.
    # @return: square root of the smallest stored squared error
    def getMinError(self):
        return sqrt(self.error.min())

    ## Evaluate the regression MSE.
    #
    # Stores the squared residuals in self.error and the per-basis-function
    # errors in self.errors as a side effect.
    #
    # @param data: DataContainer dataset
    # @param alpha: DataVector alpha-vector
    # @return: mean square error, or 0 for an empty dataset (in which case
    #          self.error and self.errors are left untouched)
    def evalError(self, data, alpha):
        size = data.getPoints().getNrows()
        if size == 0:
            return 0

        self.error = DataVector(size)
        self.specification.getBOperator(data.getName()).mult(alpha, self.error)
        self.error.sub(data.getValues())  # error vector
        self.error.sqr()                  # entries squared
        errorsum = self.error.sum()
        mse = errorsum / size             # MSE

        # calculate error per basis function
        self.errors = DataVector(len(alpha))
        self.specification.getBOperator(data.getName()).multTranspose(
            self.error, self.errors)
        self.errors.componentwise_mult(alpha)

        return mse

    ## Update different statistics about training progress.
    #
    # NOTE(review): the indentation of this method was reconstructed; the
    # testingOverall update is assumed to belong to the testSubset branch
    # because it averages testAccuracy - confirm against the original file.
    #
    # @param alpha: DataVector alpha-vector
    # @param trainSubset: DataContainer with training data
    # @param testSubset: DataContainer with validation data, default value: None
    def updateResults(self, alpha, trainSubset, testSubset=None):
        self.knowledge.update(alpha)

        # eval error for training data and append it to the others of this run
        self.trainAccuracy.append(self.evalError(trainSubset, alpha))

        i = float(len(self.trainAccuracy))

        # eval error for test data and append it to the others of this run;
        # identity test instead of `!= None`, which would invoke a
        # user-defined comparison on the container
        if testSubset is not None:
            self.testAccuracy.append(self.evalError(testSubset, alpha))
            self.testingOverall.append(sum(self.testAccuracy) / i)

        self.trainingOverall.append(sum(self.trainAccuracy) / i)

        self.numberPoints.append(self.grid.getSize())

    ## Refine the grid with the number of points as specified in the
    # corresponding TrainingSpecification object, using the errors per
    # basis function from the most recent evalError() call.
    def refineGrid(self):
        self.notifyEventControllers(LearnerEvents.REFINING_GRID)

        pointsNum = self.specification.getNumOfPointsToRefine(
            self.grid.createGridGenerator().getNumberOfRefinablePoints())

        self.grid.createGridGenerator().refine(
            SurplusRefinementFunctor(self.errors, pointsNum,
                                     self.specification.getAdaptThreshold()))
class Regressor(Learner):
    """
    Subclass of Learner, responsible for regression.
    The methods specific for regression are implemented here.
    """

    def __init__(self):
        """
        Constructor
        """
        # Name the class explicitly: super(self.__class__, self) resolves to
        # the subclass as soon as Regressor is inherited from and then calls
        # this very __init__ again without end.
        super(Regressor, self).__init__()
        # Errors per basis function
        self.errors = None
        # Error vector (squared residuals after evalError)
        self.error = None

    def __getattr__(self, attr):
        """
        Fallback attribute lookup: anything not found on the learner itself
        is looked up on the training specification.
        @param attr: string attribute/method name
        @return: the attribute of self.specification with that name
        """
        # Without this guard a lookup made while `specification` is not set
        # yet would re-enter __getattr__ for 'specification' indefinitely.
        if attr == "specification":
            raise AttributeError(attr)
        return getattr(self.specification, attr)

    def getL2NormError(self):
        """
        calculate L2-norm of error
        @return: last L2-norm of error
        """
        return np.sqrt(self.error.sum())

    def getMaxError(self):
        """
        calculate max error
        @return: max error
        """
        return np.sqrt(self.error.max())

    def getMinError(self):
        """
        calculate min error
        @return: min error
        """
        return np.sqrt(self.error.min())

    def evalError(self, data, alpha):
        """
        Evaluate regression MSE; stores the squared residuals in self.error
        and the errors per basis function in self.errors
        @param data: DataContainer data set
        @param alpha: DataVector alpha-vector
        @return: mean square error, 0 for an empty data set
        """
        size = data.getPoints().getNrows()
        if size == 0:
            return 0

        self.error = DataVector(size)
        self.getBOperator().mult(alpha, self.error)
        # error vector
        self.error.sub(data.getValues())
        # entries squared
        self.error.sqr()
        errorsum = self.error.sum()
        # MSE
        mse = errorsum / size

        # calculate error per basis function
        self.errors = DataVector(len(alpha))
        self.getBOperator().multTranspose(self.error, self.errors)
        self.errors.componentwise_mult(alpha)

        return mse

    def updateResults(self, alpha, trainSubset, testSubset=None):
        """
        Update different statistics about training progress
        @param alpha: DataVector alpha-vector
        @param trainSubset: DataContainer with training data
        @param testSubset: DataContainer with validation data
        """
        # NOTE(review): indentation was reconstructed; the testingOverall
        # update is assumed to belong to the testSubset branch because it
        # averages testAccuracy - confirm against the original file.

        # eval error for training data and append it to the others of this run
        self.trainAccuracy.append(self.evalError(trainSubset, alpha))

        i = float(len(self.trainAccuracy))

        # eval error for test data and append it to the others of this run
        if testSubset is not None:
            self.testAccuracy.append(self.evalError(testSubset, alpha))
            self.testingOverall.append(sum(self.testAccuracy) / i)

        self.trainingOverall.append(sum(self.trainAccuracy) / i)

        self.numberPoints.append(self.grid.getSize())

    def refineGrid(self):
        """
        Refines grid with the number of points as specified in
        corresponding TrainingSpecification object
        """
        self.notifyEventControllers(LearnerEvents.REFINING_GRID)
        refinableNum = self.grid.createGridGenerator(
        ).getNumberOfRefinablePoints()
        pointsNum = self.getNumOfPointsToRefine(refinableNum)
        functor = SurplusRefinementFunctor(self.errors, pointsNum,
                                           self.getAdaptThreshold())
        self.grid.createGridGenerator().refine(functor)
class TestDataVector(unittest.TestCase): ## # Set up, create random DataVector and corresponding Python data structures. # @test DataVector::get(), DataVector::set() def setUp(self): from pysgpp import DataVector import random ## number of rows self.nrows = 5 ## number of columns self.ncols = 4 ## number of entries self.N = self.nrows * self.ncols ## random list of lists self.l_rand = [[ 2 * (random.random() - 0.5) for j in xrange(self.ncols) ] for i in xrange(self.nrows)] ## same as l_rand, but flattened self.l_rand_total = [] for li in self.l_rand: self.l_rand_total.extend(li) # ## Data Vector, corresponding to l_rand # self.d_rand = DataVector(self.nrows,self.ncols) # for i in xrange(self.N): # self.d_rand[i] = self.l_rand_total[i] # # for i in xrange(self.N): # self.assertEqual(self.d_rand[i], self.l_rand_total[i]) ## Data Vector, corresponding to l_rand self.d_rand = DataVector(self.N) for i in xrange(self.N): self.d_rand[i] = self.l_rand_total[i] for i in xrange(self.N): self.assertEqual(self.d_rand[i], self.l_rand_total[i]) ## # Constructors4. # @test DataVector::DataVector(size_t size), DataVector::DataVector(size_t size, size_t dim), DataVector::DataVector(DataVectorDefinition &DataVectorDef), DataVector::getSize(), DataVector::getDim(), DataVector::getSize() # @todo (pflueged) DataVector::DataVector(double *input, size_t size, size_t dim) def testConstructor(self): from pysgpp import DataVector d = DataVector(2) self.assertEqual(len(d), 2) # getSize() # d = DataVector(2,3) # self.assertEqual(d.getSize(), 2) # self.assertEqual(d.getDim(), 3) # self.assertEqual(len(d), 2*3) # getSize() # # d2 = DataVector(self.d_rand) # for i in xrange(self.N): # self.assertEqual(d2[i], self.d_rand[i]) # self.assertEqual(d2.getSize(), self.nrows) # self.assertEqual(d2.getDim(), self.ncols) # self.assertEqual(len(d2), self.N) # d2[self.ncols] = -4.0 # self.assertNotEqual(d2[self.ncols], self.d_rand[self.ncols]) ## # Min, Max operations. 
# @test DataVector::min(int d), DataVector::max(int d), DataVector::minmax(int d, double *min, double *max), DataVector::min(), DataVector::max() def testMinMax(self): # # test dimension-dependent min, max # for j in xrange(self.ncols): # minj = min([self.l_rand[i][j] for i in xrange(self.nrows)]) # maxj = max([self.l_rand[i][j] for i in xrange(self.nrows)]) # self.assertEqual(self.d_rand.min(j), minj) # self.assertEqual(self.d_rand.max(j), maxj) # mi, ma = self.d_rand.minmax(j) # self.assertEqual(mi, minj) # self.assertEqual(ma, maxj) # test global min, max self.assertEqual(self.d_rand.min(), min(self.l_rand_total)) self.assertEqual(self.d_rand.max(), max(self.l_rand_total)) ## # Operations on DataVectors. # @test DataVector::sum(), DataVector::sqr(), DataVector::abs(), DataVector::componentwise_mult(), DataVector::componentwise_div() def testOps(self): from pysgpp import DataVector # sum self.assertAlmostEqual(self.d_rand.sum(), sum(self.l_rand_total)) # sqr d = DataVector(self.d_rand) d.sqr() for i in xrange(self.N): self.assertEqual(self.d_rand[i]**2, d[i]) # abs d = DataVector(self.d_rand) d.abs() for i in xrange(self.N): self.assertEqual(abs(self.d_rand[i]), d[i]) # componentwise_mult d = DataVector(self.d_rand) # d2 = DataVector(self.nrows, self.ncols) d2 = DataVector(self.N) for i in xrange(self.N): d2[i] = i d.componentwise_mult(d2) for i in xrange(self.N): self.assertEqual(self.d_rand[i] * i, d[i]) # componentwise_div d = DataVector(self.d_rand) for i in xrange(self.N): d2[i] = i + 1 d.componentwise_div(d2) for i in xrange(self.N): self.assertEqual(self.d_rand[i] / (i + 1), d[i]) ## # Vector-Operations # @test DataVector::dotProduct(DataVector &vec) def testDotProduct(self): from pysgpp import DataVector x = 0 d = DataVector(3) for i in xrange(len(d)): d[i] = i + 1 x += d[i] * d[i] self.assertEqual(d.dotProduct(d), x)