class LearnerBuilder(object):
    # Fluent-interface builder for a learner (Regressor or Classifier).
    #
    # A construction chain starts with buildRegressor() or buildClassifier(),
    # continues with the with...() methods and finishes with andGetResult().
    #
    # The nested *Descriptor classes collect settings for one aspect of the
    # learner. Each of them overrides __getattr__: as soon as an attribute is
    # requested that the descriptor does not define itself, the descriptor
    # pushes its collected settings into the learner and then forwards the
    # attribute lookup to the builder. This is what allows chains such as
    # builder.withGrid().withLevel(3).withSpecification()...

    ## The learner object (Regressor or Classifier) currently being built
    __learner = None

    ## Checkpoint controller attached via withCheckpointController(), if any
    __checkpointController = None

    ## Descriptor created by withGrid(), if any
    __gridDescriptor = None

    ## Descriptor created by withSpecification(), if any
    __specificationDescriptor = None

    ## Descriptor created by withStopPolicy(), if any
    __stopPolicyDescriptor = None

    ## Descriptor created by withCGSolver(), if any
    __solverDescriptor = None

    ## Descriptor created by one of the with...FoldingPolicy() methods, if any
    __foldingPolicyDescriptor = None

    ##
    # Default constructor
    ##
    def __init__(self):
        self.__learner = None
        self.__checkpointController = None
        self.__gridDescriptor = None
        self.__specificationDescriptor = None
        self.__stopPolicyDescriptor = None
        self.__solverDescriptor = None
        self.__foldingPolicyDescriptor = None

    ## Returns the object of learner subclass that is currently being constructed
    # @return the object of learner subclass that is currently being constructed
    def getLearner(self):
        return self.__learner

    ## Returns the checkpoint controller
    # @return the checkpoint controller (None if none was attached)
    def getCheckpointController(self):
        return self.__checkpointController

    ## Start building Regressor
    # @return: LearnerBuilder itself
    ##
    def buildRegressor(self):
        self.__learner = Regressor()
        return self.__buildCommonLearner(self.__learner)

    ## Start building Classifier
    # @return: LearnerBuilder itself
    ##
    def buildClassifier(self):
        self.__learner = Classifier()
        return self.__buildCommonLearner(self.__learner)

    ## Initialization shared by buildRegressor() and buildClassifier():
    # attaches a fresh LearnedKnowledge object to the learner
    #
    # @param learner: the freshly created learner
    # @return: LearnerBuilder itself
    ##
    def __buildCommonLearner(self, learner):
        learner.setLearnedKnowledge(LearnedKnowledge())
        return self

    ## Stores a data container in the learner; if the learner already has
    # one, the new container is combined with the existing one
    #
    # @param dataContainer: the data container to add
    # @return: LearnerBuilder itself
    ##
    def __addDataContainer(self, dataContainer):
        existing = self.__learner.dataContainer
        if existing is not None:
            dataContainer = existing.combine(dataContainer)
        self.__learner.setDataContainer(dataContainer)
        return self

    ## Start description of specification parameters for learner
    # @return: SpecificationDescriptor
    ##
    def withSpecification(self):
        self.__specificationDescriptor = LearnerBuilder.SpecificationDescriptor(self)
        return self.__specificationDescriptor

    ## Start description of parameters of CG-Solver for learner
    # @return: CGSolverDescriptor
    ##
    def withCGSolver(self):
        self.__solverDescriptor = LearnerBuilder.CGSolverDescriptor(self)
        return self.__solverDescriptor

    ## Start description of parameters of the grid for learner
    # @return: GridDescriptor
    ##
    def withGrid(self):
        self.__gridDescriptor = LearnerBuilder.GridDescriptor(self)
        return self.__gridDescriptor

    ##
    # Set the starting iteration number and return the builder object
    #
    # @param iteration: integer starting iteration number
    # @return: LearnerBuilder
    ##
    def withStartingIterationNumber(self, iteration):
        self.__learner.setCurrentIterationNumber(iteration)
        return self

    ##
    # Start description of parameters of stop-policy for learner
    # @return: StopPolicyDescriptor
    ##
    def withStopPolicy(self):
        self.__stopPolicyDescriptor = LearnerBuilder.StopPolicyDescriptor(self)
        return self.__stopPolicyDescriptor

    ##
    # Returns the built learner (regressor or classifier); should be called
    # at the end of construction
    #
    # @return: Learner (Classifier or Regressor)
    ##
    def andGetResult(self):
        # NOTE(review): the default descriptors created here only record
        # themselves on the builder; a descriptor pushes its settings into the
        # learner from its __getattr__, which is not triggered here. Confirm
        # whether defaults are meant to be applied to the learner as well.
        if self.__gridDescriptor is None:
            self.__gridDescriptor = LearnerBuilder.GridDescriptor(self)
        if self.__specificationDescriptor is None:
            self.__specificationDescriptor = LearnerBuilder.SpecificationDescriptor(self)

        learner = self.__learner
        specification = learner.specification
        if specification.getBOperator() is None:
            trainPoints = learner.dataContainer.getPoints(DataContainer.TRAIN_CATEGORY)
            specification.setBOperator(
                createOperationMultipleEval(learner.grid, trainPoints),
                DataContainer.TRAIN_CATEGORY)

        # Best effort: a failure while setting up the operator for the test
        # category is deliberately ignored (presumably because test data is
        # optional -- TODO confirm). Only Exception is caught so that
        # KeyboardInterrupt and SystemExit still propagate.
        try:
            testPoints = learner.dataContainer.getPoints(DataContainer.TEST_CATEGORY)
            specification.setBOperator(
                createOperationMultipleEval(learner.grid, testPoints),
                DataContainer.TEST_CATEGORY)
        except Exception:
            pass

        return learner

    ##
    # Signals to use N-fold cross validation with sequential folding rule
    #
    # @return: FoldingDescriptor
    ##
    def withSequentialFoldingPolicy(self):
        self.__foldingPolicyDescriptor = LearnerBuilder.FoldingDescriptor(
            self, LearnerBuilder.FoldingDescriptor.SEQUENTIAL)
        return self.__foldingPolicyDescriptor

    ##
    # Signals to use N-fold cross validation with random folding rule
    #
    # @return: FoldingDescriptor
    ##
    def withRandomFoldingPolicy(self):
        self.__foldingPolicyDescriptor = LearnerBuilder.FoldingDescriptor(
            self, LearnerBuilder.FoldingDescriptor.RANDOM)
        return self.__foldingPolicyDescriptor

    ##
    # Signals to use N-fold cross validation with stratified folding rule
    #
    # @return: FoldingDescriptor
    ##
    def withStratifiedFoldingPolicy(self):
        self.__foldingPolicyDescriptor = LearnerBuilder.FoldingDescriptor(
            self, LearnerBuilder.FoldingDescriptor.STRATIFIED)
        return self.__foldingPolicyDescriptor

    ##
    # Signals to use N-fold cross validation from a set of files
    #
    # @return: FoldingDescriptor
    ##
    def withFilesFoldingPolicy(self):
        # Uses the FILES type so that the descriptor creates a
        # FilesFoldingPolicy (this used to pass STRATIFIED by mistake).
        self.__foldingPolicyDescriptor = LearnerBuilder.FoldingDescriptor(
            self, LearnerBuilder.FoldingDescriptor.FILES)
        return self.__foldingPolicyDescriptor

    ##
    # Signals to use data from ARFF file for training dataset
    #
    # @param filename: Filename where to read the data from
    # @param name: Category name, default: "train"
    # @return: LearnerBuilder
    ##
    def withTrainingDataFromARFFFile(self, filename, name="train"):
        return self.__addDataContainer(ARFFAdapter(filename).loadData(name))

    ##
    # Signals to use data from ARFF file for testing dataset
    #
    # @param filename: Filename where to read the data from
    # @return: LearnerBuilder object itself
    ##
    def withTestingDataFromARFFFile(self, filename):
        return self.__addDataContainer(
            ARFFAdapter(filename).loadData(DataContainer.TEST_CATEGORY))

    ##
    # Signals to use the given points and values for training dataset
    #
    # @param points: points passed on to the DataContainer constructor
    # @param values: values passed on to the DataContainer constructor
    # @param name: Category name, default: "train"
    # @return: LearnerBuilder object itself
    ##
    def withTrainingDataFromNumPyArray(self, points, values, name="train"):
        return self.__addDataContainer(
            DataContainer(points=points, values=values, name=name))

    ##
    # Signals to use the given points and values for testing dataset
    #
    # @param points: points passed on to the DataContainer constructor
    # @param values: values passed on to the DataContainer constructor
    # @param name: Category name, default: "test"
    # @return: LearnerBuilder object itself
    ##
    def withTestingDataFromNumPyArray(self, points, values, name="test"):
        # Honour the caller-supplied category name (it used to be ignored and
        # replaced by the literal "test").
        return self.withTrainingDataFromNumPyArray(points, values, name)

    ##
    # Signals to use data from CSV file for training dataset
    #
    # @param filename: Filename where to read the data from
    # @param name: Category name, default: "train"
    # @return: LearnerBuilder
    ##
    def withTrainingDataFromCSVFile(self, filename, name="train"):
        return self.__addDataContainer(CSVAdapter(filename).loadData(name))

    ##
    # Signals to use data from CSV file for testing dataset
    #
    # @param filename: Filename where to read the data from
    # @return: LearnerBuilder object itself
    ##
    def withTestingDataFromCSVFile(self, filename):
        return self.__addDataContainer(
            CSVAdapter(filename).loadData(DataContainer.TEST_CATEGORY))

    ##
    # Signals to use initial data for alpha vector from ARFF file
    #
    # @param filename: Filename where to read the data from
    # @return: LearnerBuilder object itself
    ##
    def withInitialAlphaFromARFFFile(self, filename):
        alpha = LearnedKnowledgeFormatter().deserializeFromFile(filename)
        self.__learner.alpha = alpha
        self.__learner.knowledge.setMemento(alpha)
        return self

    ##
    # Attaches checkpoint controller to the learner
    #
    # @param controller: Checkpoint controller which implements LearnerEventController
    # @return: LearnerBuilder
    ##
    def withCheckpointController(self, controller):
        self.__checkpointController = controller
        learner = self.__learner
        learner.attachEventController(controller)
        controller.setGrid(learner.grid)
        controller.setLearnedKnowledge(learner.knowledge)
        controller.setLearner(learner)
        return self

    ##
    # Attaches progress presenter to the learner and, if the learner already
    # has a solver, to the solver as well
    #
    # @param presentor: progress presenter which implements LearnerEventController
    # @return: LearnerBuilder
    ##
    def withProgressPresenter(self, presentor):
        self.__learner.attachEventController(presentor)
        if self.__learner.solver is not None:
            self.__learner.solver.attachEventController(presentor)
        return self

    ##
    # Grid Descriptor helps to implement fluent interface pattern in python;
    # it encapsulates functionality concerning creation of the grid
    ##
    class GridDescriptor:
        __builder = None
        __deg = None
        __level = None
        __file = None
        __border = None
        __dim = None
        # Class-level default so that reading the attribute can never fall
        # through to __getattr__.
        __cliqueSize = None

        ## Constructor
        #
        # Reads the grid dimensionality from the data container of the
        # builder's learner, so the data has to be defined beforehand.
        #
        # @param builder: LearnerBuilder which creates this Descriptor
        ##
        def __init__(self, builder):
            self.__builder = builder
            self.__dim = self.__builder.getLearner().dataContainer.getDim()
            self.__deg = None
            self.__level = None
            self.__file = None
            self.__border = None
            self.__cliqueSize = None

        ##
        # Overrides built-in method.
        # Called for every attribute that is not defined by this Descriptor,
        # most probably a method of LearnerBuilder. Creates the grid from the
        # collected settings (or restores it from file), stores it in the
        # learner and forwards the lookup to the builder.
        #
        # @param attr: String for method name
        # @return: the attribute of the same name taken from LearnerBuilder
        # @exception AttributeError if neither a file nor both dimension and
        #            level are defined
        ##
        def __getattr__(self, attr):
            if self.__file is not None:
                grid = GridFormatter().deserializeFromFile(self.__file)
            else:
                if self.__dim is None or self.__level is None:
                    raise AttributeError("Not all attributes assigned to create grid")
                grid = self.__createGrid()
                generator = grid.createGridGenerator()
                if self.__cliqueSize is None:
                    generator.regular(self.__level)
                else:
                    generator.cliques(self.__level, self.__cliqueSize)

            self.__builder.getLearner().setGrid(grid)
            return getattr(self.__builder, attr)

        ## Creates the still empty grid matching the border type and the
        # polynomial degree
        #
        # @return: the created grid
        ##
        def __createGrid(self):
            # A degree that was never set counts as "not polynomial"; the
            # explicit None check avoids comparing None with an int.
            usePoly = self.__deg is not None and self.__deg > 1

            if self.__border is None:
                # no border points
                if usePoly:
                    return Grid.createPolyGrid(self.__dim, self.__deg)
                return Grid.createLinearGrid(self.__dim)

            if self.__border == BorderTypes.TRAPEZOIDBOUNDARY:
                return Grid.createLinearBoundaryGrid(self.__dim)
            if self.__border == BorderTypes.COMPLETEBOUNDARY:
                return Grid.createLinearBoundaryGrid(self.__dim, 0)

            # any other border type: modified basis functions
            if usePoly:
                return Grid.createModPolyGrid(self.__dim, self.__deg)
            return Grid.createModLinearGrid(self.__dim)

        ##
        # Defines the level of the grid
        #
        # @param level: level as integer
        # @return: GridDescriptor itself
        ##
        def withLevel(self, level):
            self.__level = level
            return self

        ##
        # Defines the polynomial base of the grid
        #
        # @param deg: degree of polynomial base as integer
        # @return: GridDescriptor itself
        ##
        def withPolynomialBase(self, deg):
            self.__deg = deg
            return self

        ##
        # Defines the border type of the grid
        #
        # @param type: border type, one of the BorderTypes values
        # @return: GridDescriptor itself
        ##
        def withBorder(self, type):
            self.__border = type
            return self

        ##
        # Indicates that grid should be restored from file
        #
        # @param filename: String name of file the grid should be restored from
        # @return: GridDescriptor itself
        ##
        def fromFile(self, filename):
            self.__file = filename
            return self

        ##
        # Creates a special kind of grid where every cliqueSize dimensions are
        # completely interconnected (building a clique in a corresponding
        # graphical model), while the connections between cliques exist only
        # over the level 1 functions
        #
        # @param cliqueSize the number of dimensions in a clique
        # @return: GridDescriptor itself
        # @exception Exception if the grid dimensionality is smaller than the
        #            clique size
        ##
        def withCliques(self, cliqueSize):
            if self.__dim < cliqueSize:
                raise Exception("Grid dimensionality should be not smaller than the clique size")
            self.__cliqueSize = cliqueSize
            return self

    ##
    # TrainingStopPolicy Descriptor helps to implement fluent interface pattern in python;
    # it encapsulates functionality concerning creation of the training stop policy
    ##
    class StopPolicyDescriptor:
        __builder = None
        __policy = None

        ##
        # Constructor
        #
        # @param builder: LearnerBuilder which creates this Descriptor
        ##
        def __init__(self, builder):
            self.__builder = builder
            self.__policy = TrainingStopPolicy()

        ##
        # Overrides built-in method.
        # Called for every attribute that is not defined by this Descriptor,
        # most probably a method of LearnerBuilder. Stores the stop policy in
        # the learner and forwards the lookup to the builder.
        #
        # @param attr: String for method name
        # @return: the attribute of the same name taken from LearnerBuilder
        ##
        def __getattr__(self, attr):
            policy = self.__policy
            limits = (policy.getAdaptiveIterationLimit(),
                      policy.getAccuracyLimit(),
                      policy.getEpochsLimit(),
                      policy.getGridSizeLimit(),
                      policy.getMSELimit())
            # if no limit at all was set, only one iteration has to be made
            if all(limit is None for limit in limits):
                policy.setAdaptiveIterationLimit(0)
            self.__builder.getLearner().setStopPolicy(policy)
            return getattr(self.__builder, attr)

        ##
        # Defines the maximal number of refinement steps
        #
        # @param limit: integer for maximal number of refinement steps
        # @return: StopPolicyDescriptor itself
        ##
        def withAdaptiveIterationLimit(self, limit):
            self.__policy.setAdaptiveIterationLimit(limit)
            return self

        ## Historical, misspelled name of withAdaptiveIterationLimit(); kept
        # so that existing callers continue to work
        withAdaptiveItarationLimit = withAdaptiveIterationLimit

        ##
        # Defines the maximal number of epochs MSE of test data can constantly increase
        #
        # @param limit: integer for maximal number of epochs
        # @return: StopPolicyDescriptor itself
        ##
        def withEpochsLimit(self, limit):
            self.__policy.setEpochsLimit(limit)
            return self

        ## Defines the MSE for test data which has to be reached
        #
        # @param limit: float for MSE
        # @return: StopPolicyDescriptor itself
        ##
        def withMSELimit(self, limit):
            self.__policy.setMSELimit(limit)
            return self

        ##
        # Defines the maximal number of points on grid
        #
        # @param limit: integer for maximal number of points on grid
        # @return: StopPolicyDescriptor itself
        ##
        def withGridSizeLimit(self, limit):
            self.__policy.setGridSizeLimit(limit)
            return self

        ##
        # Defines the accuracy for test data which has to be reached
        #
        # @param limit: float for accuracy
        # @return: StopPolicyDescriptor itself
        ##
        def withAccuracyLimit(self, limit):
            self.__policy.setAccuracyLimit(limit)
            return self

    ##
    # TrainingSpecification Descriptor helps to implement fluent interface pattern in python;
    # it encapsulates functionality concerning creation of the training specification
    ##
    class SpecificationDescriptor:
        __builder = None
        __specification = None

        ##
        # Constructor
        #
        # @param builder: LearnerBuilder which creates this Descriptor
        ##
        def __init__(self, builder):
            self.__builder = builder
            self.__specification = TrainingSpecification()

        ##
        # Overrides built-in method.
        # Called for every attribute that is not defined by this Descriptor,
        # most probably a method of LearnerBuilder. Stores the specification
        # in the learner and forwards the lookup to the builder.
        #
        # @param attr: String for method name
        # @return: the attribute of the same name taken from LearnerBuilder
        ##
        def __getattr__(self, attr):
            specification = self.__specification
            if specification.getCOperator() is None:
                # no operator chosen explicitly: use the identity operator by default
                specification.setCOperator(
                    createOperationIdentity(self.__builder.getLearner().grid))
                specification.setCOperatorType('identity')
            self.__builder.getLearner().setSpecification(specification)
            return getattr(self.__builder, attr)

        ##
        # Specifies regression parameter of the learner
        #
        # @param value: float for regression parameter
        # @return: SpecificationDescriptor itself
        ##
        def withLambda(self, value):
            self.__specification.setL(value)
            return self

        ##
        # Specifies refinement threshold
        #
        # @param value: float for refinement threshold
        # @return: SpecificationDescriptor itself
        ##
        def withAdaptThreshold(self, value):
            self.__specification.setAdaptThreshold(value)
            return self

        ##
        # Specifies number of points which have to be refined in refinement step
        #
        # @param value: integer for number of points to refine
        # @return: SpecificationDescriptor itself
        ##
        def withAdaptPoints(self, value):
            self.__specification.setAdaptPoints(value)
            return self

        ## Specifies rate from total number of points on grid which should be refined
        #
        # @param value: float for rate
        # @return: SpecificationDescriptor itself
        ##
        def withAdaptRate(self, value):
            self.__specification.setAdaptRate(value)
            return self

        ## Specifies to use laplace operator
        #
        # The operator is created for the grid the learner has at the time
        # of this call.
        #
        # @return: SpecificationDescriptor itself
        ##
        def withLaplaceOperator(self):
            self.__specification.setCOperator(
                createOperationLaplace(self.__builder.getLearner().grid))
            self.__specification.setCOperatorType('laplace')
            return self

        ## Specifies to use identity operator
        #
        # The operator is created for the grid the learner has at the time
        # of this call.
        #
        # @return: SpecificationDescriptor itself
        ##
        def withIdentityOperator(self):
            self.__specification.setCOperator(
                createOperationIdentity(self.__builder.getLearner().grid))
            self.__specification.setCOperatorType('identity')
            return self

        ## Specifies the vectorization type
        #
        # @param vecType: value passed on to the specification's setVectorizationType()
        # @return: SpecificationDescriptor itself
        ##
        def withVectorizationType(self, vecType):
            self.__specification.setVectorizationType(vecType)
            return self

    ##
    # CGSolver Descriptor helps to implement fluent interface pattern in python;
    # it encapsulates functionality concerning creation of the CG-Solver
    ##
    class CGSolverDescriptor:
        __builder = None
        __solver = None

        ##
        # Constructor
        #
        # @param builder: LearnerBuilder which creates this Descriptor
        ##
        def __init__(self, builder):
            self.__builder = builder
            self.__solver = CGSolver()

        ##
        # Overrides built-in method.
        # Called for every attribute that is not defined by this Descriptor,
        # most probably a method of LearnerBuilder. Stores the solver in the
        # learner and forwards the lookup to the builder.
        #
        # @param attr: String for method name
        # @return: the attribute of the same name taken from LearnerBuilder
        ##
        def __getattr__(self, attr):
            self.__builder.getLearner().setSolver(self.__solver)
            return getattr(self.__builder, attr)

        ##
        # Defines the accuracy of CG-Solver
        #
        # @param accuracy: float for accuracy
        # @return: CGSolverDescriptor itself
        ##
        def withAccuracy(self, accuracy):
            self.__solver.setEpsilon(accuracy)
            return self

        ##
        # Defines the maximal number of iterations in CG algorithms
        #
        # @param imax: integer for maximal number of iterations in CG
        # @return: CGSolverDescriptor itself
        ##
        def withImax(self, imax):
            self.__solver.setImax(imax)
            return self

        ##
        # Defines the maximal accuracy.
        # If the norm of the residuum falls below this threshold, stop the CG iterations
        #
        # @param threshold: maximal accuracy
        # @return: CGSolverDescriptor itself
        ##
        def withThreshold(self, threshold):
            self.__solver.setThreshold(threshold)
            return self

        ## Enables the reuse of previous alpha data in the CG iteration
        # @return: CGSolverDescriptor itself
        ##
        def withAlphaReusing(self):
            self.__solver.setReuse(True)
            return self

    ##
    # Folding Descriptor helps to implement fluent interface pattern in python;
    # it encapsulates functionality concerning the usage for N-fold cross-validation
    ##
    class FoldingDescriptor:
        SEQUENTIAL = 100  ## Sequential folding policy
        RANDOM = 200      ## Random folding policy
        STRATIFIED = 300  ## Stratified folding policy
        FILES = 400       ## Files folding policy

        __builder = None
        __level = None
        __type = None
        __policy = None
        __seed = None

        ##
        # Constructor
        #
        # @param builder: LearnerBuilder which creates this Descriptor
        # @param type: Type of folding policy that should be built
        ##
        def __init__(self, builder, type):
            self.__builder = builder
            self.__type = type

        ##
        # Overrides built-in method.
        # Called for every attribute that is not defined by this Descriptor,
        # most probably a method of LearnerBuilder. Creates the folding
        # policy, stores it in the learner and forwards the lookup to the
        # builder.
        #
        # @param attr: String for method name
        # @return: the attribute of the same name taken from LearnerBuilder
        # @exception Exception if the data or the folding level is not
        #            defined or the folding type is unknown
        ##
        def __getattr__(self, attr):
            dataContainer = self.__builder.getLearner().dataContainer
            if dataContainer is None:
                raise Exception("Data not defined. Trainign data has to be defined before the folding policy")

            if self.__level is None:
                raise Exception("Folding level has to be defined")

            if self.__type == self.SEQUENTIAL:
                self.__policy = SequentialFoldingPolicy(dataContainer, self.__level)
            elif self.__type == self.RANDOM:
                self.__policy = RandomFoldingPolicy(dataContainer, self.__level, self.__seed)
            elif self.__type == self.STRATIFIED:
                self.__policy = StratifiedFoldingPolicy(dataContainer, self.__level, self.__seed)
            elif self.__type == self.FILES:
                self.__policy = FilesFoldingPolicy(dataContainer, self.__level)
            else:
                raise Exception("Folding type is not defined or is unproper")

            self.__builder.getLearner().setFoldingPolicy(self.__policy)
            return getattr(self.__builder, attr)

        ##
        # Defines the folding level
        #
        # @param level: integer folding level
        # @return: FoldingDescriptor itself
        ##
        def withLevel(self, level):
            self.__level = level
            return self

        ##
        # Defines the seed for random folding policy
        #
        # @param seed: integer seed
        # @return: FoldingDescriptor itself
        ##
        def withSeed(self, seed):
            self.__seed = seed
            return self