class AcceptanceCondition(utils.metaclass_insert(abc.ABCMeta, object)): """ GradientApproximators use provided information to both select points required to estimate gradients as well as calculate the estimates. """ ########################## # Initialization Methods # ########################## @classmethod def getInputSpecification(cls): """ Method to get a reference to a class that specifies the input data for class cls. @ In, cls, the class for which we are retrieving the specification @ Out, specs, InputData.ParameterInput, class to use for specifying input of cls. """ specs = InputData.parameterInputFactory(cls.__name__, ordered=False, strictMode=True) specs.description = 'Base class for acceptance conditions in the GradientDescent Optimizer.' return specs def __init__(self): """ Constructor. @ In, None @ Out, None """ ## Instance Variable Initialization # public # _protected # __private # additional methods def handleInput(self, specs): """ Read input specs @ In, specs, InputData.ParameterInput, parameter specs interpreted @ Out, None """ pass def initialize(self): """ After construction, finishes initialization of this approximator. @ In, None @ Out, None """ pass ############### # Run Methods # ############### @abc.abstractmethod def checkImprovement(self, new, old): """
def addInternal(self, Input, functionToRun, identifier, metadata=None, modulesToImport=None,
                forceUseThreads=False, uniqueHandler="any", clientQueue=False):
  """
    Method to add an internal run (function execution)
    @ In, Input, list, list of Inputs that are going to be passed to the function to be executed as *args
    @ In, functionToRun, function or method, the function that needs to be executed
    @ In, identifier, string, the job identifier
    @ In, metadata, dict, optional, dictionary of metadata associated to this run
    @ In, modulesToImport, list, optional, list of modules that need to be imported for internal
      parallelization (parallel python). This list should be generated with the method
      returnImportModuleString in utils.py. When omitted (None) an empty list is used.
    @ In, forceUseThreads, bool, optional, flag that, if True, is going to force the usage of
      multi-threading even if parallel python is activated
    @ In, uniqueHandler, string, optional, it is a special keyword attached to this runner. For example,
      if present, to retrieve this runner using the method jobHandler.getFinished, the uniqueHandler
      needs to be provided. If uniqueHandler == 'any', every "client" can get this runner
    @ In, clientQueue, boolean, optional, if this run needs to be added in the clientQueue
    @ Out, None
  """
  # build a fresh list on every call: a literal [] default is created once and
  # would be shared by every runner that receives it
  if modulesToImport is None:
    modulesToImport = []

  ## internal server is initialized only in case an internal calc is requested
  if not self.isParallelPythonInitialized:
    self.__initializeParallelPython()

  if self.ppserver is None or forceUseThreads:
    # no parallel python server (or threads explicitly requested): shared memory runner
    internalJob = Runners.SharedMemoryRunner(self.messageHandler, Input, functionToRun,
                                             identifier, metadata, uniqueHandler)
  else:
    skipFunctions = [utils.metaclass_insert(abc.ABCMeta, BaseType)]
    internalJob = Runners.DistributedMemoryRunner(self.messageHandler, self.ppserver, Input,
                                                  functionToRun, modulesToImport, identifier,
                                                  metadata, skipFunctions, uniqueHandler)

  # the queues and the list of submitted identifiers are updated under the queue lock
  with self.__queueLock:
    if not clientQueue:
      self.__queue.append(internalJob)
    else:
      self.__clientQueue.append(internalJob)
    self.__submittedJobs.append(identifier)
class Metric(utils.metaclass_insert(abc.ABCMeta, BaseType)):
  """
    General interface of any RAVEN metric object. It provides an initialize,
    a _readMoreXML, and an evaluation (i.e., distance) method.
  """

  def __init__(self):
    """
      Constructor: initializes the base type and the default metric flags
      @ In, None
      @ Out, None
    """
    BaseType.__init__(self)
    className = self.__class__.__name__
    # both the type and the name default to the name of the concrete class
    self.type = className
    self.name = className
    # If True the metric needs to be able to handle (value,probability) where value and probability are lists
    self.acceptsProbability = False
    # If True the metric needs to be able to handle a passed in Distribution
    self.acceptsDistribution = False

  def initialize(self, inputDict):
    """
      Initializes the metric object; nothing is done in this base implementation
      @ In, inputDict, dict, dictionary containing initialization parameters
      @ Out, None
    """
    return None

  def _readMoreXML(self, xmlNode):
    """
      Reads the portion of the xml input that belongs to this specialized class
      by forwarding the node to self._localReadMoreXML
      @ In, xmlNode, xml.etree.Element, Xml element node
      @ Out, None
    """
    self._localReadMoreXML(xmlNode)

  def distance(self, x, y, **kwargs):
    """
      Calculates the distance between two dataObjects x and y; this base
      implementation performs no computation and returns None
      @ In, x, dict, dictionary containing data of x
      @ In, y, dict, dictionary containing data of y
      @ In, kwargs, dict, dictionary of parameters characteristic of each metric (e.g., weights)
      @ Out, value, float, distance between x and y
    """
    return None
class CrossValidation(utils.metaclass_insert(abc.ABCMeta), MessageHandler.MessageUser):
  """
    Cross validation methods used to validate models
  """

  def __init__(self, messageHandler=None, **kwargs):
    """
      This is the basic method to initialize the cross validation object
      @ In, messageHandler, object, Message handler object (must not be None)
      @ In, kwargs, dict, initialization options, stored as-is in self.initOptionDict
      @ Out, None
    """
    assert messageHandler is not None
    self.messageHandler = messageHandler
    self.printTag = 'Cross Validation'
    # **kwargs is always a dict (possibly empty), never None, so it can be stored directly
    self.initOptionDict = kwargs

  def reset(self):
    """
      Used to reset private variables; nothing to reset in this base implementation
      @ In, None
      @ Out, None
    """
    pass

  def getCrossValidationType(self):
    """
      This method is used to return the type of cross validation to be employed.
      This base implementation does nothing (presumably overridden by the derived classes)
      @ In, None
      @ Out, None
    """
    pass

  def generateTrainTestIndices(self):
    """
      This method is used to generate train/test indices to split data in train test sets.
      This base implementation does nothing (presumably overridden by the derived classes)
      @ In, None
      @ Out, None
    """
    pass
class MetricDistributor(utils.metaclass_insert(abc.ABCMeta, BaseType), MessageHandler.MessageUser):
  """
    This class represents an interface with all the metrics algorithms
    It is a utility class needed to hide the discernment between time-dependent and static metrics
  """

  def __init__(self, estimator, messageHandler):
    """
      A constructor
      @ In, estimator, instance of given metric
      @ In, messageHandler, MessageHandler object, it is in charge of raising errors, and printing messages
      @ Out, None
    """
    self.printTag = 'MetricDistributor'
    # object of message handler
    self.messageHandler = messageHandler
    # instance of given Metric
    self.estimator = estimator
    # True if the instance of given metric, i.e. 'estimator', can handle time-dependent data, else False
    self.canHandleDynamicData = self.estimator.isDynamic()
    # True if the instance of given metric, i.e. 'estimator', can handle pairwise data, else False
    self.canHandlePairwiseData = self.estimator.isPairwise()

  def getInitParams(self):
    """
      This function is called from the base class to print some of the information inside the class.
      Whatever is permanent in the class and not inherited from the parent class should be mentioned here
      The information is passed back in the dictionary. No information about values that change during the simulation are allowed
      @ In, None
      @ Out, paramDict, dict, dictionary containing the parameter names as keys
        and each parameter's initial value as the dictionary values
    """
    paramDict = {}
    paramDict['Handle dynamic data'] = self.canHandleDynamicData
    paramDict['Handle pairwise data'] = self.canHandlePairwiseData
    paramDict['Metric name'] = self.estimator.name
    return paramDict

  def evaluatePairwise(self, pairedData):
    """
      Method to compute the metric between each pair of rows of matrices in pairedData
      @ In, pairedData, tuple, (featureValues, targetValues), both featureValues and targetValues
        are 2D numpy array with the same number of columns. For example, featureValues with shape
        (numRealizations1,numParameters), targetValues with shape (numRealizations2, numParameters)
      @ Out, output, numpy.ndarray, 2D array, with shape (numRealizations1,numRealization2)
    """
    # isinstance also accepts tuple subclasses (e.g. namedtuples)
    assert isinstance(pairedData, tuple), "The paired data is not a tuple!"
    if not self.canHandlePairwiseData:
      self.raiseAnError(IOError, "The metric", self.estimator.name, "can not handle pairwise data")
    feat, targ = pairedData
    output = self.estimator.evaluate(feat, targ)
    return output

  def evaluate(self, pairedData, weights=None, multiOutput='mean'):
    """
      Method to perform the evaluation of given paired data
      @ In, pairedData, tuple, ((featureValues, probabilityWeight), (targetValues, probabilityWeight)), both
        featureValues and targetValues have the same shape (numRealizations, numHistorySteps)
      @ In, weights, array_like (numpy.ndarray or list), optional, An array of weights associated with the pairedData
      @ In, multiOutput, string, optional, 'mean', 'max', 'min' or 'raw_values'
      @ Out, output, numpy.ndarray, 1D array, processed output from the estimator
    """
    assert isinstance(pairedData, tuple)
    # Error check for input data
    dynamicOutput = []
    for pData in pairedData:
      if not self.estimator.acceptsDistribution and isinstance(pData, Distributions.Distribution):
        self.raiseAnError(IOError, "Distribution is provided, but the metric ",
                          self.estimator.name, " can not handle it!")
    feat, targ = pairedData
    if isinstance(feat, Distributions.Distribution) and isinstance(targ, Distributions.Distribution):
      # distribution vs. distribution: a single evaluation
      out = self.estimator.evaluate(feat, targ)
      dynamicOutput.append(out)
    elif isinstance(feat, Distributions.Distribution):
      # distribution vs. data: one evaluation for each column of the target values
      targVals = np.asarray(targ[0])
      for hist in range(targVals.shape[1]):
        if targ[1] is not None:
          assert (len(targVals[:, hist]) == len(targ[1]))
          targIn = (targVals[:, hist], targ[1])
        else:
          targIn = targVals[:, hist]
        out = self.estimator.evaluate(feat, targIn)
        dynamicOutput.append(out)
    elif isinstance(targ, Distributions.Distribution):
      # data vs. distribution: one evaluation for each column of the feature values
      featVals = np.asarray(feat[0])
      for hist in range(featVals.shape[1]):
        if feat[1] is not None:
          assert (len(featVals[:, hist]) == len(feat[1]))
          featIn = (featVals[:, hist], feat[1])
        else:
          featIn = featVals[:, hist]
        out = self.estimator.evaluate(featIn, targ)
        dynamicOutput.append(out)
    elif self.estimator.type in ['CDFAreaDifference', 'PDFCommonArea']:
      # these metrics receive (values, weights) pairs, column by column
      featVals = np.asarray(feat[0])
      targVals = np.asarray(targ[0])
      for hist in range(featVals.shape[1]):
        if feat[1] is not None:
          featIn = (featVals[:, hist], feat[1])
        else:
          featIn = featVals[:, hist]
        if targ[1] is not None:
          assert (len(targVals[:, hist]) == len(targ[1]))
          targIn = (targVals[:, hist], targ[1])
        else:
          targIn = targVals[:, hist]
        out = self.estimator.evaluate(featIn, targIn)
        dynamicOutput.append(out)
    else:
      featVals = np.asarray(feat[0])
      targVals = np.asarray(targ[0])
      assert (featVals.shape[0] == targVals.shape[0])
      if feat[1] is not None:
        dataWeight = np.asarray(feat[1])
        assert (featVals.shape[0] == dataWeight.shape[0])
      else:
        dataWeight = None
      # FIXME: Currently, we only use the weights of given features to compute the metric, this
      # can be biased or incorrect. The correct way is to use the joint probability weight.
      # This needs to be improved in the future when RAVEN can handle the joint probability weight correctly.
      if self.canHandleDynamicData:
        dynamicOutput = self.estimator.evaluate(featVals, targVals, dataWeight)
      else:
        for hist in range(featVals.shape[1]):
          out = self.estimator.evaluate(featVals[:, hist], targVals[:, hist], dataWeight)
          dynamicOutput.append(out)
    # collapse the collected outputs according to the requested strategy
    if multiOutput == 'mean':
      output = [np.average(dynamicOutput, weights=weights)]
    elif multiOutput == 'max':
      output = [np.amax(dynamicOutput)]
    elif multiOutput == 'min':
      output = [np.amin(dynamicOutput)]
    elif multiOutput == 'raw_values':
      output = dynamicOutput
    else:
      # the message lists the values that are actually accepted above
      self.raiseAnError(IOError, "multiOutput: ", multiOutput,
                        " is not acceptable! Please use 'mean', 'max', 'min' or 'raw_values'")
    output = np.asarray(output)
    return output
class Sampler(utils.metaclass_insert(abc.ABCMeta, BaseType), Assembler):
  """
    This is the base class for samplers
    Samplers own the sampling strategy (Type) and they generate the input values using the associated distribution.
    They do not have distributions inside!!!!

    --Instance--
    myInstance = Sampler()
    myInstance.XMLread(xml.etree.ElementTree.Element)  This method generates all the information that will be permanent for the object during the simulation

    --usage--
    myInstance = Sampler()
    myInstance.XMLread(xml.etree.ElementTree.Element)  This method generates all permanent information of the object from <Simulation>
    myInstance.whatDoINeed()                           -see Assembler class-
    myInstance.generateDistributions(dict)             Here the seed for the random engine is started and the distributions are supplied to the sampler and
                                                       initialized. The call comes from <Simulation> since it is the only one possessing all the distributions.
    myInstance.initialize()                            This method is called from the <Step> before the Step process starts. In the base class it resets the counter to 0
    myInstance.amIreadyToProvideAnInput                Requested from <Step>, used to verify that the sampler is available to generate a new input
    myInstance.generateInput(self,model,oldInput)      Requested from <Step> to generate a new input. Generates the new values and requests the model to modify
                                                       the input accordingly, returning it back

    --Other inherited methods--
    myInstance.whoAreYou()                             -see BaseType class-
    myInstance.myCurrentSetting()                      -see BaseType class-

    --Adding a new Sampler subclass--
    <MyClass> should inherit at least from Sampler or from another step already present

    DO NOT OVERRIDE any of the class methods that are not starting with self.local*

    ADD your class to the dictionary __InterfaceDict at the end of the module

    The following method overriding is MANDATORY:
    self.localGenerateInput(model,oldInput)  : this is where the step happens, after this call the output is ready

    the following methods could be overridden:
    self.localInputAndChecks(xmlNode)
    self.localGetInitParams()
    self.localGetCurrentSetting()
    self.localInitialize()
    self.localStillReady(ready)
    self.localFinalizeActualSampling(jobObject,model,myInput)
  """

  def __init__(self):
    """
      Default Constructor that will initialize member variables with reasonable
      defaults or empty lists/dictionaries where applicable.
      @ In, None
      @ Out, None
    """
    BaseType.__init__(self)
    Assembler.__init__(self)
    # counter of the samples performed (better: of the inputs generated!); reset by self.initialize
    self.counter = 0
    # auxiliary counter of the samples performed (for its usage check the initialize method)
    self.auxcnt = 0
    # maximum number of samples (for example, Monte Carlo = number of HistorySet to run, DET = unlimited)
    self.limit = sys.maxsize
    # sampling mapping dictionary {'Variable Name':'name of the distribution'}
    self.toBeSampled = {}
    # sampling mapping dictionary for dependent variables {'Variable Name':'name of the external function'}
    self.dependentSample = {}
    # instances of the distributions to be used, keyed by variable name; filled every time the sampler is initialized
    self.distDict = {}
    # instances of the functions to be used, keyed by variable name; filled every time the sampler is initialized
    self.funcDict = {}
    # current value of each variable {'var name':value}
    self.values = {}
    # keyworded information attached to each input; its content depends on the sampler:
    #   'SampledVars'      -> where to get the values of the sampled variables (same object as self.values)
    #   'SampledVarsPb'    -> where to get the probability of the sampled variables
    #   'PointProbability' -> point wise probability (probability associated to a sampled point)
    #   'crowDist'         -> information needed to create a crow distribution, stored as a json object
    self.inputInfo = {
      'SampledVars': self.values,
      'SampledVarsPb': {},
      'PointProbability': None,
      'crowDist': {},
    }
    # if not provided the seed is randomly generated; the step can override it by sending in another seed
    self.initSeed = None
    # constant variables {'name': value}
    self.constants = {}
    # logical flag, True if every new evaluation is performed after a new reseeding
    self.reseedAtEachIteration = False
    # FIXME flag
    self.FIXME = False
    # prefix for all prints (sampler type)
    self.printTag = self.type
    # presampled points to restart from
    self.restartData = None
    # strictness with which to find matches in the restart data
    self.restartTolerance = 1e-15
    # max number of inputs creatable by the sampler right after a job ends (e.g., infinite for MC, 1 for Adaptive, etc)
    self._endJobRunnable = sys.maxsize

    ######
    # for each variable 'varName': {'dim': 1, 'reducedDim': 1, 'totDim': 2, 'name': 'distName'}
    #   dim        = dimension of the variable
    #   reducedDim = dimension of the variable in the transformed space
    #   totDim     = total dimensionality of its associated distribution
    self.variables2distributionsMapping = {}
    # for each distribution 'distName': [{'var1': 1}, {'var2': 2}], i.e. for each var its dimension is indicated
    self.distributions2variablesMapping = {}
    # one dictionary for each ND distribution (key) holding the initialization parameters
    # of the ND inverseCDF ('initialGridDisc' and 'tolerance')
    self.NDSamplingParams = {}
    ######
    self.addAssemblerObject('Restart', '-n', True)

    # used for PCA analysis
    # for each 'modelName': {latentVariables:[latentVar1, latentVar2, ...], manifestVariables:[manifestVar1,manifestVar2,...]}
    self.variablesTransformationDict = {}
    # transformation method used in the variablesTransformation node {'modelName':method}
    self.transformationMethod = {}
    # used in order to make sure the transformation info is printed once in the output xml file
    self.entitiesToRemove = []
def _localGenerateAssembler(self, initDict):
  """
    Receives the objects that have been requested through the "whatDoINeed" method
    and uses them to generate the distributions and functions of the sampler.
    @ In, initDict, dict, dictionary ({'mainClassName(e.g., Databases):{specializedObjectName(e.g.,DatabaseForSystemCodeNamedWolf):ObjectInstance}'})
    @ Out, None
  """
  availableDist = initDict['Distributions']
  availableFunc = initDict['Functions']
  self._generateDistributions(availableDist, availableFunc)

def _localWhatDoINeed(self):
  """
    This method is a local mirror of the general whatDoINeed method.
    It is implemented by the samplers that need to request special objects
    @ In, None
    @ Out, needDict, dict, list of objects needed
  """
  # Every sampler requires Distributions OR a Function
  needDict = {
    'Distributions': [(None, dist) for dist in self.toBeSampled.values()],
    'Functions': [(None, func) for func in self.dependentSample.values()],
  }
  return needDict

def _readMoreXML(self, xmlNode):
  """
    Function to read the portion of the xml input that belongs to this specialized class
    and initialize some stuff based on the inputs got
    The text is supposed to contain the info where and which variable to change.
    In case of a code the syntax is specified by the code interface itself
    @ In, xmlNode, xml.etree.ElementTree.Element, Xml element node
    @ Out, None
  """
  Assembler._readMoreXML(self, xmlNode)
  self._readMoreXMLbase(xmlNode)
  self.localInputAndChecks(xmlNode)

def _readMoreXMLbase(self, xmlNode):
  """
    Function to read the portion of the xml input that belongs to the base sampler only
    and initialize some stuff based on the inputs got
    The text is supposed to contain the info where and which variable to change.
    In case of a code the syntax is specified by the code interface itself
    @ In, xmlNode, xml.etree.ElementTree.Element, Xml element node
    @ Out, None
  """
  for child in xmlNode:
    prefix = ""
    if child.tag == 'Distribution':
      for childChild in child:
        if childChild.tag == 'distribution':
          prefix = "<distribution>"
          tobesampled = childChild.text
          self.toBeSampled[prefix + child.attrib['name']] = tobesampled
    elif child.tag == 'variable':
      foundDistOrFunc = False
      for childChild in child:
        if childChild.tag not in ('distribution', 'function'):
          continue
        # a variable is sampled either from a distribution or through a function, never both
        if foundDistOrFunc:
          self.raiseAnError(IOError, 'A sampled variable cannot have both a distribution and a function!')
        foundDistOrFunc = True
        tobesampled = childChild.text
        if childChild.tag == 'distribution':
          dim = childChild.get('dim')
          varData = {'name': childChild.text,
                     'dim': 1 if dim is None else int(dim)}
          self.variables2distributionsMapping[child.attrib['name']] = varData
          self.toBeSampled[prefix + child.attrib['name']] = tobesampled
        else:
          self.dependentSample[prefix + child.attrib['name']] = tobesampled
      if not foundDistOrFunc:
        self.raiseAnError(IOError, 'Sampled variable', child.attrib['name'],
                          'has neither a <distribution> nor <function> node specified!')
    elif child.tag == "variablesTransformation":
      transformationDict = {}
      listIndex = None
      for childChild in child:
        if childChild.tag in ("latentVariables", "manifestVariables"):
          transformationDict[childChild.tag] = [inp.strip() for inp in childChild.text.strip().split(',')]
        elif childChild.tag == "manifestVariablesIndex":
          # the index provided by the input file starts from 1, but the index used by the code starts from 0.
          listIndex = [int(inp.strip()) - 1 for inp in childChild.text.strip().split(',')]
        elif childChild.tag == "method":
          self.transformationMethod[child.attrib['distribution']] = childChild.text
      if listIndex is None:
        self.raiseAWarning('Index is not provided for manifestVariables, default index will be used instead!')
        # a real list, like the one built from the input file above
        listIndex = list(range(len(transformationDict["manifestVariables"])))
      transformationDict["manifestVariablesIndex"] = listIndex
      self.variablesTransformationDict[child.attrib['distribution']] = transformationDict
    elif child.tag == "constant":
      value = utils.partialEval(child.text)
      if value is None:
        # str() keeps the message buildable even when the node has no text at all
        self.raiseAnError(IOError, 'The body of "constant" XML block should be a number. Got: ' + str(child.text))
      try:
        self.constants[child.attrib['name']] = value
      except KeyError:
        self.raiseAnError(KeyError, child.tag + ' must have the attribute "name"!!!')
    elif child.tag == "restartTolerance":
      self.restartTolerance = float(child.text)

  if len(self.constants) > 0:
    # check if constant variables are also part of the sampled space. In case, error out
    if not set(self.toBeSampled.keys()).isdisjoint(self.constants.keys()):
      shared = [name for name in self.constants.keys() if name in self.toBeSampled]
      self.raiseAnError(IOError, "Some constant variables are also in the sampling space:" + ' '.join(shared))

  if self.initSeed is None:
    self.initSeed = Distributions.randomIntegers(0, 2**31, self)

  # Creation of the self.distributions2variablesMapping dictionary: {'distName': ({'variable_name1': dim1}, {'variable_name2': dim2})}
  for variable in self.variables2distributionsMapping.keys():
    distName = self.variables2distributionsMapping[variable]['name']
    dim = self.variables2distributionsMapping[variable]['dim']
    listElement = {variable: dim}
    if distName in self.distributions2variablesMapping:
      self.distributions2variablesMapping[distName].append(listElement)
    else:
      self.distributions2variablesMapping[distName] = [listElement]

  # creation of the self.distributions2variablesIndexList dictionary:{'distName':[dim1,dim2,...,dimN]}
  self.distributions2variablesIndexList = {}
  for distName in self.distributions2variablesMapping.keys():
    positions = set(utils.first(var.values()) for var in self.distributions2variablesMapping[distName])
    self.distributions2variablesIndexList[distName] = sorted(positions)

  for key in self.variables2distributionsMapping.keys():
    varInfo = self.variables2distributionsMapping[key]
    indexList = self.distributions2variablesIndexList[varInfo['name']]
    # the dimension of variable in the transformed space
    varInfo['reducedDim'] = indexList.index(varInfo['dim']) + 1
    # We will reset the value if the node <variablesTransformation> exist in the raven input file
    varInfo['totDim'] = max(indexList)
    if not self.variablesTransformationDict and varInfo['totDim'] > 1:
      if varInfo['totDim'] != len(indexList):
        self.raiseAnError(IOError, 'The "dim" assigned to the variables insider Sampler are not correct! the "dim" should start from 1, and end with the full dimension of given distribution')

  #Checking the variables transformation
  if self.variablesTransformationDict:
    for dist, varsDict in self.variablesTransformationDict.items():
      maxDim = len(varsDict['manifestVariables'])
      listLatentElement = varsDict['latentVariables']
      if len(set(listLatentElement)) != len(listLatentElement):
        dups = set(var for var in listLatentElement if listLatentElement.count(var) > 1)
        self.raiseAnError(IOError, 'The following are duplicated variables listed in the latentVariables: ' + str(dups))
      if len(set(varsDict['manifestVariables'])) != len(varsDict['manifestVariables']):
        dups = set(var for var in varsDict['manifestVariables'] if varsDict['manifestVariables'].count(var) > 1)
        self.raiseAnError(IOError, 'The following are duplicated variables listed in the manifestVariables: ' + str(dups))
      if len(set(varsDict['manifestVariablesIndex'])) != len(varsDict['manifestVariablesIndex']):
        dups = set(var + 1 for var in varsDict['manifestVariablesIndex'] if varsDict['manifestVariablesIndex'].count(var) > 1)
        self.raiseAnError(IOError, 'The following are duplicated variables indices listed in the manifestVariablesIndex: ' + str(dups))
      listElement = self.distributions2variablesMapping[dist]
      for var in listElement:
        # reset the totDim to reflect the totDim of original input space
        # (utils.first instead of keys()[0]: dictionary views can not be indexed)
        self.variables2distributionsMapping[utils.first(var.keys())]['totDim'] = maxDim
      tempListElement = {k.strip(): v for x in listElement for ks, v in x.items() for k in list(ks.strip().split(','))}
      listIndex = []
      for var in listLatentElement:
        if var not in set(tempListElement.keys()):
          self.raiseAnError(IOError, 'The variable listed in latentVariables ' + var + ' is not listed in the given distribution: ' + dist)
        listIndex.append(tempListElement[var] - 1)
      # NOTE(review): listIndex is zero-based while maxDim is a count, so this check
      # looks off by one (index == maxDim passes) -- confirm the intended bound
      if max(listIndex) > maxDim:
        self.raiseAnError(IOError, 'The maximum dim = ' + str(max(listIndex)) + ' defined for latent variables is exceeded the dimension of the problem ' + str(maxDim))
      if len(set(listIndex)) != len(listIndex):
        dups = set(var + 1 for var in listIndex if listIndex.count(var) > 1)
        self.raiseAnError(IOError, 'Each of the following dimensions are assigned to multiple latent variables in Samplers: ' + str(dups))
      # update the index for latentVariables according to the 'dim' assigned for given var defined in Sampler
      self.variablesTransformationDict[dist]['latentVariablesIndex'] = listIndex

def readSamplerInit(self, xmlNode):
  """
    This method is responsible to read only the samplerInit block in the .xml file.
    This method has been moved from the base sampler class since the samplerInit block is needed only for the MC and stratified (LHS) samplers
    @ In, xmlNode, xml.etree.ElementTree.Element, Xml element node
    @ Out, None
  """
  for child in xmlNode:
    if child.tag == "samplerInit":
      # default seed, replaced below if an <initialSeed> node is found
      self.initSeed = Distributions.randomIntegers(0, 2**31, self)
      for childChild in child:
        if childChild.tag == "limit":
          # stored as read (text); no conversion is performed here
          self.limit = childChild.text
        elif childChild.tag == "initialSeed":
          self.initSeed = int(childChild.text)
        elif childChild.tag == "reseedEachIteration":
          if childChild.text.lower() in utils.stringsThatMeanTrue():
            self.reseedAtEachIteration = True
        elif childChild.tag == "samplingType":
          self.samplingType = childChild.text
        elif childChild.tag == "distInit":
          for childChildChild in childChild:
            NDdistData = {}
            for childChildChildChild in childChildChild:
              if childChildChildChild.tag == 'initialGridDisc':
                NDdistData[childChildChildChild.tag] = int(childChildChildChild.text)
              elif childChildChildChild.tag == 'tolerance':
                NDdistData[childChildChildChild.tag] = float(childChildChildChild.text)
              else:
                self.raiseAnError(IOError, 'Unknown tag ' + childChildChildChild.tag + ' .Available are: initialGridDisc and tolerance!')
            self.NDSamplingParams[childChildChild.attrib['name']] = NDdistData
        else:
          # report the offending sub-node, not the enclosing <samplerInit> node
          self.raiseAnError(IOError, 'Unknown tag ' + childChild.tag + ' .Available are: limit, initialSeed, samplingType, reseedEachIteration and distInit!')

def endJobRunnable(self):
  """
    Returns the maximum number of inputs allowed to be created by the sampler
    right after a job ends (e.g., infinite for MC, 1 for Adaptive, etc)
    @ In, None
    @ Out, endJobRunnable, int, number of runnable jobs at the end of each sample
  """
  return self._endJobRunnable

def localInputAndChecks(self, xmlNode):
  """
    Local method. Place here the additional reading, remember to add initial parameters in the method localGetInitParams
    @ In, xmlNode, xml.etree.ElementTree.Element, Xml element node
    @ Out, None
  """
  pass

def getInitParams(self):
  """
    This function is called from the base class to print some of the information inside the class.
    Whatever is permanent in the class and not inherited from the parent class should be mentioned here
    The information is passed back in the dictionary. No information about values that change during the simulation are allowed
    @ In, None
    @ Out, paramDict, dict, dictionary containing the parameter names as keys
      and each parameter's initial value as the dictionary values
  """
  paramDict = {}
  for varName, distName in self.toBeSampled.items():
    paramDict[varName] = 'is sampled using the distribution ' + distName
  paramDict['limit'] = self.limit
  paramDict['initial seed'] = self.initSeed
  paramDict.update(self.localGetInitParams())
  return paramDict

def localGetInitParams(self):
  """
    Method used to export to the printer in the base class the additional PERMANENT your local class have
    @ In, None
    @ Out, paramDict, dict, dictionary containing the parameter names as keys
      and each parameter's initial value as the dictionary values
  """
  return {}

def getCurrentSetting(self):
  """
    This function is called from the base class to print some of the information inside the class.
    Whatever is a temporary value in the class and not inherited from the parent class should be mentioned here
    The information is passed back in the dictionary
    @ In, None
    @ Out, paramDict, dict, dictionary containing the parameter names as keys
      and each parameter's current value as the dictionary values
  """
  paramDict = {}
  paramDict['counter'] = self.counter
  paramDict['initial seed'] = self.initSeed
  for key in self.inputInfo:
    if key != 'SampledVars':
      paramDict[key] = self.inputInfo[key]
    else:
      # the sampled variables are reported one by one; the values live in
      # self.inputInfo (paramDict never receives a 'SampledVars' entry)
      for var, value in self.inputInfo['SampledVars'].items():
        paramDict['Variable: ' + var + ' has value'] = value
  paramDict.update(self.localGetCurrentSetting())
  return paramDict

def localGetCurrentSetting(self):
  """
    Returns a dictionary with class specific information regarding the
    current status of the object.
    @ In, None
    @ Out, paramDict, dict, dictionary containing the parameter names as keys
      and each parameter's current value as the dictionary values
  """
  return {}

def _generateDistributions(self, availableDist, availableFunc):
  """
    Generates the distributions and functions.
    @ In, availableDist, dict, dict of distributions
    @ In, availableFunc, dict, dict of functions
    @ Out, None
  """
  if self.initSeed is not None:
    Distributions.randomSeed(self.initSeed)
  for key in self.toBeSampled.keys():
    if self.toBeSampled[key] not in availableDist.keys():
      self.raiseAnError(IOError, 'Distribution ' + self.toBeSampled[key] + ' not found among available distributions (check input)!')
    self.distDict[key] = availableDist[self.toBeSampled[key]]
    self.inputInfo['crowDist'][key] = json.dumps(self.distDict[key].getCrowDistDict())
  for key, val in self.dependentSample.items():
    if val not in availableFunc.keys():
      # the exception type has to be the first argument, as in every other raiseAnError call
      self.raiseAnError(IOError, 'Function', val, 'was not found among the available functions:', list(availableFunc.keys()))
    self.funcDict[key] = availableFunc[val]
    # check if the correct method is present
    if "evaluate" not in self.funcDict[key].availableMethods():
      self.raiseAnError(IOError, 'Function ' + self.funcDict[key].name + ' does not contain a method named "evaluate". It must be present if this needs to be used in a Sampler!')

def initialize(self, externalSeeding=None, solutionExport=None):
  """
    This function should be called every time a clean sampler is needed. Called before takeAstep in <Step>
    @ In, externalSeeding, int, optional, external seed
    @ In, solutionExport, DataObject, optional, in goal oriented sampling (a.k.a. adaptive sampling this is where the space/point satisfying the constrains)
    @ Out, None
  """
  if self.initSeed == None:
    self.initSeed = Distributions.randomIntegers(0, 2**31, self)
  self.counter = 0
  if not externalSeeding:
    Distributions.randomSeed(self.initSeed) #use the sampler initialization seed
    self.auxcnt = self.initSeed
  elif externalSeeding == 'continue':
    pass #in this case the random sequence needs to be preserved
  else:
    Distributions.randomSeed(externalSeeding) #the external seeding is used
    self.auxcnt = externalSeeding
  #grab restart dataobject if it's available, then in localInitialize the sampler can deal with it.
if 'Restart' in self.assemblerDict.keys(): self.raiseADebug('Restart object: ' + str(self.assemblerDict['Restart'])) self.restartData = self.assemblerDict['Restart'][0][3] self.raiseAMessage('Restarting from ' + self.restartData.name) #check consistency of data try: rdata = self.restartData.getAllMetadata()['crowDist'] sdata = self.inputInfo['crowDist'] self.raiseAMessage('sampler inputs:') for sk, sv in sdata.items(): self.raiseAMessage('| ' + str(sk) + ': ' + str(sv)) for i, r in enumerate(rdata): if type(r) != dict: continue if not r == sdata: self.raiseAMessage('restart inputs %i:' % i) for rk, rv in r.items(): self.raiseAMessage('| ' + str(rk) + ': ' + str(rv)) self.raiseAnError( IOError, 'Restart "%s" data[%i] does not have same inputs as sampler!' % (self.restartData.name, i)) except KeyError as e: self.raiseAWarning( "No CROW distribution available in restart -", e) else: self.raiseAMessage('No restart for ' + self.printTag) #load restart data into existing points if self.restartData is not None: if not self.restartData.isItEmpty(): inps = self.restartData.getInpParametersValues() outs = self.restartData.getOutParametersValues() #FIXME there is no guarantee ordering is accurate between restart data and sampler inputs = list(v for v in inps.values()) existingInps = zip(*inputs) outVals = zip(*list(v for v in outs.values())) self.existing = dict(zip(existingInps, outVals)) #specializing the self.localInitialize() to account for adaptive sampling if solutionExport != None: self.localInitialize(solutionExport=solutionExport) else: self.localInitialize() for distrib in self.NDSamplingParams: if distrib in self.distributions2variablesMapping: params = self.NDSamplingParams[distrib] temp = utils.first( self.distributions2variablesMapping[distrib][0].keys()) self.distDict[temp].updateRNGParam(params) else: self.raiseAnError( IOError, 'Distribution "%s" specified in distInit block of sampler "%s" does not exist!' 
% (distrib, self.name)) # Store the transformation matrix in the metadata if self.variablesTransformationDict: self.entitiesToRemove = [] for variable in self.variables2distributionsMapping.keys(): distName = self.variables2distributionsMapping[variable][ 'name'] dim = self.variables2distributionsMapping[variable]['dim'] totDim = self.variables2distributionsMapping[variable][ 'totDim'] if totDim > 1 and dim == 1: transformDict = {} transformDict['type'] = self.distDict[ variable.strip()].type transformDict['transformationMatrix'] = self.distDict[ variable.strip()].transformationMatrix() self.inputInfo['transformation-' + distName] = transformDict self.entitiesToRemove.append('transformation-' + distName) def localInitialize(self): """ use this function to add initialization features to the derived class it is call at the beginning of each step @ In, None @ Out, None """ pass def _constantVariables(self): """ Method to set the constant variables into the inputInfo dictionary @ In, None @ Out, None """ if len(self.constants) > 0: # we inject the constant variables into the SampledVars self.inputInfo['SampledVars'].update(self.constants) # we consider that CDF of the constant variables is equal to 1 (same as its Pb Weight) self.inputInfo['SampledVarsPb'].update( dict.fromkeys(self.constants.keys(), 1.0)) self.inputInfo.update( dict.fromkeys([ 'ProbabilityWeight-' + key for key in self.constants.keys() ], 1.0)) def amIreadyToProvideAnInput(self): #inLastOutput=None): """ This is a method that should be call from any user of the sampler before requiring the generation of a new sample. This method act as a "traffic light" for generating a new input. Reason for not being ready could be for example: exceeding number of samples, waiting for other simulation for providing more information etc. etc. @ In, None @ Out, ready, bool, is this sampler ready to generate another sample? 
""" ready = True if self.counter < self.limit else False ready = self.localStillReady(ready) return ready def localStillReady(self, ready): #,lastOutput=None """ Determines if sampler is prepared to provide another input. If not, and if jobHandler is finished, this will end sampling. @ In, ready, bool, a boolean representing whether the caller is prepared for another input. @ Out, ready, bool, a boolean representing whether the caller is prepared for another input. """ return ready def generateInput(self, model, oldInput): """ This method has to be overwritten to provide the specialization for the specific sampler The model instance in might be needed since, especially for external codes, only the code interface possesses the dictionary for reading the variable definition syntax @ In, model, model instance, it is the instance of a RAVEN model @ In, oldInput, list, a list of the original needed inputs for the model (e.g. list of files, etc. etc) @ Out, generateInput, tuple(0,list), list contains the new inputs -in reality it is the model that returns this; the Sampler generates the value to be placed in the input of the model. The Out parameter depends on the results of generateInput If a new point is found, the default Out above is correct. If a restart point is found: @ Out, generateInput, tuple(int,dict), (1,realization dictionary) """ self.counter += 1 #since we are creating the input for the next run we increase the counter and global counter self.auxcnt += 1 #exit if over the limit if self.counter > self.limit: self.raiseADebug( 'Exceeded number of points requested in sampling! Moving on...' 
) #FIXME, the following condition check is make sure that the require info is only printed once when dump metadata to xml, this should be removed in the future when we have a better way to dump the metadata if self.counter > 1: for key in self.entitiesToRemove: self.inputInfo.pop(key, None) if self.reseedAtEachIteration: Distributions.randomSeed(self.auxcnt - 1) self.inputInfo['prefix'] = str(self.counter) model.getAdditionalInputEdits(self.inputInfo) self.localGenerateInput(model, oldInput) ##### TRANSFORMATION ##### # add latent variables and original variables to self.inputInfo if self.variablesTransformationDict: for dist, var in self.variablesTransformationDict.items(): if self.transformationMethod[dist] == 'pca': self.pcaTransform(var, dist) else: self.raiseAnError( NotImplementedError, 'transformation method is not yet implemented for ' + self.transformationMethod[dist] + ' method') ##### REDUNDANT FUNCTIONALS ##### # generate the function variable values for var in self.dependentSample.keys(): test = self.funcDict[var].evaluate("evaluate", self.values) for corrVar in var.split(","): self.values[corrVar.strip()] = test ##### CONSTANT VALUES ###### self._constantVariables() ##### RESTART ##### #check if point already exists if self.restartData is not None: inExisting = self.restartData.getMatchingRealization( self.values, tol=self.restartTolerance) else: inExisting = None #if not found or not restarting, we have a new point! 
if inExisting is None: self.raiseADebug('Found new point to sample:', self.values) return 0, model.createNewInput(oldInput, self.type, **self.inputInfo) #otherwise, return the restart point else: self.raiseADebug('Point found in restart:', inExisting['inputs']) realization = {} realization['metadata'] = copy.deepcopy(self.inputInfo) realization['inputs'] = inExisting['inputs'] realization['outputs'] = inExisting['outputs'] realization['prefix'] = self.inputInfo['prefix'] return 1, realization def pcaTransform(self, varsDict, dist): """ This method is used to map latent variables with respect to the model input variables both the latent variables and the model input variables will be stored in the dict: self.inputInfo['SampledVars'] @ In, varsDict, dict, dictionary contains latent and manifest variables {'latentVariables':[latentVar1,latentVar2,...], 'manifestVariables':[var1,var2,...]} @ In, dist, string, the distribution name associated with given variable set @ Out, None """ latentVariablesValues = [] listIndex = [] manifestVariablesValues = [None] * len(varsDict['manifestVariables']) for index, lvar in enumerate(varsDict['latentVariables']): for var, value in self.values.items(): if lvar == var: latentVariablesValues.append(value) listIndex.append(varsDict['latentVariablesIndex'][index]) varName = utils.first( utils.first(self.distributions2variablesMapping[dist]).keys()) varsValues = self.distDict[varName].pcaInverseTransform( latentVariablesValues, listIndex) for index1, index2 in enumerate(varsDict['manifestVariablesIndex']): manifestVariablesValues[index2] = varsValues[index1] manifestVariablesDict = dict( zip(varsDict['manifestVariables'], manifestVariablesValues)) self.values.update(manifestVariablesDict) @abc.abstractmethod def localGenerateInput(self, model, oldInput): """ This class need to be overwritten since it is here that the magic of the sampler happens. 
After this method call the self.inputInfo should be ready to be sent to the model @ In, model, model instance, Model instance @ In, oldInput, list, a list of the original needed inputs for the model (e.g. list of files, etc. etc) @ Out, None """ pass def generateInputBatch(self, myInput, model, batchSize, projector=None): #,lastOutput=None """ this function provide a mask to create several inputs at the same time It call the generateInput function as many time as needed @ In, myInput, list, list containing one input set @ In, model, model instance, it is the instance of a RAVEN model @ In, batchSize, int, the number of input sets required @ In, projector, object, optional, used for adaptive sampling to provide the projection of the solution on the success metric @ Out, newInputs, list of list, list of the list of input sets """ newInputs = [] #inlastO = None #if lastOutput: # if not lastOutput.isItEmpty(): inlastO = lastOutput #while self.amIreadyToProvideAnInput(inlastO) and (self.counter < batchSize): while self.amIreadyToProvideAnInput() and (self.counter < batchSize): if projector == None: newInputs.append(self.generateInput(model, myInput)) else: newInputs.append(self.generateInput(model, myInput, projector)) return newInputs def finalizeActualSampling(self, jobObject, model, myInput): """ This function is used by samplers that need to collect information from a finished run. Provides a generic interface that all samplers will use, for specifically handling any sub-class, the localFinalizeActualSampling should be overridden instead, as finalizeActualSampling provides only generic functionality shared by all Samplers and will in turn call the localFinalizeActualSampling before returning. 
@ In, jobObject, instance, an instance of a JobHandler @ In, model, model instance, it is the instance of a RAVEN model @ In, myInput, list, the generating input """ self.localFinalizeActualSampling(jobObject, model, myInput) def localFinalizeActualSampling(self, jobObject, model, myInput): """ Overwrite only if you need something special at the end of each run.... This function is used by samplers that need to collect information from the just ended run For example, for a Dynamic Event Tree case, this function can be used to retrieve the information from the just finished run of a branch in order to retrieve, for example, the distribution name that caused the trigger, etc. It is a essentially a place-holder for most of the sampler to remain compatible with the StepsCR structure @ In, jobObject, instance, an instance of a JobHandler @ In, model, model instance, it is the instance of a RAVEN model @ In, myInput, list, the generating input """ pass def handleFailedRuns(self, failedRuns): """ Collects the failed runs from the Step and allows samples to handle them individually if need be. @ In, failedRuns, list, list of JobHandler.ExternalRunner objects @ Out, None """ self.raiseADebug('===============') self.raiseADebug('| RUN SUMMARY |') self.raiseADebug('===============') if len(failedRuns) > 0: self.raiseAWarning( 'There were %i failed runs! Run with verbosity = debug for more details.' 
% (len(failedRuns))) for run in failedRuns: metadata = run.getMetadata() self.raiseADebug(' Run number %s FAILED:' % run.identifier, run.command) self.raiseADebug(' return code :', run.getReturnCode()) if metadata is not None: self.raiseADebug(' sampled vars:') for v, k in metadata['SampledVars'].items(): self.raiseADebug(' ', v, ':', k) else: self.raiseADebug('All runs completed without returning errors.') self._localHandleFailedRuns(failedRuns) self.raiseADebug('===============') self.raiseADebug(' END SUMMARY ') self.raiseADebug('===============') def _localHandleFailedRuns(self, failedRuns): """ Specialized method for samplers to handle failed runs. Defaults to failing runs. @ In, failedRuns, list, list of JobHandler.ExternalRunner objects @ Out, None """ if len(failedRuns) > 0: self.raiseAnError(IOError, 'There were failed runs; aborting RAVEN.')
class DataObject(utils.metaclass_insert(abc.ABCMeta, BaseType)):
    """
      Base class. Data objects are RAVEN's method for storing data internally and passing it from one
      RAVEN entity to another. Fundamentally, they consist of a collection of realizations, each of
      which contains inputs, outputs, and pointwise metadata. In addition, the data object has global
      metadata. The pointwise inputs and outputs could be floats, time-dependent, or ND-dependent variables.
      This base class is used to force the consistent API between all data containers
    """

    ### INPUT SPECIFICATION ###
    @classmethod
    def getInputSpecification(cls):
        """
          Method to get a reference to a class that specifies the input data for class "cls".
          @ In, cls, the class for which we are retrieving the specification
          @ Out, inputSpecification, InputData.ParameterInput, class to use for specifying the input of cls.
        """
        inputSpecification = super(DataObject, cls).getInputSpecification()
        inputSpecification.addParam('hierarchical', InputData.BoolType)

        inputInput = InputData.parameterInputFactory(
            'Input', contentType=InputData.StringType)  #TODO list
        inputSpecification.addSub(inputInput)

        outputInput = InputData.parameterInputFactory(
            'Output', contentType=InputData.StringType)  #TODO list
        inputSpecification.addSub(outputInput)

        # TODO this should be specific to ND set
        indexInput = InputData.parameterInputFactory(
            'Index', contentType=InputData.StringType)  #TODO list
        indexInput.addParam('var', InputData.StringType, True)
        inputSpecification.addSub(indexInput)

        # the "options" node groups the selection options by their content type
        optionsInput = InputData.parameterInputFactory("options")
        for option in ['operator', 'pivotParameter']:
            optionSubInput = InputData.parameterInputFactory(
                option, contentType=InputData.StringType)
            optionsInput.addSub(optionSubInput)
        for option in ['inputRow', 'outputRow']:
            optionSubInput = InputData.parameterInputFactory(
                option, contentType=InputData.IntegerType)
            optionsInput.addSub(optionSubInput)
        for option in ['outputPivotValue', 'inputPivotValue']:
            optionSubInput = InputData.parameterInputFactory(
                option, contentType=InputData.FloatType)
            optionsInput.addSub(optionSubInput)
        inputSpecification.addSub(optionsInput)

        return inputSpecification

    def __init__(self):
        """
          Constructor.
          @ In, None
          @ Out, None
        """
        BaseType.__init__(self)
        self.name = 'DataObject'
        self.printTag = self.name
        self._sampleTag = 'RAVEN_sample_ID'  # column name to track samples
        # list(str) protected RAVEN variable names, should not be avail to user as var names
        self.protectedTags = ['RAVEN_parentID', 'RAVEN_isEnding']
        self._inputs = []  # list(str) of input variables
        self._outputs = []  # list(str) of output variables
        self._metavars = []  # list(str) of POINTWISE metadata variables
        self._orderedVars = []  # list(str) of vars IN ORDER of their index
        self._meta = {}  # dictionary to collect meta until data is collapsed
        self._selectInput = None  # if not None, describes how to collect input data from history
        self._selectOutput = None  # if not None, describes how to collect output data from history
        self._pivotParams = {}  # independent dimensions as keys, values are the vars that depend on them
        # mapping between variables and indexes ({var:index}).
        # "index" here refers to dimensional variables (e.g. time, x, y, z etc)
        self._fromVarToIndex = {}
        self._aliases = {}  # variable aliases
        self._data = None  # underlying data structure
        self._collector = None  # object used to collect samples
        self._inputKDTree = None  # for finding outputs given inputs (pointset only?)
        self._scaleFactors = None  # scaling factors inputs as {var:(mean,scale)}
        # this flag controls the printing/plotting of the dataobject in case it is an hierarchical one.
        # If True, all the branches are going to be printed/plotted independently,
        # otherwise they are going to be reconstructed
        self.hierarchical = False

    @property
    def sampleTag(self):
        """
          Getter property for _sampleTag, the tag that identifies the realization label for RAVEN
          @ In, None
          @ Out, sampleTag, string, variable name
        """
        return self._sampleTag

    def _readMoreXML(self, xmlNode):
        """
          Initializes data object based on XML input
          @ In, xmlNode, xml.etree.ElementTree.Element or InputData.ParameterInput specification, input information
          @ Out, None
        """
        if isinstance(xmlNode, InputData.ParameterInput):
            inp = xmlNode
        else:
            inp = DataObject.getInputSpecification()()
            inp.parseNode(xmlNode)
        # get hierarchical strategy
        self.hierarchical = inp.parameterValues.get("hierarchical", False)

        for child in inp.subparts:
            # TODO check for repeats, "notAllowdInputs", names in both input and output space
            if child.getName() == 'Input':
                self._inputs.extend(
                    list(x.strip() for x in child.value.split(',')
                         if x.strip() != ''))
            elif child.getName() == 'Output':
                self._outputs.extend(
                    list(x.strip() for x in child.value.split(',')
                         if x.strip() != ''))
            elif child.getName() == 'Index':
                depends = list(d.strip() for d in child.value.split(','))
                var = child.parameterValues['var']
                self._pivotParams[var] = depends
            # options node
            elif child.getName() == 'options':
                duplicateInp = False  # if True, then multiple specification options were used for input
                duplicateOut = False  # if True, then multiple specification options were used for output
                for cchild in child.subparts:
                    # pivot
                    if cchild.getName() == 'pivotParameter':
                        # TODO not applicable to ND, only to HistSet, but read it here
                        # TODO add checks somewhere if both "index" and "pivotParameter" are provided
                        self._tempPivotParam = cchild.value.strip()
                    # input pickers
                    elif cchild.getName() in ['inputRow', 'inputPivotValue']:
                        if self._selectInput is not None:
                            duplicateInp = True
                        self.setSelectiveInput(cchild.getName(), cchild.value)
                    # output pickers
                    elif cchild.getName() in [
                            'outputRow', 'outputPivotValue', 'operator'
                    ]:
                        if self._selectOutput is not None:
                            duplicateOut = True
                        self._selectOutput = (cchild.getName(), cchild.value)
                # TODO check this in the input checker instead of here?
                if duplicateInp:
                    self.raiseAWarning(
                        'Multiple options were given to specify the input row to read! Using last entry:',
                        self._selectInput)
                if duplicateOut:
                    self.raiseAWarning(
                        'Multiple options were given to specify the output row to read! Using last entry:',
                        self._selectOutput)
            # end options node
        # end input reading
        # clear keywords InputPlaceHolder but NOT the OutputPlaceHolder, for legacy reasons
        while 'InputPlaceHolder' in self._inputs:
            self._inputs.remove('InputPlaceHolder')
        # set default pivot parameters, if needed
        self._setDefaultPivotParams()
        # remove index variables from input/output spaces, but silently, since we'll still have them available later
        for index in self._pivotParams.keys():
            try:
                self._outputs.remove(index)
            except ValueError:
                pass  # not requested as output anyway
            try:
                self._inputs.remove(index)
            except ValueError:
                pass  # not requested as input anyway
        self._orderedVars = self._inputs + self._outputs
        # check if protected vars have been violated: a single protected tag used as
        # a variable name is already a violation (not only all of them at once)
        if set(self.protectedTags).intersection(self._orderedVars):
            self.raiseAnError(
                IOError,
                'Input, Output and Index variables can not be part of RAVEN protected tags: '
                + ','.join(self.protectedTags))
        # create dict var to index
        # FIXME: this dict will not work in case of variables depending on multiple indexes.
        #        When this need comes, we will change this check(alfoa)
        if self.indexes:
            for ind in self.indexes:
                self._fromVarToIndex.update(
                    dict.fromkeys(self._pivotParams[ind], ind))

        if self.messageHandler is None:
            self.messageHandler = MessageHandler()

    def _setDefaultPivotParams(self):
        """
          Allows setting default pivot parameters. In general, does nothing.
          @ In, None
          @ Out, None
        """
        pass

    def setPivotParams(self, params):
        """
          Sets the pivot parameters for variables.
          @ In, params, dict, var:[params] as str:list(str)
          @ Out, None
        """
        # TODO typechecking, assertions
        # all the coordinates mentioned by any variable
        coords = set().union(*params.values())
        for coord in coords:
            dependents = list(var for var in params.keys()
                              if coord in params[var])
            if coord not in self._pivotParams:
                self._pivotParams[coord] = dependents
            else:
                # merge with the variables already registered, without duplicates
                self._pivotParams[coord] = list(
                    set(dependents + self._pivotParams[coord]))

    def setSelectiveInput(self, option, value):
        """
          Sets the input selection method for retrieving subset data.
          @ In, option, str, from [inputRow,inputPivotValue]
          @ In, value, int or float, either the index (row number) or the pivot value (will be cast if other type)
          @ Out, None
        """
        assert (option in ['inputRow', 'inputPivotValue'])
        if option == 'inputRow':
            value = int(value)
        elif option == 'inputPivotValue':
            value = float(value)
        self._selectInput = (option, value)
        self.raiseADebug('Set selective input to', self._selectInput)

    def setSelectiveOutput(self, option, value):
        """
          Sets the output selection method for retrieving subset data.
          @ In, option, str, from [outputRow,outputPivotValue,operator]
          @ In, value, int or float or str, index or pivot value or operator name respectively
          @ Out, None
        """
        assert (option in ['outputRow', 'outputPivotValue', 'operator'])
        if option == 'outputRow':
            value = int(value)
        elif option == 'outputPivotValue':
            value = float(value)
        elif option == 'operator':
            value = value.strip().lower()
        self._selectOutput = (option, value)
        self.raiseADebug('Set selective output to', self._selectOutput)

    ######################
    # DATA CONTAINER API #
    ######################
    @abc.abstractmethod
    def addExpectedMeta(self, keys, params={}):
        """
          Registers meta to look for in realization
          @ In, keys, set(str), keys to register
          @ In, params, dict, optional, {key:[indexes]}, keys of the dictionary are the variable names,
            values of the dictionary are lists of the corresponding indexes/coordinates of given variable
          @ Out, None
        """
        pass

    @abc.abstractmethod
    def addMeta(self, tag, xmlDict):
        """
          Adds general (not pointwise) metadata to this data object.  Can add several values at once, collected
          as a dict keyed by target variables.
          Data ends up being written as follows (see docstrings above for dict structure)
           - A good default for 'target' is 'general' if there's not a specific target
          <tag>
            <target>
              <scalarMetric>value</scalarMetric>
              <scalarMetric>value</scalarMetric>
              <vectorMetric>
                <wrt>value</wrt>
                <wrt>value</wrt>
              </vectorMetric>
            </target>
            <target>
              <scalarMetric>value</scalarMetric>
              <vectorMetric>
                <wrt>value</wrt>
              </vectorMetric>
            </target>
          </tag>
          @ In, tag, str, section to add metadata to, usually the data submitter (BasicStatistics, DataObject, etc)
          @ In, xmlDict, dict, data to change, of the form {target:{scalarMetric:value,scalarMetric:value,vectorMetric:{wrt:value,wrt:value}}}
          @ Out, None
        """
        pass

    @abc.abstractmethod
    def addRealization(self, rlz):
        """
          Adds a "row" (or "sample") to this data object.
          This is the method to add data to this data object.
          Note that rlz can include many more variables than this data object actually wants.
          Before actually adding the realization, data is formatted for this data object.
          @ In, rlz, dict, {var:val} format where
                             "var" is the variable name as a string,
                             "val" is either a float or a np.ndarray of values.
          @ Out, None
        """
        pass

    @abc.abstractmethod
    def addVariable(self, varName, values, classify='meta'):
        """
          Adds a variable/column to the data.  "values" needs to be as long as self.size.
          @ In, varName, str, name of new variable
          @ In, values, np.array, new values (floats/str for scalars, xr.DataArray for hists)
          @ In, classify, str, optional, either 'input', 'output', or 'meta'
          @ Out, None
        """
        pass

    @abc.abstractmethod
    def asDataset(self):
        """
          Casts this dataobject as an xr.Dataset.
          Functionally, typically collects the data from self._collector and places it in self._data.
          Efficiency note: this is the slowest part of typical data collection.
          @ In, None
          @ Out, xarray.Dataset, all the data from this data object.
        """
        pass

    @abc.abstractmethod
    def constructNDSample(self, vals, dims, coords, name=None):
        """
          Constructs a single realization instance (for one variable) from a realization entry.
          @ In, vals, np.ndarray, should have shape of (len(coords[d]) for d in dims)
          @ In, dims, list(str), names of dependent dimensions IN ORDER of appearance in vals, e.g. ['time','x','y']
          @ In, coords, dict, {dimension:list(float)}, values for each dimension at which 'val' was obtained,
            e.g. {'time': [0.1, 0.2, 0.3]}
          @ In, name, str, optional, name for the constructed sample (its use is left to the implementing class)
          @ Out, obj, xr.DataArray, completed realization instance suitable for sending to "addRealization"
        """
        pass

    @abc.abstractmethod
    def getDimensions(self, var):
        """
          Provides the independent dimensions that this variable depends on.
          To get all dimensions at once, use self.indexes property.
          @ In, var, str, name of variable (if None, give all)
          @ Out, dims, dict, {name:values} of independent dimensions
        """
        pass

    @abc.abstractmethod
    def getMeta(self, keys=None, pointwise=False, general=False):
        """
          Method to obtain entries in the metadata.  If neither pointwise nor general, then returns an empty dict.
          @ In, keys, list(str), optional, the keys (or main tag) to search for.  If None, return all.
          @ In, pointwise, bool, optional, if True then matches will be searched in the pointwise metadata
          @ In, general, bool, optional, if True then matches will be searched in the general metadata
          @ Out, meta, dict, key variables/xpaths to data object entries (column if pointwise, XML if general)
        """
        pass

    @abc.abstractmethod
    def getVars(self, subset=None):
        """
          Gives list of variables that are part of this dataset.
          @ In, subset, str, optional, if given can return 'input','output','meta' subset types
          @ Out, getVars, list(str), list of variable names requested
        """
        pass

    @abc.abstractmethod
    def getVarValues(self, var):
        """
          Returns the sampled values of "var"
          @ In, var, str or list(str), name(s) of variable(s)
          @ Out, res, xr.DataArray, samples (or dict of {var:xr.DataArray} if multiple variables requested)
        """
        pass

    @abc.abstractmethod
    def realization(self, index=None, matchDict=None, tol=1e-15):
        """
          Method to obtain a realization from the data, either by index or matching value.
          Either "index" or "matchDict" must be supplied.
          If matchDict and no match is found, will return (len(self),None) after the pattern of numpy, scipy
          @ In, index, int, optional, number of row to retrieve (by index, not be "sample")
          @ In, matchDict, dict, optional, {key:val} to search for matches
          @ In, tol, float, optional, tolerance to which match should be made
          @ Out, index, int, optional, index where found (or len(self) if not found), only returned if matchDict
          @ Out, rlz, dict, realization requested (None if not found)
        """
        pass

    @abc.abstractmethod
    def load(self, fname, style='netCDF', **kwargs):
        """
          Reads this dataset from disk based on the format.
          @ In, fname, str, path and name of file to read
          @ In, style, str, optional, options are enumerated below
          @ In, kwargs, dict, optional, additional arguments to pass to reading function
          @ Out, None
        """
        pass

    @abc.abstractmethod
    def remove(self, realization=None, variable=None):
        """
          Used to remove either a realization or a variable from this data object.
          @ In, realization, dict or int, optional, (matching or index of) realization to remove
          @ In, variable, str, optional, name of "column" to remove
          @ Out, None
        """
        pass

    @abc.abstractmethod
    def reset(self):
        """
          Sets this object back to its initial state.
          @ In, None
          @ Out, None
        """
        pass

    @abc.abstractmethod
    def sliceByIndex(self, axis):
        """
          Returns list of realizations at "snapshots" along "axis"
          @ In, axis, str, name of index along which to obtain slices
          @ Out, slices, list, list of slices
        """
        pass

    @abc.abstractmethod
    def write(self, fname, style='netCDF', **kwargs):
        """
          Writes this dataset to disk based on the format.
          @ In, fname, str, path and name of file to write
          @ In, style, str, optional, options are enumerated below
          @ In, kwargs, dict, optional, additional arguments to pass to writing function
          @ Out, None
        """
        pass
class CodeInterfaceBase(utils.metaclass_insert(abc.ABCMeta, object)):
  """
    Code Interface base class.
    This class should be the base class for all the code interfaces.
    In this way some methods are forced to be implemented and some automatic checking
    features are available (checking of the inputs if no executable is available), etc.
    NOTE: As said, this class SHOULD be the base class of the code interfaces. However,
          the developer of a newer code interface can decide to avoid to inherit from this
          class if he does not want to exploit the automatic checking of the code
          interface's functionalities
  """

  def __init__(self):
    """
      Constructor
      @ In, None
      @ Out, None
    """
    # accepted extensions of the main input file; always kept as a list
    self.inputExtensions = []

  def genCommand(self, inputFiles, executable, flags=None, fileArgs=None, preExec=None):
    """
      This method is used to retrieve the command (in tuple format) needed to launch the Code.
      This method checks a boolean environment variable called 'RAVENinterfaceCheck':
      if true, the subcodeCommand is going to be overwritten with an empty string. In this way
      we can check the functionality of the interface without having an executable.
      See Driver.py to understand how this Env variable is set
      @ In, inputFiles, list, List of input files (length of the list depends on the number of
                              inputs have been added in the Step is running this code)
      @ In, executable, string, executable name with absolute path
                                (e.g. /home/path_to_executable/code.exe)
      @ In, flags, dict, optional, dictionary containing the command-line flags the user can
                         specify in the input
                         (e.g. under the node <Code><clargs type='input' arg='i' extension='.inp'/></Code>)
      @ In, fileArgs, dict, optional, a dictionary containing the auxiliary input file variables
                         the user can specify in the input
                         (e.g. under the node <Code><clargs type='input' arg='aux' extension='.aux'/></Code>)
      @ In, preExec, string, optional, a string the command that needs to be pre-executed before
                         the actual command here defined
      @ Out, returnCommand, tuple, tuple containing the generated command. returnCommand[0] is the
                         command to run the code (string), returnCommand[1] is the name of the
                         output root
    """
    # preExec is forwarded only when supplied, so that implementations of
    # generateCommand that do not declare the keyword keep working
    if preExec is None:
      subcodeCommand, outputfileroot = self.generateCommand(
          inputFiles, executable, clargs=flags, fargs=fileArgs)
    else:
      subcodeCommand, outputfileroot = self.generateCommand(
          inputFiles, executable, clargs=flags, fargs=fileArgs, preExec=preExec)

    # interface-check mode: replace the real command with an empty one
    if os.environ.get('RAVENinterfaceCheck', 'False').lower() in utils.stringsThatMeanTrue():
      return [('parallel', '')], outputfileroot
    returnCommand = subcodeCommand, outputfileroot
    return returnCommand

  def readMoreXML(self, xmlNode):
    """
      Function to read the portion of the xml input that belongs to this class and
      initialize some members based on inputs.
      @ In, xmlNode, xml.etree.ElementTree.Element, Xml element node
      @ Out, None
    """
    self._readMoreXML(xmlNode)

  def _readMoreXML(self, xmlNode):
    """
      Function to read the portion of the xml input that belongs to this specialized class and
      initialize some members based on inputs. This can be overloaded in specialized code
      interface in order to read specific flags
      @ In, xmlNode, xml.etree.ElementTree.Element, Xml element node
      @ Out, None
    """
    pass

  @abc.abstractmethod
  def generateCommand(self, inputFiles, executable, clargs=None, fargs=None, preExec=None):
    """
      This method is used to retrieve the command (in tuple format) needed to launch the Code.
      @ In, inputFiles, list, List of input files (length of the list depends on the number of
                              inputs have been added in the Step is running this code)
      @ In, executable, string, executable name with absolute path
                                (e.g. /home/path_to_executable/code.exe)
      @ In, clargs, dict, optional, dictionary containing the command-line flags the user can
                         specify in the input
                         (e.g. under the node <Code><clargs type='input' arg='i' extension='.inp'/></Code>)
      @ In, fargs, dict, optional, a dictionary containing the auxiliary input file variables the
                         user can specify in the input
                         (e.g. under the node <Code><clargs type='input' arg='aux' extension='.aux'/></Code>)
      @ In, preExec, string, optional, a string the command that needs to be pre-executed before
                         the actual command here defined
      @ Out, returnCommand, tuple, tuple containing the generated command. returnCommand[0] is the
                         command to run the code (string), returnCommand[1] is the name of the
                         output root
    """
    return

  @abc.abstractmethod
  def createNewInput(self, currentInputFiles, oriInputFiles, samplerType, **Kwargs):
    """
      This method is used to generate an input based on the information passed in.
      @ In, currentInputFiles, list, list of current input files (input files from last this
                                     method call)
      @ In, oriInputFiles, list, list of the original input files
      @ In, samplerType, string, Sampler type (e.g. MonteCarlo, Adaptive, etc. see manual
                                 Samplers section)
      @ In, Kwargs, dictionary, kwarded dictionary of parameters. In this dictionary there is
                                another dictionary called "SampledVars" where RAVEN stores the
                                variables that got sampled
                                (e.g. Kwargs['SampledVars'] => {'var1':10,'var2':40})
      @ Out, newInputFiles, list, list of newer input files, list of the new input files
                                  (modified and not)
    """
    pass

  ####################
  ####### OPTIONAL METHODS #######
  ####################

  def getInputExtension(self):
    """
      This method returns a list of extension the code interface accepts for the input file
      (the main one)
      @ In, None
      @ Out, tuple, tuple of strings containing accepted input extension (e.g.(".i",".inp") )
    """
    return tuple(self.inputExtensions)

  def setInputExtension(self, exts):
    """
      This method sets a list of extension the code interface accepts for the input files
      @ In, exts, list, list or other array containing accepted input extension
                        (e.g.[".i",".inp"])
      @ Out, None
    """
    # list(...) always yields an independent list: slicing a tuple gives a tuple and
    # slicing a numpy array gives a view, neither of which supports the appends
    # performed later by addInputExtension
    self.inputExtensions = list(exts)

  def addInputExtension(self, exts):
    """
      This method adds a list of extension the code interface accepts for the input files
      @ In, exts, list, list or other array containing accepted input extension
                        (e.g.[".i",".inp"])
      @ Out, None
    """
    self.inputExtensions.extend(exts)

  def addDefaultExtension(self):
    """
      This method sets a list of default extensions a specific code interface accepts for the
      input files. This method should be overwritten if these are not acceptable defaults.
      @ In, None
      @ Out, None
    """
    self.addInputExtension(['i', 'inp', 'in'])

  def finalizeCodeOutput(self, command, output, workingDir):
    """
      this method is called by the RAVEN code at the end of each run (if the method is present).
      It can be used for those codes, that do not create CSV files to convert the whatever
      output format into a csv
      @ In, command, string, the command used to run the just ended job
      @ In, output, string, the Output name root
      @ In, workingDir, string, current working dir
      @ Out, output, string, optional, present in case the root of the output file gets changed
                                       in this method.
    """
    # default implementation: the output root is returned unchanged
    return output

  def checkForOutputFailure(self, output, workingDir):
    """
      This method is called by RAVEN at the end of each run if the return code is == 0.
      This method needs to be implemented by the codes that, if the run fails, return a return
      code that is 0. This can happen in those codes that record the failure of the job
      (e.g. not converged, etc.) as normal termination (returncode == 0)
      This method can be used, for example, to parse the output file looking for a special
      keyword that testifies that a particular job got failed
      (e.g. in RELAP5 would be the keyword "********")
      @ In, output, string, the Output name root
      @ In, workingDir, string, current working dir
      @ Out, failure, bool, True if the job is failed, False otherwise
    """
    # default implementation: never reports a failure
    failure = False
    return failure
class Step(utils.metaclass_insert(abc.ABCMeta, BaseType)):
  """
    This class implement one step of the simulation pattern.
    Usage:
    myInstance = Step()                                !Generate the instance
    myInstance.XMLread(xml.etree.ElementTree.Element)  !This method read the xml and perform all the needed checks
    myInstance.takeAstep()                             !This method perform the step

    --Internal chain [in square brackets methods that can be/must be overwritten]
    self.XMLread(xml)-->self._readMoreXML(xml)     -->[self._localInputAndChecks(xmlNode)]
    self.takeAstep() -->self._initializeStep()     -->[self._localInitializeStep()]
                     -->[self._localTakeAstepRun()]
                     -->self._endStepActions()
    --Other external methods--
    myInstance.whoAreYou()                 -see BaseType class-
    myInstance.myCurrentSetting()          -see BaseType class-
    myInstance.printMe()                   -see BaseType class-

    --Adding a new step subclass--
     **<MyClass> should inherit at least from Step or from another step already presents
     **DO NOT OVERRIDE any of the class method that are not starting with self.local*
     **ADD your class to the dictionary __InterfaceDict at the end of the module

    Overriding the following methods overriding unless you inherit from one of the already existing methods:
    self._localInputAndChecks(xmlNode)      : used to specialize the xml reading and the checks
    self._localGetInitParams()              : used to retrieve the local parameters and values to be printed
    self._localInitializeStep(inDictionary) : called after this call the step should be able the accept the call self.takeAstep(inDictionary):
    self._localTakeAstepRun(inDictionary)   : this is where the step happens, after this call the output is ready
  """

  def __init__(self):
    """
      Constructor
      @ In, None
      @ Out, None
    """
    BaseType.__init__(self)
    # List of list [[role played in the step, class type, specialization, global name (user assigned by the input)]]
    self.parList = []
    # Waiting time before checking if a run is finished
    self.sleepTime = 0.005
    # If a step possess re-seeding instruction it is going to ask to the sampler to re-seed according
    #   re-seeding = a number to be used as a new seed
    #   re-seeding = 'continue' the use the already present random environment
    # If there is no instruction (self.initSeed = None) the sampler will reinitialize
    self.initSeed = None
    # defined here as well as in _readMoreXML so that _endStepActions never hits a
    # missing attribute when the XML has not been read
    self.pauseEndStep = False
    self._knownAttribute += ['sleepTime', 're-seeding', 'pauseAtEnd', 'fromDirectory']
    self._excludeFromModelValidation = ['SolutionExport']
    self.printTag = 'STEPS'

  def _readMoreXML(self, xmlNode):
    """
      Handles the reading of all the XML describing the step
      Since step are not reused there will not be changes in the parameter describing the step
      after this reading
      @ In, xmlNode, xml.etree.ElementTree.Element, XML element node that represents the portion
                     of the input that belongs to this Step class
      @ Out, None
    """
    printString = 'For step of type {0:15} and name {1:15} the attribute {3:10} has been assigned to a not understandable value {2:10}'
    self.raiseADebug('move this tests to base class when it is ready for all the classes')
    attributes = xmlNode.attrib
    if not set(attributes.keys()).issubset(set(self._knownAttribute)):
      self.raiseAnError(IOError, 'In step of type {0:15} and name {1:15} there are unknown attributes {2:100}'.format(self.type, self.name, str(attributes.keys())))

    # re-seeding: either the keyword "continue" (any capitalization) or an integer seed
    if 're-seeding' in attributes:
      self.initSeed = attributes['re-seeding']
      if self.initSeed.lower() == "continue":
        self.initSeed = "continue"
      else:
        try:
          self.initSeed = int(self.initSeed)
        except ValueError:
          self.raiseAnError(IOError, printString.format(self.type, self.name, self.initSeed, 're-seeding'))

    if 'sleepTime' in attributes:
      try:
        self.sleepTime = float(attributes['sleepTime'])
      except ValueError:
        self.raiseAnError(IOError, printString.format(self.type, self.name, attributes['sleepTime'], 'sleepTime'))

    # every sub-node describes one object playing a role in the step
    for child in xmlNode:
      classType, classSubType = child.attrib.get('class'), child.attrib.get('type')
      if None in [classType, classSubType]:
        # child.text may be None (empty node): str() keeps the message buildable
        self.raiseAnError(IOError, "In Step named " + self.name + ", subnode " + child.tag + ", and body content = " + str(child.text) + " the attribute class and/or type has not been found!")
      self.parList.append([child.tag, classType, classSubType, child.text])

    self.pauseEndStep = False
    if 'pauseAtEnd' in attributes:
      pauseAtEnd = attributes['pauseAtEnd'].lower()
      if pauseAtEnd in utils.stringsThatMeanTrue():
        self.pauseEndStep = True
      elif pauseAtEnd in utils.stringsThatMeanFalse():
        self.pauseEndStep = False
      else:
        self.raiseAnError(IOError, printString.format(self.type, self.name, attributes['pauseAtEnd'], 'pauseAtEnd'))

    self._localInputAndChecks(xmlNode)
    if None in self.parList:
      self.raiseAnError(IOError, 'A problem was found in the definition of the step ' + str(self.name))

  @abc.abstractmethod
  def _localInputAndChecks(self, xmlNode):
    """
      Place here specialized reading, input consistency check and
      initialization of what will not change during the whole life of the object
      @ In, xmlNode, xml.etree.ElementTree.Element, XML element node that represents the portion
                     of the input that belongs to this Step class
      @ Out, None
    """
    pass

  def getInitParams(self):
    """
      Exports a dictionary with the information that will stay constant during the existence of
      the instance of this class. Overloaded from BaseType
      This function is called from the base class to print some of the information inside the
      class. Whatever is permanent in the class and not inherited from the parent class should
      be mentioned here
      The information is passed back in the dictionary. No information about values that change
      during the simulation are allowed
      @ In, None
      @ Out, paramDict, dict, dictionary containing the parameter names as keys
                              and each parameter's initial value as the dictionary values
    """
    paramDict = {}
    paramDict['Sleep time'] = str(self.sleepTime)
    paramDict['Initial seed'] = str(self.initSeed)
    # one entry per role; a role listed more than once keeps only its last entry
    for role, classType, subType, globalName in self.parList:
      paramDict[role] = 'Class: ' + str(classType) + ' Type: ' + str(subType) + ' Global name: ' + str(globalName)
    paramDict.update(self._localGetInitParams())
    return paramDict

  @abc.abstractmethod
  def _localGetInitParams(self):
    """
      Place here a specialization of the exporting of what in the step is added to the initial
      parameters the printing format of paramDict is key: paramDict[key]
      @ In, None
      @ Out, paramDict, dict, dictionary containing the parameter names as keys
                              and each parameter's initial value as the dictionary values
    """
    return {}

  def _initializeStep(self, inDictionary):
    """
      Method to initialize the current step.
      the job handler is restarted and re-seeding action are performed
      @ In, inDictionary, dict, the initialization dictionary
      @ Out, None
    """
    inDictionary['jobHandler'].startingNewStep()
    self.raiseADebug('jobHandler initialized')
    self._localInitializeStep(inDictionary)

  @abc.abstractmethod
  def _localInitializeStep(self, inDictionary):
    """
      This is the API for the local initialization of the children classes of step
      The inDictionary contains the instances for each possible role supported in the step
      (dictionary keywords) the instances of the objects in list if more than one is allowed
      The role of _localInitializeStep is to call the initialize method instance if needed
      Remember after each initialization to put:
      self.raiseADebug('for the role "+key+" the item of class '+inDictionary['key'].type+' and name '+inDictionary['key'].name+' has been initialized')
      @ In, inDictionary, dict, the initialization dictionary
      @ Out, None
    """
    pass

  @abc.abstractmethod
  def _localTakeAstepRun(self, inDictionary):
    """
      This is the API for the local run of a step for the children classes
      @ In, inDictionary, dict, contains the list of instances (see Simulation)
      @ Out, None
    """
    pass

  def _endStepActions(self, inDictionary):
    """
      This method is intended for performing actions at the end of a step
      @ In, inDictionary, dict, contains the list of instances (see Simulation)
      @ Out, None
    """
    if self.pauseEndStep:
      # only the outputs of type OutStreamPlot receive the 'interactive' instruction
      for output in inDictionary['Output']:
        if output.type in ['OutStreamPlot']:
          output.endInstructions('interactive')

  def takeAstep(self, inDictionary):
    """
      This should work for everybody just split the step in an initialization and the run itself
      inDictionary[role]=instance or list of instance
      @ In, inDictionary, dict, contains the list of instances (see Simulation)
      @ Out, None
    """
    self.raiseAMessage('*** Beginning initialization ***')
    self._initializeStep(inDictionary)
    self.raiseAMessage('***  Initialization done  ***')
    self.raiseAMessage('***       Beginning run       ***')
    self._localTakeAstepRun(inDictionary)
    self.raiseAMessage('***       Run finished        ***')
    self.raiseAMessage('***     Closing the step      ***')
    self._endStepActions(inDictionary)
    self.raiseAMessage('***        Step closed        ***')
class Model(utils.metaclass_insert(abc.ABCMeta,BaseType),Assembler):
  """
    A model is something that given an input will return an output reproducing some physical model
    it could as complex as a stand alone code, a reduced order model trained somehow or something
    externally build and imported by the user
  """
  # best-effort import of the plug-in factory: a failure is reported and otherwise ignored
  try:
    plugins = importlib.import_module("Models.ModelPlugInFactory")
  except Exception as ae:
    print("FAILED PLUGIN IMPORT",repr(ae))

  @classmethod
  def getInputSpecification(cls):
    """
      Method to get a reference to a class that specifies the input data for
      class cls.
      @ In, cls, the class for which we are retrieving the specification
      @ Out, inputSpecification, InputData.ParameterInput, class to use for
        specifying input of cls.
    """
    inputSpecification = super(Model, cls).getInputSpecification()
    inputSpecification.addParam("subType", InputData.StringType, True)

    ## Begin alias tag
    AliasInput = InputData.parameterInputFactory("alias", contentType=InputData.StringType)
    AliasInput.addParam("variable", InputData.StringType, True)
    AliasTypeInput = InputData.makeEnumType("aliasType","aliasTypeType",["input","output"])
    AliasInput.addParam("type", AliasTypeInput, True)
    inputSpecification.addSub(AliasInput)
    ## End alias tag

    return inputSpecification

  # validateDict maps each role to the list of {class, type, multiplicity, required}
  # descriptions of the objects accepted in that role
  validateDict                  = {}
  validateDict['Input'  ]       = []
  validateDict['Output' ]       = []
  validateDict['Sampler']       = []
  validateDict['Optimizer']     = []
  # template entry, shallow-copied for every accepted class
  testDict                      = {'class':'','type':[''],'multiplicity':0,'required':False}
  #FIXME: a multiplicity value is needed to control role that can have different class
  #the possible inputs
  validateDict['Input'].append(testDict.copy())
  validateDict['Input'  ][0]['class'       ] = 'DataObjects'
  validateDict['Input'  ][0]['type'        ] = ['PointSet','HistorySet']
  validateDict['Input'  ][0]['required'    ] = False
  validateDict['Input'  ][0]['multiplicity'] = 'n'
  validateDict['Input'].append(testDict.copy())
  validateDict['Input'  ][1]['class'       ] = 'Files'
  # FIXME there's lots of types that Files can be, so until XSD replaces this, commenting this out
  #validateDict['Input'  ][1]['type'        ] = ['']
  validateDict['Input'  ][1]['required'    ] = False
  validateDict['Input'  ][1]['multiplicity'] = 'n'
  #the possible outputs
  validateDict['Output'].append(testDict.copy())
  validateDict['Output' ][0]['class'       ] = 'DataObjects'
  validateDict['Output' ][0]['type'        ] = ['PointSet','HistorySet','DataSet']
  validateDict['Output' ][0]['required'    ] = False
  validateDict['Output' ][0]['multiplicity'] = 'n'
  validateDict['Output'].append(testDict.copy())
  validateDict['Output' ][1]['class'       ] = 'Databases'
  validateDict['Output' ][1]['type'        ] = ['HDF5']
  validateDict['Output' ][1]['required'    ] = False
  validateDict['Output' ][1]['multiplicity'] = 'n'
  validateDict['Output'].append(testDict.copy())
  validateDict['Output' ][2]['class'       ] = 'OutStreams'
  validateDict['Output' ][2]['type'        ] = ['Plot','Print']
  validateDict['Output' ][2]['required'    ] = False
  validateDict['Output' ][2]['multiplicity'] = 'n'
  #the possible samplers
  validateDict['Sampler'].append(testDict.copy())
  validateDict['Sampler'][0]['class'       ] ='Samplers'
  validateDict['Sampler'][0]['required'    ] = False
  validateDict['Sampler'][0]['multiplicity'] = 1
  validateDict['Sampler'][0]['type']         = ['MonteCarlo',
                                                'DynamicEventTree',
                                                'Stratified',
                                                'Grid',
                                                'LimitSurfaceSearch',
                                                'AdaptiveDynamicEventTree',
                                                'FactorialDesign',
                                                'ResponseSurfaceDesign',
                                                'SparseGridCollocation',
                                                'AdaptiveSparseGrid',
                                                'Sobol',
                                                'AdaptiveSobol',
                                                'EnsembleForward',
                                                'CustomSampler']
  #the possible optimizers
  validateDict['Optimizer'].append(testDict.copy())
  validateDict['Optimizer'][0]['class'       ] ='Optimizers'
  validateDict['Optimizer'][0]['required'    ] = False
  validateDict['Optimizer'][0]['multiplicity'] = 1
  validateDict['Optimizer'][0]['type']         = ['SPSA','FiniteDifference','ConjugateGradient']

  @classmethod
  def generateValidateDict(cls):
    """
      This method generate a independent copy of validateDict for the calling class
      @ In, None
      @ Out, None
    """
    cls.validateDict = copy.deepcopy(Model.validateDict)

  @classmethod
  def specializeValidateDict(cls):
    """
      This method should be overridden to describe the types of input accepted with a certain
      role by the model class specialization
      @ In, None
      @ Out, None
    """
    raise NotImplementedError('The class '+str(cls.__name__)+' has not implemented the method specializeValidateDict')

  @classmethod
  def localValidateMethod(cls,who,what):
    """
      This class method is called to test the compatibility of the class with its possible usage
      @ In, who, string, a string identifying the what is the role of what we are going to test
                         (i.e. input, output etc)
      @ In, what, string, a list (or a general iterable) that will be playing the 'who' role
      @ Out, True, bool, always True when no incompatibility is found (an IOError is raised
                         otherwise)
    """
    if who not in cls.validateDict.keys():
      raise IOError('The role "{}" is not recognized for the entity "{}"'.format(who,cls))
    testers = cls.validateDict[who]
    #counting successful matches (the counters are stored on the class-level entries)
    for tester in testers:
      tester['tempCounter'] = 0
    for anItem in what:
      anItem['found'] = False
      for tester in testers:
        if anItem['class'] != tester['class']:
          continue
        #FIXME Files can accept any type, including None.
        if anItem['class'] == 'Files' or anItem['type'] in tester['type']:
          tester['tempCounter'] += 1
          anItem['found'] = True
          break
    #testing if the multiplicity of the argument is correct
    for tester in testers:
      if tester['required']==True:
        if tester['multiplicity']=='n' and tester['tempCounter']<1:
          raise IOError('The number of times class = '+str(tester['class'])+' type= ' +str(tester['type'])+' is used as '+str(who)+' is improper. At least one object must be present!')
      if tester['multiplicity']!='n' and tester['tempCounter']!=tester['multiplicity']:
        raise IOError('The number of times class = '+str(tester['class'])+' type= ' +str(tester['type'])+' is used as '+str(who)+' is improper. Number of allowable times is '+str(tester['multiplicity'])+'.Got '+str(tester['tempCounter']))
    #testing if all argument to be tested have been found
    for anItem in what:
      if anItem['found']==False:
        # str(): the class or the type may be None and must not break the message
        raise IOError('It is not possible to use '+str(anItem['class'])+' type = ' +str(anItem['type'])+' as '+who)
    return True

  def __init__(self,runInfoDict):
    """
      Constructor
      @ In, runInfoDict, dict, the dictionary containing the runInfo (read in the XML input file)
      @ Out, None
    """
    BaseType.__init__(self)
    Assembler.__init__(self)
    #if alias are defined in the input it defines a mapping between the variable names in the framework and the one for the generation of the input
    #self.alias[framework variable name] = [input code name]. For Example, for a MooseBasedApp, the alias would be self.alias['internal_variable_name'] = 'Material|Fuel|thermal_conductivity'
    self.alias    = {'input':{},'output':{}}
    self.subType  = ''
    self.runQueue = []
    self.printTag = 'MODEL'
    self.createWorkingDir = False

  def _readMoreXML(self,xmlNode):
    """
      Function to read the portion of the xml input that belongs to this specialized class
      and initialize some stuff based on the inputs got
      @ In, xmlNode, xml.etree.ElementTree.Element, Xml element node
      @ Out, None
    """
    Assembler._readMoreXML(self,xmlNode)
    try:
      self.subType = xmlNode.attrib['subType']
    except KeyError:
      self.raiseADebug("Failed in Node: "+str(xmlNode),verbosity='silent')
      self.raiseAnError(IOError,'missed subType for the model '+self.name)
    for child in xmlNode:
      if child.tag != 'alias':
        continue
      # the input would be <alias variable='internal_variable_name'>Material|Fuel|thermal_conductivity</alias>
      if 'variable' in child.attrib.keys():
        if 'type' in child.attrib.keys():
          # normalize before validating, so that surrounding blanks are tolerated
          aliasType = child.attrib['type'].lower().strip()
          if aliasType not in ['input','output']:
            self.raiseAnError(IOError,'the type of alias can be either "input" or "output". Got '+aliasType)
          complementAliasType = 'output' if aliasType == 'input' else 'input'
        else:
          self.raiseAnError(IOError,'not found the attribute "type" in the definition of one of the alias for model '+str(self.name) +' of type '+self.type)
        if child.text is None:
          # an empty node has no text at all: report it as an input error
          self.raiseAnError(IOError,'no alias name has been provided for variable '+str(child.attrib['variable'])+' in model '+str(self.name) +' of type '+self.type)
        varFramework, varModel = child.attrib['variable'], child.text.strip()
        # neither the framework name nor the model name can be used twice, in either alias type
        if varFramework in self.alias[aliasType].keys():
          self.raiseAnError(IOError,' The alias for variable ' +varFramework+' has been already inputted in model '+str(self.name) +' of type '+self.type)
        if varModel in self.alias[aliasType].values():
          self.raiseAnError(IOError,' The alias ' +varModel+' has been already used for another variable in model '+str(self.name) +' of type '+self.type)
        if varFramework in self.alias[complementAliasType].keys():
          self.raiseAnError(IOError,' The alias for variable ' +varFramework+' has been already inputted ('+complementAliasType+') in model '+str(self.name) +' of type '+self.type)
        if varModel in self.alias[complementAliasType].values():
          self.raiseAnError(IOError,' The alias ' +varModel+' has been already used ('+complementAliasType+') for another variable in model '+str(self.name) +' of type '+self.type)
        self.alias[aliasType][varFramework] = varModel
      else:
        self.raiseAnError(IOError,'not found the attribute "variable" in the definition of one of the alias for model '+str(self.name) +' of type '+self.type)
    # read local information
    self.localInputAndChecks(xmlNode)
    #################

  def _replaceVariablesNamesWithAliasSystem(self, sampledVars, aliasType='input', fromModelToFramework=False):
    """
      Method to convert kwargs Sampled vars with the alias system
      @ In, sampledVars, dict, dictionary that are going to be modified (in place)
      @ In, aliasType, str, optional, type of alias to be replaced ('input', 'output' or
                            'inout' for both)
      @ In, fromModelToFramework, bool, optional, When we define aliases for some input variables,
                            we need to be sure to convert the variable names (if alias is of type
                            input) coming from RAVEN (e.g. sampled variables) into the
                            corresponding names of the model (e.g. frameworkVariableName = "wolf",
                            modelVariableName="thermal_conductivity"). Viceversa, when we define
                            aliases for some model output variables, we need to convert the
                            variable names coming from the model into the one that are used in
                            RAVEN (e.g. modelOutputName="00001111",
                            frameworkVariableName="clad_temperature"). The fromModelToFramework
                            bool flag controls this action (if True, we convert the name in the
                            dictionary from the model names to the RAVEN names, False vice versa)
      @ Out, originalVariables, dict, dictionary of the original sampled variables
    """
    listAliasType = ['input','output'] if aliasType == 'inout' else [aliasType]
    originalVariables = copy.deepcopy(sampledVars)
    for aliasTyp in listAliasType:
      for varFramework,varModel in self.alias[aliasTyp].items():
        if fromModelToFramework:
          oldName, newName = varModel, varFramework
        else:
          oldName, newName = varFramework, varModel
        # plain membership test: no sentinel value can collide with legitimate data
        if oldName in sampledVars:
          del sampledVars[oldName]
          sampledVars[newName] = originalVariables[oldName]
    return originalVariables

  def _handleInput(self, paramInput):
    """
      Function to handle the common parts of the model parameter input.
      @ In, paramInput, ParameterInput, the already parsed input.
      @ Out, None
    """
    if "subType" in paramInput.parameterValues:
      self.subType = paramInput.parameterValues["subType"]
    else:
      # report the parsed input: no xml node is available in this method
      self.raiseADebug(" Failed in Node: "+str(paramInput),verbosity='silent')
      self.raiseAnError(IOError,'missed subType for the model '+self.name)

  @abc.abstractmethod
  def evaluateSample(self, myInput, samplerType, kwargs):
    """
      This will evaluate an individual sample on this model. Note, parameters
      are needed by createNewInput and thus descriptions are copied from there.
      @ In, myInput, list, the inputs (list) to start from to generate the new one
      @ In, samplerType, string, is the type of sampler that is calling to generate a new input
      @ In, kwargs, dict, is a dictionary that contains the information coming from the sampler,
            a mandatory key is the sampledVars'that contains a dictionary {'name variable':value}
      @ Out, returnValue, tuple(input,dict), This holds the output information of the evaluated
             sample.
    """
    pass

  def localInputAndChecks(self,xmlNode):
    """
      Function to read the portion of the xml input that belongs to this specialized class
      and initialize some stuff based on the inputs got
      @ In, xmlNode, xml.etree.ElementTree.Element, Xml element node
      @ Out, None
    """
    pass

  def getInitParams(self):
    """
      This function is called from the base class to print some of the information inside the
      class. Whatever is permanent in the class and not inherited from the parent class should
      be mentioned here
      The information is passed back in the dictionary. No information about values that change
      during the simulation are allowed
      @ In, None
      @ Out, paramDict, dict, dictionary containing the parameter names as keys
        and each parameter's initial value as the dictionary values
    """
    paramDict = {}
    paramDict['subType'] = self.subType
    for key, value in self.alias['input'].items():
      paramDict['The model input variable '+str(value)+' is filled using the framework variable '] = key
    for key, value in self.alias['output'].items():
      paramDict['The model output variable '+str(value)+' is filled using the framework variable '] = key
    return paramDict

  def localGetInitParams(self):
    """
      Method used to export to the printer in the base class the additional PERMANENT your local
      class have
      @ In, None
      @ Out, paramDict, dict, dictionary containing the parameter names as keys
        and each parameter's initial value as the dictionary values
    """
    paramDict = {}
    return paramDict

  def initialize(self,runInfo,inputs,initDict=None):
    """
      this needs to be over written if a re initialization of the model is need it gets called
      at every beginning of a step after this call the next one will be run
      @ In, runInfo, dict, it is the run info from the jobHandler
      @ In, inputs, list, it is a list containing whatever is passed with an input role in the step
      @ In, initDict, dict, optional, dictionary of all objects available in the step is using
                            this model
      @ Out, None
    """
    pass

  @abc.abstractmethod
  def createNewInput(self,myInput,samplerType,**kwargs):
    """
      This function will return a new input to be submitted to the model, it is called by the
      sampler.
      @ In, myInput, list, the inputs (list) to start from to generate the new one
      @ In, samplerType, string, is the type of sampler that is calling to generate a new input
      @ In, **kwargs, dict, is a dictionary that contains the information coming from the sampler,
           a mandatory key is the sampledVars'that contains a dictionary {'name variable':value}
      @ Out, [(kwargs)], list, return the new input in a list form
    """
    return [(copy.copy(kwargs))]

  def submit(self, myInput, samplerType, jobHandler, **kwargs):
    """
      This will submit an individual sample to be evaluated by this model to a
      specified jobHandler. Note, some parameters are needed by createNewInput
      and thus descriptions are copied from there.
      @ In, myInput, list, the inputs (list) to start from to generate the new one
      @ In, samplerType, string, is the type of sampler that is calling to generate a new input
      @ In, jobHandler, JobHandler instance, the global job handler instance
      @ In, **kwargs, dict, is a dictionary that contains the information coming from the sampler,
           a mandatory key is the sampledVars'that contains a dictionary {'name variable':value}
      @ Out, None
    """
    prefix = kwargs.get("prefix")
    uniqueHandler = kwargs.get("uniqueHandler",'any')
    forceThreads = kwargs.get("forceThreads",False)

    ## These kwargs are updated by createNewInput, so the job either should not
    ## have access to the metadata, or it needs to be updated from within the
    ## evaluateSample function, which currently is not possible since that
    ## function does not know about the job instance.
    metadata = kwargs

    ## This may look a little weird, but due to how the parallel python library
    ## works, we are unable to pass a member function as a job because the
    ## pp library loses track of what self is, so instead we call it from the
    ## class and pass self in as the first parameter
    jobHandler.addJob((self, myInput, samplerType, kwargs),
                      self.__class__.evaluateSample,
                      prefix,
                      metadata=metadata,
                      modulesToImport=self.mods,
                      uniqueHandler=uniqueHandler,
                      forceUseThreads=forceThreads)

  def addOutputFromExportDictionary(self,exportDict,output,options,jobIdentifier):
    """
      Method that collects the outputs from them export dictionary
      @ In, exportDict, dict, dictionary containing the output/input values:
                              {'inputSpaceParams':dict(sampled variables),
                               'outputSpaceParams':dict(output variables),
                               'metadata':dict(metadata)}
      @ In, output, "DataObjects" object, output where the results of the calculation needs to
                                          be stored
      @ In, options, dict, dictionary of options that can be passed in when the collect of the
                           output is performed by another model (e.g. EnsembleModel)
      @ In, jobIdentifier, str, job identifier
      @ Out, None
    """
    if output.type == 'HDF5':
      # the group name is the model name followed by the job identifier;
      # entries in options take precedence over it
      optionsIn = {'group':self.name+str(jobIdentifier)}
      if options is not None:
        optionsIn.update(options)
      output.addGroupDataObjects(optionsIn,exportDict,False)
    else:
      self.collectOutputFromDict(exportDict,output,options)

  def collectOutput(self,collectFrom,storeTo,options=None):
    """
      Method that collects the outputs from the previous run
      @ In, collectFrom, InternalRunner object, instance of the run just finished
      @ In, storeTo, "DataObjects" object, output where the results of the calculation needs to
                                           be stored
      @ In, options, dict, optional, dictionary of options that can be passed in when the collect
                                     of the output is performed by another model
                                     (e.g. EnsembleModel)
      @ Out, None
    """
    #if a addOutput is present in nameSpace of storeTo it is used
    if 'addOutput' in dir(storeTo):
      storeTo.addOutput(collectFrom)
    else:
      self.raiseAnError(IOError,'The place where we want to store the output has no addOutput method!')

  def getAdditionalInputEdits(self,inputInfo):
    """
      Collects additional edits for the sampler to use when creating a new input. By default
      does nothing.
      @ In, inputInfo, dict, dictionary in which to add edits
      @ Out, None.
    """
    pass
class supervisedLearning(utils.metaclass_insert(abc.ABCMeta), MessageHandler.MessageUser): """ This is the general interface to any supervisedLearning learning method. Essentially it contains a train method and an evaluate method """ returnType = '' # this describe the type of information generated the possibility are 'boolean', 'integer', 'float' qualityEstType = [ ] # this describe the type of estimator returned known type are 'distance', 'probability'. The values are returned by the self.__confidenceLocal__(Features) ROMtype = '' # the broad class of the interpolator ROMmultiTarget = False # ROMtimeDependent = False # is this ROM able to treat time-like (any monotonic variable) explicitly in its formulation? @staticmethod def checkArrayConsistency(arrayIn, isDynamic=False): """ This method checks the consistency of the in-array @ In, arrayIn, object, It should be an array @ In, isDynamic, bool, optional, is Dynamic? @ Out, (consistent, 'error msg'), tuple, tuple[0] is a bool (True -> everything is ok, False -> something wrong), tuple[1], string ,the error mesg """ #checking if None provides a more clear message about the problem if arrayIn is None: return (False, ' The object is None, and contains no entries!') if type(arrayIn).__name__ == 'list': if isDynamic: for cnt, elementArray in enumerate(arrayIn): resp = supervisedLearning.checkArrayConsistency( elementArray) if not resp[0]: return (False, ' The element number ' + str(cnt) + ' is not a consistent array. Error: ' + resp[1]) else: return (False, ' The list type is allowed for dynamic ROMs only') else: if type(arrayIn).__name__ not in ['ndarray', 'c1darray']: return (False, ' The object is not a numpy array. Got type: ' + type(arrayIn).__name__) if len(np.asarray(arrayIn).shape) > 1: return (False, ' The array must be 1-d. 
Got shape: ' + str(np.asarray(arrayIn).shape)) return (True, '') def __init__(self, messageHandler, **kwargs): """ A constructor that will appropriately initialize a supervised learning object @ In, messageHandler, MessageHandler object, it is in charge of raising errors, and printing messages @ In, kwargs, dict, an arbitrary list of kwargs @ Out, None """ self.printTag = 'Supervised' self.messageHandler = messageHandler self._dynamicHandling = False #booleanFlag that controls the normalization procedure. If true, the normalization is performed. Default = True if kwargs != None: self.initOptionDict = kwargs else: self.initOptionDict = {} if 'Features' not in self.initOptionDict.keys(): self.raiseAnError(IOError, 'Feature names not provided') if 'Target' not in self.initOptionDict.keys(): self.raiseAnError(IOError, 'Target name not provided') self.features = self.initOptionDict['Features'].split(',') self.target = self.initOptionDict['Target'].split(',') self.initOptionDict.pop('Target') self.initOptionDict.pop('Features') self.verbosity = self.initOptionDict[ 'verbosity'] if 'verbosity' in self.initOptionDict else None for target in self.target: if self.features.count(target) > 0: self.raiseAnError( IOError, 'The target "' + target + '" is also in the feature space!') #average value and sigma are used for normalization of the feature data #a dictionary where for each feature a tuple (average value, sigma) self.muAndSigmaFeatures = {} #these need to be declared in the child classes!!!! self.amITrained = False def initialize(self, idict): """ Initialization method @ In, idict, dict, dictionary of initialization parameters @ Out, None """ pass #Overloaded by (at least) GaussPolynomialRom def train(self, tdict): """ Method to perform the training of the supervisedLearning algorithm NB.the supervisedLearning object is committed to convert the dictionary that is passed (in), into the local format the interface with the kernels requires. 
So far the base class will do the translation into numpy @ In, tdict, dict, training dictionary @ Out, None """ if type(tdict) != dict: self.raiseAnError( TypeError, 'In method "train", the training set needs to be provided through a dictionary. Type of the in-object is ' + str(type(tdict))) names, values = list(tdict.keys()), list(tdict.values()) ## This is for handling the special case needed by SKLtype=*MultiTask* that ## requires multiple targets. targetValues = [] for target in self.target: if target in names: targetValues.append(values[names.index(target)]) else: self.raiseAnError( IOError, 'The target ' + target + ' is not in the training set') #FIXME: when we do not support anymore numpy <1.10, remove this IF STATEMENT if int(np.__version__.split('.')[1]) >= 10: targetValues = np.stack(targetValues, axis=-1) else: sl = (slice(None), ) * np.asarray(targetValues[0]).ndim + ( np.newaxis, ) targetValues = np.concatenate( [np.asarray(arr)[sl] for arr in targetValues], axis=np.asarray(targetValues[0]).ndim) # construct the evaluation matrixes featureValues = np.zeros(shape=(len(targetValues), len(self.features))) for cnt, feat in enumerate(self.features): if feat not in names: self.raiseAnError( IOError, 'The feature sought ' + feat + ' is not in the training set') else: valueToUse = values[names.index(feat)] resp = self.checkArrayConsistency(valueToUse, self.isDynamic()) if not resp[0]: self.raiseAnError( IOError, 'In training set for feature ' + feat + ':' + resp[1]) valueToUse = np.asarray(valueToUse) if len(valueToUse) != featureValues[:, 0].size: self.raiseAWarning('feature values:', featureValues[:, 0].size, tag='ERROR') self.raiseAWarning('target values:', len(valueToUse), tag='ERROR') self.raiseAnError( IOError, 'In training set, the number of values provided for feature ' + feat + ' are != number of target outcomes!') self._localNormalizeData(values, names, feat) # valueToUse can be either a matrix (for who can handle time-dep data) or a vector (for who 
can not) featureValues[:, cnt] = ( (valueToUse[:, 0] if len(valueToUse.shape) > 1 else valueToUse[:]) - self.muAndSigmaFeatures[feat][0] ) / self.muAndSigmaFeatures[feat][1] self.__trainLocal__(featureValues, targetValues) self.amITrained = True def _localNormalizeData(self, values, names, feat): """ Method to normalize data based on the mean and standard deviation. If undesired for a particular ROM, this method can be overloaded to simply pass (see, e.g., GaussPolynomialRom). @ In, values, list, list of feature values (from tdict) @ In, names, list, names of features (from tdict) @ In, feat, list, list of features (from ROM) @ Out, None """ self.muAndSigmaFeatures[feat] = mathUtils.normalizationFactors( values[names.index(feat)]) def confidence(self, edict): """ This call is used to get an estimate of the confidence in the prediction. The base class self.confidence will translate a dictionary into numpy array, then call the local confidence @ In, edict, dict, evaluation dictionary @ Out, confidence, float, the confidence """ if type(edict) != dict: self.raiseAnError( IOError, 'method "confidence". The inquiring set needs to be provided through a dictionary. 
Type of the in-object is ' + str(type(edict))) names, values = list(edict.keys()), list(edict.values()) for index in range(len(values)): resp = self.checkArrayConsistency(values[index], self.isDynamic()) if not resp[0]: self.raiseAnError( IOError, 'In evaluate request for feature ' + names[index] + ':' + resp[1]) featureValues = np.zeros(shape=(values[0].size, len(self.features))) for cnt, feat in enumerate(self.features): if feat not in names: self.raiseAnError( IOError, 'The feature sought ' + feat + ' is not in the evaluate set') else: resp = self.checkArrayConsistency(values[names.index(feat)], self.isDynamic()) if not resp[0]: self.raiseAnError( IOError, 'In training set for feature ' + feat + ':' + resp[1]) featureValues[:, cnt] = values[names.index(feat)] return self.__confidenceLocal__(featureValues) def evaluate(self, edict): """ Method to perform the evaluation of a point or a set of points through the previous trained supervisedLearning algorithm NB.the supervisedLearning object is committed to convert the dictionary that is passed (in), into the local format the interface with the kernels requires. @ In, edict, dict, evaluation dictionary @ Out, evaluate, numpy.array, evaluated points """ if type(edict) != dict: self.raiseAnError( IOError, 'method "evaluate". The evaluate request/s need/s to be provided through a dictionary. 
Type of the in-object is ' + str(type(edict))) names, values = list(edict.keys()), list(edict.values()) for index in range(len(values)): resp = self.checkArrayConsistency(values[index], self.isDynamic()) if not resp[0]: self.raiseAnError( IOError, 'In evaluate request for feature ' + names[index] + ':' + resp[1]) # construct the evaluation matrix featureValues = np.zeros(shape=(values[0].size, len(self.features))) for cnt, feat in enumerate(self.features): if feat not in names: self.raiseAnError( IOError, 'The feature sought ' + feat + ' is not in the evaluate set') else: resp = self.checkArrayConsistency(values[names.index(feat)], self.isDynamic()) if not resp[0]: self.raiseAnError( IOError, 'In training set for feature ' + feat + ':' + resp[1]) featureValues[:, cnt] = ((values[names.index(feat)] - self.muAndSigmaFeatures[feat][0]) ) / self.muAndSigmaFeatures[feat][1] return self.__evaluateLocal__(featureValues) def reset(self): """ Reset ROM """ self.amITrained = False self.__resetLocal__() def returnInitialParameters(self): """ override this method to return the fix set of parameters of the ROM @ In, None @ Out, iniParDict, dict, initial parameter dictionary """ iniParDict = dict( list(self.initOptionDict.items()) + list({ 'returnType': self.__class__.returnType, 'qualityEstType': self.__class__.qualityEstType, 'Features': self.features, 'Target': self.target, 'returnType': self.__class__.returnType }.items()) + list(self.__returnInitialParametersLocal__().items())) return iniParDict def returnCurrentSetting(self): """ return the set of parameters of the ROM that can change during simulation @ In, None @ Out, currParDict, dict, current parameter dictionary """ currParDict = dict({'Trained': self.amITrained}.items() + self.__CurrentSettingDictLocal__().items()) return currParDict def printXMLSetup(self, outFile, options={}): """ Allows the SVE to put whatever it wants into an XML file only once (right before calling pringXML) @ In, outFile, Files.File, either 
StaticXMLOutput or DynamicXMLOutput file @ In, options, dict, optional, dict of string-based options to use, including filename, things to print, etc @ Out, None """ outFile.addScalar('ROM', "type", self.printTag) self._localPrintXMLSetup(outFile, options) def _localPrintXMLSetup(self, outFile, pivotVal, options={}): """ Specific local method for printing anything desired to xml file at the begin of the print. Overwrite in inheriting classes. @ In, outFile, Files.File, either StaticXMLOutput or DynamicXMLOutput file @ In, options, dict, optional, dict of string-based options to use, including filename, things to print, etc @ Out, None """ pass def printXML(self, outFile, pivotVal, options={}): """ Allows the SVE to put whatever it wants into an XML to print to file. @ In, outFile, Files.File, either StaticXMLOutput or DynamicXMLOutput file @ In, pivotVal, float, value of pivot parameters to use in printing if dynamic @ In, options, dict, optional, dict of string-based options to use, including filename, things to print, etc @ Out, None """ self._localPrintXML(outFile, pivotVal, options) def _localPrintXML(self, node, options={}): """ Specific local method for printing anything desired to xml file. Overwrite in inheriting classes. @ In, outFile, Files.File, either StaticXMLOutput or DynamicXMLOutput file @ In, options, dict, optional, dict of string-based options to use, including filename, things to print, etc @ Out, None """ outFile.addScalar( 'ROM', "noInfo", 'ROM of type ' + str(self.printTag.strip()) + ' has no special output options.') def isDynamic(self): """ This method is a utility function that tells if the relative ROM is able to treat dynamic data (e.g. time-series) on its own or not (Primarly called by LearningGate) @ In, None @ Out, isDynamic, bool, True if the ROM is able to treat dynamic data, False otherwise """ return self._dynamicHandling def reseed(self, seed): """ Used to reset the seed of the ROM. 
By default does nothing; overwrite in the inheriting classes as needed. @ In, seed, int, new seed to use @ Out, None """ return @abc.abstractmethod def __trainLocal__(self, featureVals, targetVals): """ Perform training on samples in featureVals with responses y. For an one-class model, +1 or -1 is returned. @ In, featureVals, {array-like, sparse matrix}, shape=[n_samples, n_features], an array of input feature values @ Out, targetVals, array, shape = [n_samples], an array of output target associated with the corresponding points in featureVals """ @abc.abstractmethod def __confidenceLocal__(self, featureVals): """ This should return an estimation of the quality of the prediction. This could be distance or probability or anything else, the type needs to be declared in the variable cls.qualityEstType @ In, featureVals, 2-D numpy array , [n_samples,n_features] @ Out, __confidenceLocal__, float, the confidence """ @abc.abstractmethod def __evaluateLocal__(self, featureVals): """ @ In, featureVals, np.array, 2-D numpy array [n_samples,n_features] @ Out, targetVals , np.array, 1-D numpy array [n_samples] """ @abc.abstractmethod def __resetLocal__(self): """ Reset ROM. After this method the ROM should be described only by the initial parameter settings @ In, None @ Out, None """ @abc.abstractmethod def __returnInitialParametersLocal__(self): """ Returns a dictionary with the parameters and their initial values @ In, None @ Out, params, dict, dictionary of parameter names and initial values """ @abc.abstractmethod def __returnCurrentSettingLocal__(self): """
class CodeInterfaceBase(utils.metaclass_insert(abc.ABCMeta, object)):
  """
    Parent class of the code interfaces.
    Deriving from it forces the implementation of the mandatory methods and enables some
    automatic checks (e.g. testing an interface when no executable is available).
    NOTE: inheriting from this class is recommended but not enforced; a developer may skip it
          if the automatic checking of the interface functionalities is not wanted.
  """
  def __init__(self):
    """
      Constructor; sets the default options of a code interface.
      @ In, None
      @ Out, None
    """
    # accepted input file extensions
    self.inputExtensions = []
    # if True, the command produced by the interface is executed through the shell
    self._runOnShell = True
    # RAVEN's main working directory (filled by readMoreXML / initialize)
    self._ravenWorkingDir = None
    # name of the utility used to load CSV files
    self._csvLoadUtil = 'pandas'
    # should failed runs be printed to screen?
    self.printFailedRuns = True
    # write a CSV even when the data can be handed back to RAVEN directly
    self._writeCSV = False

  def setRunOnShell(self, shell=True):
    """
      Chooses whether the code command is executed through the shell.
      @ In, shell, Boolean, True if the users want to execute their code through shell
      @ Out, None
    """
    self._runOnShell = shell

  def getRunOnShell(self):
    """
      Tells whether the code command is executed through the shell.
      @ In, None
      @ Out, getRunOnShell, bool, current value of self._runOnShell
    """
    return self._runOnShell

  def getIfWriteCsv(self):
    """
      Tells whether a CSV has been requested even if the interface returns its data directly to RAVEN.
      @ In, None
      @ Out, getIfWriteCsv, bool, should we write the csv?
    """
    return self._writeCSV

  def getCsvLoadUtil(self):
    """
      Provides the name of the utility used to load CSV files.
      @ In, None
      @ Out, getCsvLoadUtil, str, name of utility to use
    """
    # default to pandas, overwrite to 'numpy' if all of the following:
    # - all entries are guaranteed to be floats
    # - results CSV have a large number of headers (>1000)
    return self._csvLoadUtil

  def setCsvLoadUtil(self, util):
    """
      Selects the utility used to load CSV files.
      Raises TypeError if the name is not among CsvLoader.CsvLoader.acceptableUtils.
      @ In, util, str, name of utility to use
      @ Out, None
    """
    ok = CsvLoader.CsvLoader.acceptableUtils
    if util in ok:
      self._csvLoadUtil = util
    else:
      raise TypeError(f'Unrecognized CSV loading utility: "{util}"! Expected one of: {ok}')

  def genCommand(self, inputFiles, executable, flags=None, fileArgs=None, preExec=None):
    """
      Builds the command used to launch the code by delegating to generateCommand.
      If the environment variable 'RAVENinterfaceCheck' is true, the command is replaced by a plain
      "echo", so that the interface can be tested without an executable
      (see Driver.py to understand how this variable is set).
      @ In, inputFiles, list, List of input files (length of the list depends on the number of inputs have been added in the Step is running this code)
      @ In, executable, string, executable name with absolute path (e.g. /home/path_to_executable/code.exe)
      @ In, flags, dict, optional, dictionary containing the command-line flags the user can specify in the input (e.g. under the node < Code >< clargstype =0 input0arg =0 i0extension =0 .inp0/ >< /Code >)
      @ In, fileArgs, dict, optional, a dictionary containing the auxiliary input file variables the user can specify in the input (e.g. under the node < Code >< fileargstype =0 input0arg =0 aux0extension =0 .aux0/ >< /Code >)
      @ In, preExec, string, optional, a string the command that needs to be pre-executed before the actual command here defined
      @ Out, returnCommand, tuple, tuple containing the generated command. returnCommand[0] is the command to run the code (string), returnCommand[1] is the name of the output root
    """
    command, outputRoot = self.generateCommand(inputFiles, executable, clargs=flags, fargs=fileArgs, preExec=preExec)
    interfaceCheck = utils.stringIsTrue(os.environ.get('RAVENinterfaceCheck', 'False'))
    if interfaceCheck:
      # interface-check mode: keep the output root, swap the command for a harmless echo
      command = [('parallel', 'echo')]
    return command, outputRoot

  def readMoreXML(self, xmlNode, ravenWorkingDir):
    """
      Reads the portion of the xml input that belongs to this class and sets the related members.
      @ In, xmlNode, xml.etree.ElementTree.Element, Xml element node
      @ In, ravenWorkingDir, str, location of RAVEN's working directory
      @ Out, None
    """
    self._ravenWorkingDir = ravenWorkingDir
    self._readMoreXML(xmlNode)
    # global option: should a CSV be printed even if the data can be directly returned to RAVEN?
    csvNode = xmlNode.find("csv")
    if csvNode is not None:
      csvRequest = csvNode.text
    else:
      csvRequest = "False"
    self._writeCSV = utils.stringIsTrue(csvRequest)

  def _readMoreXML(self, xmlNode):
    """
      Hook to read the portion of the xml input that belongs to a specialized code interface.
      It can be overloaded in order to read specific flags; by default it does nothing.
      @ In, xmlNode, xml.etree.ElementTree.Element, Xml element node
      @ Out, None
    """
    pass

  @abc.abstractmethod
  def generateCommand(self, inputFiles, executable, clargs=None, fargs=None, preExec=None):
    """
      Builds the command (in tuple format) needed to launch the code. To be implemented by each interface.
      @ In, inputFiles, list, List of input files (length of the list depends on the number of inputs have been added in the Step is running this code)
      @ In, executable, string, executable name with absolute path (e.g. /home/path_to_executable/code.exe)
      @ In, clargs, dict, optional, dictionary containing the command-line flags the user can specify in the input (e.g. under the node < Code >< clargstype =0 input0arg =0 i0extension =0 .inp0/ >< /Code >)
      @ In, fargs, dict, optional, a dictionary containing the auxiliary input file variables the user can specify in the input (e.g. under the node < Code >< clargstype =0 input0arg =0 aux0extension =0 .aux0/ >< /Code >)
      @ In, preExec, string, optional, a string the command that needs to be pre-executed before the actual command here defined
      @ Out, returnCommand, tuple, tuple containing the generated command. returnCommand[0] is the command to run the code (string), returnCommand[1] is the name of the output root
    """
    return None

  @abc.abstractmethod
  def createNewInput(self, currentInputFiles, oriInputFiles, samplerType, **Kwargs):
    """
      Generates a new input from the information passed in. To be implemented by each interface.
      @ In, currentInputFiles, list, list of current input files (input files from last this method call)
      @ In, oriInputFiles, list, list of the original input files
      @ In, samplerType, string, Sampler type (e.g. MonteCarlo, Adaptive, etc. see manual Samplers section)
      @ In, Kwargs, dictionary, kwarded dictionary of parameters. In this dictionary there is another dictionary called "SampledVars"
             where RAVEN stores the variables that got sampled (e.g. Kwargs['SampledVars'] => {'var1':10,'var2':40})
      @ Out, newInputFiles, list, list of newer input files, list of the new input files (modified and not)
    """
    pass

  ####################
  ####### OPTIONAL METHODS #######
  ####################

  def getInputExtension(self):
    """
      Provides the extensions accepted by the code interface for the (main) input file.
      @ In, None
      @ Out, tuple, tuple of strings containing accepted input extension (e.g.(".i",".inp") )
    """
    accepted = tuple(self.inputExtensions)
    return accepted

  def setInputExtension(self, exts):
    """
      Replaces the list of extensions accepted for the input files.
      @ In, exts, list, list or other array containing accepted input extension (e.g.[".i",".inp"])
      @ Out, None
    """
    # start from scratch, then reuse the "add" logic
    self.inputExtensions = []
    self.addInputExtension(exts)

  def addInputExtension(self, exts):
    """
      Appends extensions to the ones accepted for the input files.
      @ In, exts, list, list or other array containing accepted input extension (e.g.[".i",".inp"])
      @ Out, None
    """
    self.inputExtensions.extend(exts)

  def addDefaultExtension(self):
    """
      Adds the default extensions accepted for the input files.
      This method should be overwritten if these are not acceptable defaults.
      @ In, None
      @ Out, None
    """
    defaults = ['i', 'inp', 'in']
    self.addInputExtension(defaults)

  def initialize(self, runInfo, oriInputFiles):
    """
      Method to initialize the run of a new step
      @ In, runInfo, dict,  dictionary of the info in the <RunInfo> XML block
      @ In, oriInputFiles, list, list of the original input files
      @ Out, None
    """
    # store working dir for future needs
    self._ravenWorkingDir = runInfo['WorkingDir']

  def finalizeCodeOutput(self, command, output, workingDir):
    """
      Called by RAVEN at the end of each run (if the method is present).
      It can be used for those codes that do not create CSV files, to convert whatever output format into a csv.
      The default returns the output root unchanged.
      @ In, command, string, the command used to run the just ended job
      @ In, output, string, the Output name root
      @ In, workingDir, string, current working dir
      @ Out, output, string or dict, optional, if present and string:
                                                 in case the root of the output file gets changed in this method (and a CSV is produced);
                                               if present and dict:
                                                 in case the output of the code is directly stored in a dictionary and can be directly used
                                                 without the need that RAVEN reads an additional CSV
    """
    return output

  def checkForOutputFailure(self, output, workingDir):
    """
      Called by RAVEN at the end of each run if the return code is == 0.
      It needs to be implemented by the codes that, if the run fails, still return a return code that is 0.
      This can happen in those codes that record the failure of the job (e.g. not converged, etc.) as normal termination (returncode == 0).
      It can be used, for example, to parse the output file looking for a special keyword that testifies that a particular job got failed
      (e.g. in RELAP5 would be the keyword "********").
      The default never reports a failure.
      @ In, output, string, the Output name root
      @ In, workingDir, string, current working dir
      @ Out, failure, bool, True if the job is failed, False otherwise
    """
    return False
class Sampler(utils.metaclass_insert(abc.ABCMeta,BaseType),Assembler): """ This is the base class for samplers Samplers own the sampling strategy (Type) and they generate the input values using the associate distribution. """ #### INITIALIZATION METHODS #### @classmethod def getInputSpecification(cls): """ Method to get a reference to a class that specifies the input data for class cls. @ In, cls, the class for which we are retrieving the specification @ Out, inputSpecification, InputData.ParameterInput, class to use for specifying input of cls. """ inputSpecification = super(Sampler, cls).getInputSpecification() # FIXME the DET HybridSampler doesn't use the "name" param for the samples it creates, # so we can't require the name yet inputSpecification.addParam("name", InputData.StringType) outerDistributionInput = InputData.parameterInputFactory("Distribution") outerDistributionInput.addParam("name", InputData.StringType) outerDistributionInput.addSub(InputData.parameterInputFactory("distribution", contentType=InputData.StringType)) inputSpecification.addSub(outerDistributionInput) variableInput = InputData.parameterInputFactory("variable") variableInput.addParam("name", InputData.StringType) variableInput.addParam("shape", InputData.IntegerListType, required=False) distributionInput = InputData.parameterInputFactory("distribution", contentType=InputData.StringType) distributionInput.addParam("dim", InputData.IntegerType) variableInput.addSub(distributionInput) functionInput = InputData.parameterInputFactory("function", contentType=InputData.StringType) variableInput.addSub(functionInput) inputSpecification.addSub(variableInput) variablesTransformationInput = InputData.parameterInputFactory("variablesTransformation") variablesTransformationInput.addParam('distribution', InputData.StringType) variablesTransformationInput.addSub(InputData.parameterInputFactory("latentVariables", contentType=InputData.StringListType)) 
variablesTransformationInput.addSub(InputData.parameterInputFactory("manifestVariables", contentType=InputData.StringListType)) variablesTransformationInput.addSub(InputData.parameterInputFactory("manifestVariablesIndex", contentType=InputData.StringListType)) variablesTransformationInput.addSub(InputData.parameterInputFactory("method", contentType=InputData.StringType)) inputSpecification.addSub(variablesTransformationInput) constantInput = InputData.parameterInputFactory("constant", contentType=InputData.InterpretedListType) constantInput.addParam("name", InputData.StringType, True) constantInput.addParam("shape", InputData.IntegerListType, required=False) inputSpecification.addSub(constantInput) restartToleranceInput = InputData.parameterInputFactory("restartTolerance", contentType=InputData.FloatType) inputSpecification.addSub(restartToleranceInput) restartInput = InputData.parameterInputFactory("Restart", contentType=InputData.StringType) restartInput.addParam("type", InputData.StringType) restartInput.addParam("class", InputData.StringType) inputSpecification.addSub(restartInput) return inputSpecification def __init__(self): """ Default Constructor that will initialize member variables with reasonable defaults or empty lists/dictionaries where applicable. @ In, None @ Out, None """ BaseType.__init__(self) Assembler.__init__(self) self.ableToHandelFailedRuns = False # is this sampler able to handle failed runs? self.counter = 0 # Counter of the samples performed (better the input generated!!!). 
It is reset by calling the function self.initialize self.auxcnt = 0 # Aux counter of samples performed (for its usage check initialize method) self.limit = sys.maxsize # maximum number of Samples (for example, Monte Carlo = Number of HistorySet to run, DET = Unlimited) self.toBeSampled = {} # Sampling mapping dictionary {'Variable Name':'name of the distribution'} self.dependentSample = {} # Sampling mapping dictionary for dependent variables {'Variable Name':'name of the external function'} self.distDict = {} # Contains the instance of the distribution to be used, it is created every time the sampler is initialized. keys are the variable names self.funcDict = {} # Contains the instance of the function to be used, it is created every time the sampler is initialized. keys are the variable names self.values = {} # for each variable the current value {'var name':value} self.variableShapes = {} # stores the dimensionality of each variable by name, as tuple e.g. (2,3) for [[#,#,#],[#,#,#]] self.inputInfo = {} # depending on the sampler several different type of keywarded information could be present only one is mandatory, see below self.initSeed = None # if not provided the seed is randomly generated at the istanciation of the sampler, the step can override the seed by sending in another seed self.inputInfo['SampledVars' ] = self.values # this is the location where to get the values of the sampled variables self.inputInfo['SampledVarsPb' ] = {} # this is the location where to get the probability of the sampled variables #self.inputInfo['PointProbability'] = None # this is the location where the point wise probability is stored (probability associated to a sampled point) self.inputInfo['crowDist'] = {} # Stores a dictionary that contains the information to create a crow distribution. Stored as a json object self.constants = {} # In this dictionary self.reseedAtEachIteration = False # Logical flag. 
True if every newer evaluation is performed after a new reseeding self.FIXME = False # FIXME flag self.printTag = self.type # prefix for all prints (sampler type) self.restartData = None # presampled points to restart from self.restartTolerance = 1e-15 # strictness with which to find matches in the restart data self.restartIsCompatible = None # flags restart as compatible with the sampling scheme (used to speed up checking) self._endJobRunnable = sys.maxsize # max number of inputs creatable by the sampler right after a job ends (e.g., infinite for MC, 1 for Adaptive, etc) ###### self.variables2distributionsMapping = {} # for each variable 'varName' , the following informations are included: 'varName': {'dim': 1, 'reducedDim': 1,'totDim': 2, 'name': 'distName'} ; dim = dimension of the variable; reducedDim = dimension of the variable in the transformed space; totDim = total dimensionality of its associated distribution self.distributions2variablesMapping = {} # for each variable 'distName' , the following informations are included: 'distName': [{'var1': 1}, {'var2': 2}]} where for each var it is indicated the var dimension self.NDSamplingParams = {} # this dictionary contains a dictionary for each ND distribution (key). This latter dictionary contains the initialization parameters of the ND inverseCDF ('initialGridDisc' and 'tolerance') ###### self.addAssemblerObject('Restart' ,'-n',True) #used for PCA analysis self.variablesTransformationDict = {} # for each variable 'modelName', the following informations are included: {'modelName': {latentVariables:[latentVar1, latentVar2, ...], manifestVariables:[manifestVar1,manifestVar2,...]}} self.transformationMethod = {} # transformation method used in variablesTransformation node {'modelName':method} self.entitiesToRemove = [] # This variable is used in order to make sure the transformation info is printed once in the output xml file. 
def _generateDistributions(self,availableDist,availableFunc):
  """
    Generates the distributions and functions.
    Fills self.distDict and self.funcDict (keyed by variable name) from the
    names collected in self.toBeSampled and self.dependentSample.
    @ In, availableDist, dict, dict of distributions
    @ In, availableFunc, dict, dict of functions
    @ Out, None
  """
  if self.initSeed is not None:
    randomUtils.randomSeed(self.initSeed)
  for key, distName in self.toBeSampled.items():
    if distName not in availableDist:
      self.raiseAnError(IOError,'Distribution '+distName+' not found among available distributions (check input)!')
    self.distDict[key] = availableDist[distName]
    self.inputInfo['crowDist'][key] = json.dumps(self.distDict[key].getCrowDistDict())
  for key,val in self.dependentSample.items():
    if val not in availableFunc:
      # the exception type must be the first argument, as in every other raiseAnError call
      self.raiseAnError(IOError,'Function',val,'was not found among the available functions:',availableFunc.keys())
    self.funcDict[key] = availableFunc[val]
    # check if the correct method is present
    if "evaluate" not in self.funcDict[key].availableMethods():
      self.raiseAnError(IOError,'Function '+self.funcDict[key].name+' does not contain a method named "evaluate". It must be present if this needs to be used in a Sampler!')

def _localGenerateAssembler(self,initDict):
  """
    It is used for sending to the instantiated class, which is implementing the method, the objects that have
    been requested through the "whatDoINeed" method.
    Here it forwards the 'Distributions' and 'Functions' entries to _generateDistributions.
    @ In, initDict, dict, dictionary ({'mainClassName(e.g., Databases):{specializedObjectName(e.g.,DatabaseForSystemCodeNamedWolf):ObjectInstance}'})
    @ Out, None
  """
  availableDist = initDict['Distributions']
  availableFunc = initDict['Functions']
  self._generateDistributions(availableDist,availableFunc)

def _localWhatDoINeed(self):
  """
    This method is a local mirror of the general whatDoINeed method.
    It is implemented by the samplers that need to request special objects
    @ In, None
    @ Out, needDict, dict, list of objects needed
  """
  needDict = {}
  # Every sampler requires Distributions OR a Function
  needDict['Distributions'] = [(None,dist) for dist in self.toBeSampled.values()]
  needDict['Functions'] = [(None,func) for func in self.dependentSample.values()]
  return needDict

def _readMoreXML(self,xmlNode):
  """
    Function to read the portion of the xml input that belongs to this specialized class
    and initialize some stuff based on the inputs got
    The text is supposed to contain the info where and which variable to change.
    In case of a code the syntax is specified by the code interface itself
    @ In, xmlNode, xml.etree.ElementTree.Element, Xml element node
    @ Out, None
  """
  #TODO remove using xmlNode
  Assembler._readMoreXML(self,xmlNode)
  paramInput = self._readMoreXMLbase(xmlNode)
  self.localInputAndChecks(xmlNode, paramInput)

def _readMoreXMLbase(self,xmlNode):
  """
    Function to read the portion of the xml input that belongs to the base sampler only
    and initialize some stuff based on the inputs got
    The text is supposed to contain the info where and which variable to change.
    In case of a code the syntax is specified by the code interface itself
    @ In, xmlNode, xml.etree.ElementTree.Element, Xml element node
    @ Out, paramInput, InputData.ParameterInput the parsed paramInput
  """
  paramInput = self.getInputSpecification()()
  paramInput.parseNode(xmlNode)

  for child in paramInput.subparts:
    prefix = ""
    if child.getName() == 'Distribution':
      for childChild in child.subparts:
        if childChild.getName() =='distribution':
          prefix = "<distribution>"
          toBeSampled = childChild.value
      # NOTE(review): if no <distribution> subnode was found, toBeSampled is unbound (or stale
      #   from a previous node) at this point -- confirm the input spec guarantees the subnode.
      self.toBeSampled[prefix+child.parameterValues['name']] = toBeSampled
    elif child.getName() == 'variable':
      # variable for tracking if distributions or functions have been declared
      foundDistOrFunc = False
      # store variable name for re-use
      varName = child.parameterValues['name']
      # set shape if present
      if 'shape' in child.parameterValues:
        self.variableShapes[varName] = child.parameterValues['shape']
      # read subnodes
      for childChild in child.subparts:
        if childChild.getName() =='distribution':
          # can only have a distribution if doesn't already have a distribution or function
          if not foundDistOrFunc:
            foundDistOrFunc = True
          else:
            self.raiseAnError(IOError,'A sampled variable cannot have both a distribution and a function, or more than one of either!')
          # name of the distribution to sample
          toBeSampled = childChild.value
          varData={}
          varData['name']=childChild.value
          # variable dimensionality
          if 'dim' not in childChild.parameterValues:
            dim=1
          else:
            dim=childChild.parameterValues['dim']
          varData['dim']=dim
          # set up mapping for variable to distribution
          self.variables2distributionsMapping[varName] = varData
          # flag distribution as needing to be sampled
          self.toBeSampled[prefix+varName] = toBeSampled
        elif childChild.getName() == 'function':
          # can only have a function if doesn't already have a distribution or function
          if not foundDistOrFunc:
            foundDistOrFunc = True
          else:
            self.raiseAnError(IOError,'A sampled variable cannot have both a distribution and a function!')
          # function name
          toBeSampled = childChild.value
          # track variable as a functional sample
          self.dependentSample[prefix+varName] = toBeSampled

      if not foundDistOrFunc:
        self.raiseAnError(IOError,'Sampled variable',varName,'has neither a <distribution> nor <function> node specified!')
    elif child.getName() == "variablesTransformation":
      transformationDict = {}
      listIndex = None
      for childChild in child.subparts:
        if childChild.getName() == "latentVariables":
          transformationDict[childChild.getName()] = list(childChild.value)
        elif childChild.getName() == "manifestVariables":
          transformationDict[childChild.getName()] = list(childChild.value)
        elif childChild.getName() == "manifestVariablesIndex":
          # the index provided by the input file starts from 1, but the index used by the code starts from 0.
          listIndex = list(int(inp) - 1 for inp in childChild.value)
        elif childChild.getName() == "method":
          self.transformationMethod[child.parameterValues['distribution']] = childChild.value
      if listIndex is None:
        self.raiseAWarning('Index is not provided for manifestVariables, default index will be used instead!')
        listIndex = range(len(transformationDict["manifestVariables"]))
      transformationDict["manifestVariablesIndex"] = listIndex
      self.variablesTransformationDict[child.parameterValues['distribution']] = transformationDict
    elif child.getName() == "constant":
      name,value = self._readInConstant(child)
      self.constants[name] = value
    elif child.getName() == "restartTolerance":
      self.restartTolerance = child.value

  if len(self.constants) > 0:
    # check if constant variables are also part of the sampled space. In case, error out
    if not set(self.toBeSampled.keys()).isdisjoint(self.constants.keys()):
      self.raiseAnError(IOError,"Some constant variables are also in the sampling space:" +
                                ' '.join([i if i in self.toBeSampled.keys() else "" for i in self.constants.keys()]) )

  if self.initSeed is None:
    self.initSeed = randomUtils.randomIntegers(0,2**31,self)
  # Creation of the self.distributions2variablesMapping dictionary: {'distName': [{'variable_name1': dim1}, {'variable_name2': dim2}]}
  for variable in self.variables2distributionsMapping.keys():
    distName = self.variables2distributionsMapping[variable]['name']
    dim = self.variables2distributionsMapping[variable]['dim']
    listElement={}
    listElement[variable] = dim
    if (distName in self.distributions2variablesMapping.keys()):
      self.distributions2variablesMapping[distName].append(listElement)
    else:
      self.distributions2variablesMapping[distName]=[listElement]

  # creation of the self.distributions2variablesIndexList dictionary:{'distName':[dim1,dim2,...,dimN]}
  self.distributions2variablesIndexList = {}
  for distName in self.distributions2variablesMapping.keys():
    positionList = []
    for var in self.distributions2variablesMapping[distName]:
      position = utils.first(var.values())
      positionList.append(position)
    if sum(set(positionList)) > 1 and len(positionList) != len(set(positionList)):
      dups = set(str(var) for var in positionList if positionList.count(var) > 1)
      self.raiseAnError(IOError,'Each of the following dimensions are assigned to multiple variables in Samplers: "{}"'.format(', '.join(dups)),
                        ' associated to ND distribution ', distName, '. This is currently not allowed!')
    positionList = list(set(positionList))
    positionList.sort()
    self.distributions2variablesIndexList[distName] = positionList

  for key in self.variables2distributionsMapping.keys():
    distName = self.variables2distributionsMapping[key]['name']
    dim = self.variables2distributionsMapping[key]['dim']
    reducedDim = self.distributions2variablesIndexList[distName].index(dim) + 1
    self.variables2distributionsMapping[key]['reducedDim'] = reducedDim # the dimension of variable in the transformed space
    self.variables2distributionsMapping[key]['totDim'] = max(self.distributions2variablesIndexList[distName]) # We will reset the value if the node <variablesTransformation> exist in the raven input file
    if not self.variablesTransformationDict and self.variables2distributionsMapping[key]['totDim'] > 1:
      if self.variables2distributionsMapping[key]['totDim'] != len(self.distributions2variablesIndexList[distName]):
        self.raiseAnError(IOError,'The "dim" assigned to the variables insider Sampler are not correct! the "dim" should start from 1, and end with the full dimension of given distribution')

  #Checking the variables transformation
  if self.variablesTransformationDict:
    for dist,varsDict in self.variablesTransformationDict.items():
      maxDim = len(varsDict['manifestVariables'])
      listLatentElement = varsDict['latentVariables']
      if len(set(listLatentElement)) != len(listLatentElement):
        dups = set(var for var in listLatentElement if listLatentElement.count(var) > 1)
        self.raiseAnError(IOError,'The following are duplicated variables listed in the latentVariables: ' + str(dups))
      if len(set(varsDict['manifestVariables'])) != len(varsDict['manifestVariables']):
        dups = set(var for var in varsDict['manifestVariables'] if varsDict['manifestVariables'].count(var) > 1)
        self.raiseAnError(IOError,'The following are duplicated variables listed in the manifestVariables: ' + str(dups))
      if len(set(varsDict['manifestVariablesIndex'])) != len(varsDict['manifestVariablesIndex']):
        dups = set(var+1 for var in varsDict['manifestVariablesIndex'] if varsDict['manifestVariablesIndex'].count(var) > 1)
        self.raiseAnError(IOError,'The following are duplicated variables indices listed in the manifestVariablesIndex: ' + str(dups))
      listElement = self.distributions2variablesMapping[dist]
      for var in listElement:
        self.variables2distributionsMapping[utils.first(var.keys())]['totDim'] = maxDim #reset the totDim to reflect the totDim of original input space
      # split comma-separated (fully correlated) names so each single variable maps to its dim
      tempListElement = {k.strip():v for x in listElement for ks,v in x.items() for k in list(ks.strip().split(','))}
      listIndex = []
      for var in listLatentElement:
        if var not in set(tempListElement.keys()):
          self.raiseAnError(IOError, 'The variable listed in latentVariables ' + var + ' is not listed in the given distribution: ' + dist)
        listIndex.append(tempListElement[var]-1)
      # NOTE(review): listIndex is 0-based while maxDim is a count, so an index equal to maxDim
      #   passes this check -- confirm whether ">=" was intended before tightening it.
      if max(listIndex) > maxDim:
        self.raiseAnError(IOError,'The maximum dim = ' + str(max(listIndex)) + ' defined for latent variables is exceeded the dimension of the problem ' + str(maxDim))
      if len(set(listIndex)) != len(listIndex):
        dups = set(var+1 for var in listIndex if listIndex.count(var) > 1)
        self.raiseAnError(IOError,'Each of the following dimensions are assigned to multiple latent variables in Samplers: ' + str(dups))
      # update the index for latentVariables according to the 'dim' assigned for given var defined in Sampler
      self.variablesTransformationDict[dist]['latentVariablesIndex'] = listIndex
  return paramInput

def _readInConstant(self,inp):
  """
    Reads in a "constant" input parameter node.
    A single-entry value is returned as a scalar; multiple entries are returned as a numpy
    array, reshaped to the "shape" attribute when one is given.
    @ In, inp, utils.InputParameter.ParameterInput, input parameter node to read from
    @ Out, name, string, name of constant
    @ Out, value, float or np.array, value of the constant
  """
  value = inp.value
  name = inp.parameterValues['name']
  shape = inp.parameterValues.get('shape',None)
  # if single entry, remove array structure; if multiple entries, cast them as numpy array
  if len(value) == 1:
    value = value[0]
  else:
    value = np.asarray(value)
    # if specific shape requested, then reshape it
    if shape is not None:
      try:
        value = value.reshape(shape)
      except ValueError:
        self.raiseAnError(IOError,
                          ('Requested shape "{}" ({} entries) for constant "{}"' +\
                           ' is not consistent with the provided values ({} entries)!')
                          .format(shape,np.prod(shape),name,len(value)))
  return name, value

def getInitParams(self):
  """
    This function is called from the base class to print some of the information inside the class.
    Whatever is permanent in the class and not inherited from the parent class should be mentioned here
    The information is passed back in the dictionary. No information about values that change during the simulation are allowed
    @ In, None
    @ Out, paramDict, dict, dictionary containing the parameter names as keys
      and each parameter's initial value as the dictionary values
  """
  paramDict = {}
  for varName, distName in self.toBeSampled.items():
    paramDict["sampled variable: "+varName] = 'is sampled using the distribution ' +distName
  paramDict['limit' ] = self.limit
  paramDict['initial seed' ] = self.initSeed
  paramDict.update(self.localGetInitParams())
  return paramDict

def initialize(self,externalSeeding=None,solutionExport=None):
  """
    This function should be called every time a clean sampler is needed. Called before takeAstep in <Step>
    @ In, externalSeeding, int, optional, external seed
    @ In, solutionExport, DataObject, optional, in goal oriented sampling (a.k.a. adaptive sampling this is where the space/point satisfying the constrains)
    @ Out, None
  """
  if self.initSeed is None:
    self.initSeed = randomUtils.randomIntegers(0,2**31,self)
  self.counter = 0
  if not externalSeeding:
    randomUtils.randomSeed(self.initSeed) #use the sampler initialization seed
    self.auxcnt = self.initSeed
  elif externalSeeding=='continue':
    pass #in this case the random sequence needs to be preserved
  else:
    randomUtils.randomSeed(externalSeeding) #the external seeding is used
    self.auxcnt = externalSeeding
  #grab restart dataobject if it's available, then in localInitialize the sampler can deal with it.
  if 'Restart' in self.assemblerDict.keys():
    self.raiseADebug('Restart object: '+str(self.assemblerDict['Restart']))
    self.restartData = self.assemblerDict['Restart'][0][3]
    # check the right variables are in the restart
    # (dict views cannot be concatenated with "+" in Python 3, so build the union of the key sets)
    need = set(self.toBeSampled) | set(self.dependentSample)
    if not need.issubset(set(self.restartData.getVars())):
      missing = need - set(self.restartData.getVars())
      #TODO this could be a warning, instead, but user wouldn't see it until the run was deep in
      self.raiseAnError(KeyError,'Restart data object "{}" is missing the following variables: "{}". No restart can be performed.'.format(self.restartData.name,', '.join(missing)))
    else:
      self.raiseAMessage('Restarting from '+self.restartData.name)
    # we used to check distribution consistency here, but we want to give more flexibility to using
    #   restart data, so do NOT check distributions of restart data.
  else:
    self.raiseAMessage('No restart for '+self.printTag)

  #load restart data into existing points
  # TODO do not copy data! Read directly from restart.
  #if self.restartData is not None:
  #  if len(self.restartData) > 0:
  #    inps = self.restartData.getInpParametersValues()
  #    outs = self.restartData.getOutParametersValues()
  #    #FIXME there is no guarantee ordering is accurate between restart data and sampler
  #    inputs = list(v for v in inps.values())
  #    existingInps = zip(*inputs)
  #    outVals = zip(*list(v for v in outs.values()))
  #    self.existing = dict(zip(existingInps,outVals))

  #specializing the self.localInitialize() to account for adaptive sampling
  if solutionExport is not None:
    self.localInitialize(solutionExport=solutionExport)
  else:
    self.localInitialize()

  for distrib in self.NDSamplingParams:
    if distrib in self.distributions2variablesMapping:
      params = self.NDSamplingParams[distrib]
      temp = utils.first(self.distributions2variablesMapping[distrib][0].keys())
      self.distDict[temp].updateRNGParam(params)
    else:
      self.raiseAnError(IOError,'Distribution "%s" specified in distInit block of sampler "%s" does not exist!' %(distrib,self.name))

  # Store the transformation matrix in the metadata
  if self.variablesTransformationDict:
    self.entitiesToRemove = []
    for variable in self.variables2distributionsMapping.keys():
      distName = self.variables2distributionsMapping[variable]['name']
      dim = self.variables2distributionsMapping[variable]['dim']
      totDim = self.variables2distributionsMapping[variable]['totDim']
      if totDim > 1 and dim == 1:
        transformDict = {}
        transformDict['type'] = self.distDict[variable.strip()].type
        transformDict['transformationMatrix'] = self.distDict[variable.strip()].transformationMatrix()
        self.inputInfo['transformation-'+distName] = transformDict
        self.entitiesToRemove.append('transformation-'+distName)

  # Register expected metadata
  meta = ['ProbabilityWeight','prefix','PointProbability']
  for var in self.toBeSampled.keys():
    meta += ['ProbabilityWeight-'+ key for key in var.split(",")]
  self.addMetaKeys(*meta)

def localGetInitParams(self):
  """
    Method used to export to the printer in the base class the additional PERMANENT your local class have
    @ In, None
    @ Out, paramDict, dict, dictionary containing the parameter names as keys
      and each parameter's initial value as the dictionary values
  """
  return {}

def localInitialize(self):
  """
    use this function to add initialization features to the derived class
    it is call at the beginning of each step
    @ In, None
    @ Out, None
  """
  pass

def localInputAndChecks(self,xmlNode, paramInput):
  """
    Local method. Place here the additional reading, remember to add initial parameters in the method localGetInitParams
    @ In, xmlNode, xml.etree.ElementTree.Element, Xml element node
    @ In, paramInput, InputData.ParameterInput, the parsed parameters
    @ Out, None
  """
  pass

def readSamplerInit(self,xmlNode):
  """
    This method is responsible to read only the samplerInit block in the .xml file.
    This method has been moved from the base sampler class since the samplerInit block is needed only for the MC and stratified (LHS) samplers
    @ In, xmlNode, xml.etree.ElementTree.Element, Xml element node
    @ Out, None
  """
  #TODO, this is redundant and paramInput should be directly passed in.
  paramInput = self.getInputSpecification()()
  paramInput.parseNode(xmlNode)

  for child in paramInput.subparts:
    if child.getName() != "samplerInit":
      continue
    # default seed; overridden below if an <initialSeed> node is present
    self.initSeed = randomUtils.randomIntegers(0,2**31,self)
    for childChild in child.subparts:
      tag = childChild.getName()
      if tag == "limit":
        try:
          self.limit = int(childChild.value)
        except ValueError:
          self.raiseAnError(IOError,'reading the attribute for the sampler '+self.name+' it was not possible to perform the conversion to integer for the attribute limit with value ' + str(childChild.value))
      elif tag == "initialSeed":
        try:
          self.initSeed = int(childChild.value)
        except ValueError:
          self.raiseAnError(IOError,'reading the attribute for the sampler '+self.name+' it was not possible to perform the conversion to integer for the attribute initialSeed with value ' + str(childChild.value))
      elif tag == "reseedEachIteration":
        if childChild.value.lower() in utils.stringsThatMeanTrue():
          self.reseedAtEachIteration = True
      elif tag == "distInit":
        for childChildChild in childChild.subparts:
          NDdistData = {}
          for childChildChildChild in childChildChild.subparts:
            if childChildChildChild.getName() == 'initialGridDisc':
              NDdistData[childChildChildChild.getName()] = int(childChildChildChild.value)
            elif childChildChildChild.getName() == 'tolerance':
              NDdistData[childChildChildChild.getName()] = float(childChildChildChild.value)
            else:
              self.raiseAnError(IOError,'Unknown tag '+childChildChildChild.getName()+' .Available are: initialGridDisc and tolerance!')
          self.NDSamplingParams[childChildChild.parameterValues['name']] = NDdistData

#### GETTERS AND SETTERS ####
def endJobRunnable(self):
  """
    Returns the maximum number of inputs allowed to be created by the sampler
    right after a job ends (e.g., infinite for MC, 1 for Adaptive, etc)
    @ In, None
    @ Out, endJobRunnable, int, number of runnable jobs at the end of each sample
  """
  return self._endJobRunnable

def getCurrentSetting(self):
  """
    This function is called from the base class to print some of the information inside the class.
    Whatever is a temporary value in the class and not inherited from the parent class should be mentioned here
    The information is passed back in the dictionary
    @ In, None
    @ Out, paramDict, dict, dictionary containing the parameter names as keys
      and each parameter's initial value as the dictionary values
  """
  paramDict = {}
  paramDict['counter' ] = self.counter
  paramDict['initial seed' ] = self.initSeed
  for key in self.inputInfo:
    if key!='SampledVars':
      paramDict[key] = self.inputInfo[key]
    else:
      # the sampled values live in self.inputInfo; 'SampledVars' is never copied into paramDict,
      #   so they must be read from the source dictionary
      for var, value in self.inputInfo['SampledVars'].items():
        paramDict['Variable: '+var+' has value'] = value
  paramDict.update(self.localGetCurrentSetting())
  return paramDict

def localGetCurrentSetting(self):
  """
    Returns a dictionary with class specific information regarding the
    current status of the object.
    @ In, None
    @ Out, paramDict, dict, dictionary containing the parameter names as keys
      and each parameter's initial value as the dictionary values
  """
  return {}

#### SAMPLING METHODS ####
def amIreadyToProvideAnInput(self): #inLastOutput=None):
  """
    This is a method that should be call from any user of the sampler before requiring the generation of a new sample.
    This method act as a "traffic light" for generating a new input.
    Reason for not being ready could be for example: exceeding number of samples, waiting for other simulation for providing more information etc. etc.
    @ In, None
    @ Out, ready, bool, is this sampler ready to generate another sample?
  """
  ready = self.counter < self.limit
  ready = self.localStillReady(ready)
  return ready

def localStillReady(self,ready):
  """
    Determines if sampler is prepared to provide another input. If not, and
    if jobHandler is finished, this will end sampling.
    @ In, ready, bool, a boolean representing whether the caller is prepared for another input.
    @ Out, ready, bool, a boolean representing whether the caller is prepared for another input.
  """
  return ready

def _checkRestartForEvaluation(self):
  """
    Checks restart data object (if any) for matching realization.
    @ In, None
    @ Out, index, int, index of matching realization in restart (None if not found)
    @ Out, inExisting, dict, matching realization (None if not found)
  """
  #check if point already exists
  if self.restartData is not None:
    index,inExisting = self.restartData.realization(matchDict=self.values,tol=self.restartTolerance,unpackXArray=True)
  else:
    index = None
    inExisting = None
  return index,inExisting

def _constantVariables(self):
  """
    Method to set the constant variables into the inputInfo dictionary
    @ In, None
    @ Out, None
  """
  if len(self.constants) > 0:
    # we inject the constant variables into the SampledVars
    self.inputInfo['SampledVars' ].update(self.constants)
    # we consider that CDF of the constant variables is equal to 1 (same as its Pb Weight)
    self.inputInfo['SampledVarsPb'].update(dict.fromkeys(self.constants.keys(),1.0))
    pbKey = ['ProbabilityWeight-'+key for key in self.constants.keys()]
    self.addMetaKeys(*pbKey)
    self.inputInfo.update(dict.fromkeys(pbKey,1.0))

def _expandVectorVariables(self):
  """
    Expands vector variables to fit the requested shape.
    @ In, None
    @ Out, None
  """
  # by default, just repeat this value into the desired shape. May be overloaded by other samplers.
  for var,shape in self.variableShapes.items():
    baseVal = self.inputInfo['SampledVars'][var]
    self.inputInfo['SampledVars'][var] = np.ones(shape)*baseVal

def _functionalVariables(self):
  """
    Evaluates variables that are functions of other input variables.
    @ In, None
    @ Out, None
  """
  # generate the function variable values
  for var in self.dependentSample.keys():
    test=self.funcDict[var].evaluate("evaluate",self.values)
    # a comma-separated name assigns the same evaluated value to each listed variable
    for corrVar in var.split(","):
      self.values[corrVar.strip()] = test

def _incrementCounter(self):
  """
    Increments counter and sets up prefix.
    @ In, None
    @ Out, None
  """
  #since we are creating the input for the next run we increase the counter and global counter
  self.counter +=1
  self.auxcnt +=1
  #exit if over the limit
  if self.counter > self.limit:
    self.raiseADebug('Exceeded number of points requested in sampling! Moving on...')
  #FIXME, the following condition check is make sure that the require info is only printed once when dump metadata to xml, this should be removed in the future when we have a better way to dump the metadata
  if self.counter >1:
    for key in self.entitiesToRemove:
      self.inputInfo.pop(key,None)
  if self.reseedAtEachIteration:
    randomUtils.randomSeed(self.auxcnt-1)
  self.inputInfo['prefix'] = str(self.counter)

def _performVariableTransform(self):
  """
    Performs variable transformations if existing.
    @ In, None
    @ Out, None
  """
  # add latent variables and original variables to self.inputInfo
  if self.variablesTransformationDict:
    for dist,var in self.variablesTransformationDict.items():
      if self.transformationMethod[dist] == 'pca':
        self.pcaTransform(var,dist)
      else:
        self.raiseAnError(NotImplementedError,'transformation method is not yet implemented for ' + self.transformationMethod[dist] + ' method')

def _reassignSampledVarsPbToFullyCorrVars(self):
  """
    Method to reassign sampledVarsPb to the fully correlated variables
    Entries keyed by a comma-separated name are removed and their value is
    assigned to each of the individual names.
    @ In, None
    @ Out, None
  """
  sampledPb = self.inputInfo['SampledVarsPb']
  # snapshot the keys first: popping while iterating the live view raises RuntimeError in Python 3
  fullyCorrVars = {s: sampledPb.pop(s) for s in list(sampledPb.keys()) if "," in s}
  # assign the SampledVarsPb to the fully correlated vars
  for key in fullyCorrVars:
    for kkey in key.split(","):
      sampledPb[kkey] = fullyCorrVars[key]

def _reassignPbWeightToCorrelatedVars(self):
  """
    Method to reassign probability weight to the correlated variables
    @ In, None
    @ Out, None
  """
  for varName, varInfo in self.variables2distributionsMapping.items():
    # Handle ND Case
    if varInfo['totDim'] > 1:
      distName = self.variables2distributionsMapping[varName]['name']
      self.inputInfo['ProbabilityWeight-' + varName] = self.inputInfo['ProbabilityWeight-' + distName]
    if "," in varName:
      for subVarName in varName.split(","):
        self.inputInfo['ProbabilityWeight-' + subVarName.strip()] = self.inputInfo['ProbabilityWeight-' + varName]

def generateInput(self,model,oldInput):
  """
    This method has to be overwritten to provide the specialization for the specific sampler
    The model instance in might be needed since, especially for external codes,
    only the code interface possesses the dictionary for reading the variable definition syntax
    @ In, model, model instance, it is the instance of a RAVEN model
    @ In, oldInput, list, a list of the original needed inputs for the model (e.g. list of files, etc. etc)
    @ Out, generateInput, tuple(0,list), list contains the new inputs -in reality it is the model that returns this; the Sampler generates the value to be placed in the input of the model.
    The Out parameter depends on the results of generateInput
      If a new point is found, the default Out above is correct.
      If a restart point is found:
        @ Out, generateInput, tuple(int,dict), (1,realization dictionary)
  """
  self._incrementCounter()
  model.getAdditionalInputEdits(self.inputInfo)
  self.localGenerateInput(model,oldInput)
  # split the sampled vars Pb among the different correlated variables
  self._reassignSampledVarsPbToFullyCorrVars()
  self._reassignPbWeightToCorrelatedVars()
  ##### TRANSFORMATION #####
  self._performVariableTransform()
  ##### CONSTANT VALUES ######
  self._constantVariables()
  ##### REDUNDANT FUNCTIONALS #####
  self._functionalVariables()
  ##### VECTOR VARS #####
  self._expandVectorVariables()
  ##### RESTART #####
  index,inExisting = self._checkRestartForEvaluation()
  # reformat metadata into acceptable format for dataojbect
  # DO NOT format here, let that happen when a realization is made in collectOutput for each Model. Sampler doesn't care about this.
  # self.inputInfo['ProbabilityWeight'] = np.atleast_1d(self.inputInfo['ProbabilityWeight'])
  # self.inputInfo['prefix'] = np.atleast_1d(self.inputInfo['prefix'])
  #if not found or not restarting, we have a new point!
  if inExisting is None:
    self.raiseADebug('Found new point to sample:',self.values)
    ## The new info for the perturbed run will be stored in the sampler's
    ## inputInfo (I don't particularly like this, I think it should be
    ## returned here, but let's get this working and then we can decide how
    ## to best pass this information around. My reasoning is that returning
    ## it here means the sampler does not need to store it, and we can return
    ## a copy of the information, otherwise we have to be careful to create a
    ## deep copy of this information when we submit it to a job).
    ## -- DPM 4/18/17
    return 0,oldInput
  #otherwise, return the restart point
  else:
    # TODO use realization format as per new data object (no subspaces)
    self.raiseADebug('Point found in restart!')
    rlz = {}
    # we've fixed it so the input and output space don't really matter, so use restartData's own definition
    # DO format the data as atleast_1d so it's consistent in the ExternalModel for users (right?)
    rlz['inputs'] = dict((var,np.atleast_1d(inExisting[var])) for var in self.restartData.getVars('input'))
    rlz['outputs'] = dict((var,np.atleast_1d(inExisting[var])) for var in self.restartData.getVars('output')+self.restartData.getVars('indexes'))
    rlz['metadata'] = copy.deepcopy(self.inputInfo) # TODO need deepcopy only because inputInfo is on self
    return 1,rlz

def generateInputBatch(self,myInput,model,batchSize,projector=None):
  """
    this function provide a mask to create several inputs at the same time
    It call the generateInput function as many time as needed
    @ In, myInput, list, list containing one input set
    @ In, model, model instance, it is the instance of a RAVEN model
    @ In, batchSize, int, the number of input sets required
    @ In, projector, object, optional, used for adaptive sampling to provide the projection of the solution on the success metric
    @ Out, newInputs, list of list, list of the list of input sets
  """
  newInputs = []
  while self.amIreadyToProvideAnInput() and (self.counter < batchSize):
    if projector is None:
      newInputs.append(self.generateInput(model,myInput))
    else:
      # NOTE(review): generateInput as defined here takes (model, oldInput) only; this call
      #   presumably relies on a derived class accepting a projector -- confirm.
      newInputs.append(self.generateInput(model,myInput,projector))
  return newInputs

@abc.abstractmethod
def localGenerateInput(self,model,oldInput):
  """
    This class need to be overwritten since it is here that the magic of the sampler happens.
    After this method call the self.inputInfo should be ready to be sent to the model
    @ In, model, model instance, Model instance
    @ In, oldInput, list, a list of the original needed inputs for the model (e.g. list of files, etc. etc)
    @ Out, None
  """
  pass

def pcaTransform(self,varsDict,dist):
  """
    This method is used to map latent variables with respect to the model input variables
    both the latent variables and the model input variables will be stored in the dict: self.inputInfo['SampledVars']
    @ In, varsDict, dict, dictionary contains latent and manifest variables {'latentVariables':[latentVar1,latentVar2,...], 'manifestVariables':[var1,var2,...]}
    @ In, dist, string, the distribution name associated with given variable set
    @ Out, None
  """
  latentVariablesValues = []
  listIndex = []
  manifestVariablesValues = [None] * len(varsDict['manifestVariables'])
  # collect the values (and their latent-space indices) of the latent variables already sampled
  for index,lvar in enumerate(varsDict['latentVariables']):
    if lvar in self.values:
      latentVariablesValues.append(self.values[lvar])
      listIndex.append(varsDict['latentVariablesIndex'][index])

  varName = utils.first(utils.first(self.distributions2variablesMapping[dist]).keys())
  varsValues = self.distDict[varName].pcaInverseTransform(latentVariablesValues,listIndex)
  # place each transformed value at the position given by manifestVariablesIndex
  for index1,index2 in enumerate(varsDict['manifestVariablesIndex']):
    manifestVariablesValues[index2] = varsValues[index1]
  manifestVariablesDict = dict(zip(varsDict['manifestVariables'],manifestVariablesValues))
  self.values.update(manifestVariablesDict)

### FINALIZING METHODS ####
def finalizeActualSampling(self,jobObject,model,myInput):
  """
    This function is used by samplers that need to collect information from a
    finished run.
    Provides a generic interface that all samplers will use, for specifically
    handling any sub-class, the localFinalizeActualSampling should be overridden
    instead, as finalizeActualSampling provides only generic functionality
    shared by all Samplers and will in turn call the localFinalizeActualSampling
    before returning.
    @ In, jobObject, instance, an instance of a JobHandler
    @ In, model, model instance, it is the instance of a RAVEN model
    @ In, myInput, list, the generating input
    @ Out, None
  """
  self.localFinalizeActualSampling(jobObject,model,myInput)

def localFinalizeActualSampling(self,jobObject,model,myInput):
  """
    Overwrite only if you need something special at the end of each run....
    This function is used by samplers that need to collect information from the just ended run
    For example, for a Dynamic Event Tree case, this function can be used to retrieve
    the information from the just finished run of a branch in order to retrieve, for example,
    the distribution name that caused the trigger, etc.
    It is a essentially a place-holder for most of the sampler to remain compatible with the StepsCR structure
    @ In, jobObject, instance, an instance of a JobHandler
    @ In, model, model instance, it is the instance of a RAVEN model
    @ In, myInput, list, the generating input
    @ Out, None
  """
  pass

def finalizeSampler(self,failedRuns):
  """
    Method called at the end of the Step when no more samples will be taken. Closes out sampler for step.
    @ In, failedRuns, list, list of JobHandler.ExternalRunner objects
    @ Out, None
  """
  self.handleFailedRuns(failedRuns)

def handleFailedRuns(self,failedRuns):
  """
    Collects the failed runs from the Step and allows samples to handle them individually if need be.
    @ In, failedRuns, list, list of JobHandler.ExternalRunner objects
    @ Out, None
  """
  self.raiseADebug('===============')
  self.raiseADebug('| RUN SUMMARY |')
  self.raiseADebug('===============')
  if len(failedRuns)>0:
    self.raiseAWarning('There were %i failed runs! Run with verbosity = debug for more details.' %(len(failedRuns)))
    for run in failedRuns:
      ## FIXME: run.command no longer exists, so I am removing the printing
      ## of it and the metadata for the time being, please let me know if this
      ## information is critical, as it is debug info, I cannot imagine it is
      ## important to keep.
      self.raiseADebug(' Run number %s FAILED:' %run.identifier)
      self.raiseADebug(' return code :',run.getReturnCode())
      # metadata = run.getMetadata()
      # if metadata is not None:
      #   self.raiseADebug(' sampled vars:')
      #   for v,k in metadata['SampledVars'].items():
      #     self.raiseADebug(' ',v,':',k)
  else:
    self.raiseADebug('All runs completed without returning errors.')
  self._localHandleFailedRuns(failedRuns)
  self.raiseADebug('===============')
  self.raiseADebug(' END SUMMARY ')
  self.raiseADebug('===============')

def _localHandleFailedRuns(self,failedRuns):
  """
    Specialized method for samplers to handle failed runs. Defaults to failing runs.
    @ In, failedRuns, list, list of JobHandler.ExternalRunner objects
    @ Out, None
  """
  if len(failedRuns)>0:
    self.raiseAnError(IOError,'There were failed runs; aborting RAVEN.')
class PostProcessorInterfaceBase(utils.metaclass_insert(abc.ABCMeta, object), MessageHandler.MessageUser):
    """
    This class is the base interfaced post-processor class
    It contains the three methods that need to be implemented:
      - initialize
      - run
      - readMoreXML
    """

    def __init__(self, messageHandler):
        """
        Constructor
        @ In, messageHandler, MessageHandler, message handler object
        @ Out, None
        """
        self.type = self.__class__.__name__
        self.name = self.__class__.__name__
        self.messageHandler = messageHandler
        # settings required to back transform the data into its original format;
        # it gets filled in the run method and used in the inverse method
        self.transformationSettings = {}

    def initialize(self):
        """
        Method to initialize the Interfaced Post-processor.
        Note that the user needs to specify two mandatory variables:
          - self.inputFormat:  dataObject that the PP is supposed to receive in input
          - self.outputFormat: dataObject that the PP is supposed to generate in output
        These two variables check that the input and output dictionaries match what
        the PP is supposed to receive and generate.
        Refer to the manual on the format of these two dictionaries.
        @ In, None
        @ Out, None
        """
        self.inputFormat = None
        self.outputFormat = None

    def readMoreXML(self, xmlNode):
        """
        Function that reads elements this post-processor will use
        @ In, xmlNode, ElementTree, Xml element node
        @ Out, None
        """
        pass

    def run(self, inputDic):
        """
        Method to post-process the dataObjects
        @ In, inputDic, dict, dictionary which contains the data inside the input DataObject
        @ Out, None
        """
        pass

    def _inverse(self, inputDic):
        """
        Method to perform the inverse of the post-process action
        @ In, inputDic, dict, dictionary which contains the data to be back pre-processed
        @ Out, None
        """
        pass

    def checkGeneratedDicts(self, outputDic):
        """
        Method to check that the dictionary generated in run(self, inputDic) is consistent.
        Both the 'input' and the 'output' parts are always checked, so that a warning
        is raised for each part that fails.
        @ In, outputDic, dict, dictionary generated by the run method; its
              ['data']['input'] and ['data']['output'] entries are inspected
        @ Out, outcome, bool, True only if both the input and the output checks pass
        """
        checkInp = self.checkInputFormat(outputDic['data']['input'])
        checkOut = self.checkOutputFormat(outputDic['data']['output'])
        if not checkInp:
            self.raiseAWarning('PP Generation check on Inputs failed!')
        if not checkOut:
            self.raiseAWarning('PP Generation check on Outputs failed!')
        return bool(checkInp and checkOut)

    def checkOutputFormat(self, outputDic):
        """
        This method checks that the output part of the generated dictionary is built
        accordingly to self.outputFormat:
          - 'HistorySet': every entry must be a dict whose values are np.ndarray or c1darray
          - anything else (i.e. 'PointSet'): every entry must be a np.ndarray or c1darray
        A warning is raised for each offending entry.
        @ In, outputDic, dict, dictionary generated by the run method
        @ Out, outcome, bool, outcome of the outputDic check (True/False)
        """
        if not isinstance(outputDic, dict):
            self.raiseAWarning('Bad PP output dict:', type(outputDic), 'is not a dict!')
            return False
        arrayTypes = (np.ndarray, c1darray)
        outcome = True
        if self.outputFormat == 'HistorySet':
            for key, history in outputDic.items():
                if not isinstance(history, dict):
                    self.raiseAWarning('Bad PP output type for key:', key, ':',
                                       type(history), '; should be dict!')
                    outcome = False
                    # a non-dict entry has no named sub-entries that could be inspected
                    continue
                for keys, values in history.items():
                    if not isinstance(values, arrayTypes):
                        self.raiseAWarning('Bad PP output type for key:', key, keys, ':',
                                           type(values),
                                           '; should be np.ndarray or c1darray!')
                        outcome = False
        else:
            # self.outputFormat == 'PointSet'
            for key, values in outputDic.items():
                if not isinstance(values, arrayTypes):
                    self.raiseAWarning('Bad PP output type for key:', key, ':',
                                       type(values),
                                       '; should be np.ndarray or c1darray!')
                    outcome = False
        return outcome

    def checkInputFormat(self, outputDic):
        """
        This method checks the input part of the generated dictionary: it must be a
        dict in which every entry is a np.ndarray or c1darray.
        A warning is raised for each offending entry.
        @ In, outputDic, dict, input part of the dictionary generated by the run method
        @ Out, outcome, bool, outcome of the outputDic check (True/False)
        """
        if not isinstance(outputDic, dict):
            self.raiseAWarning('Bad PP output dict:', type(outputDic), 'is not a dict!')
            return False
        outcome = True
        for key, values in outputDic.items():
            if not isinstance(values, (np.ndarray, c1darray)):
                self.raiseAWarning('Bad PP output type for key:', key, ':',
                                   type(values),
                                   '; should be np.ndarray or c1darray!')
                outcome = False
        return outcome

    @staticmethod
    def checkArrayMonotonicity(time):
        """
        This method checks that an array is strictly increasing, i.e. every element
        is greater than the one before it. Arrays with fewer than two elements are
        considered monotonic.
        @ In, time, numpy array, array to be checked
        @ Out, outcome, bool, outcome of the monotonicity check
        """
        values = list(time)
        # compare each element with its predecessor (positions, not values, drive the pairing)
        return all(later > earlier for earlier, later in zip(values, values[1:]))
class StepManipulator(utils.metaclass_insert(abc.ABCMeta, object)):
    """
    Abstract interface for the objects that handle step sizing along
    optimization paths.
    """

    ##########################
    # Initialization Methods #
    ##########################
    @classmethod
    def getInputSpecification(cls):
        """
        Builds the input specification describing the data accepted by class cls.
        @ In, cls, the class for which we are retrieving the specification
        @ Out, inputSpecification, InputData.ParameterInput, class to use for
               specifying input of cls.
        """
        inputSpecification = InputData.parameterInputFactory(
            cls.__name__, ordered=False, strictMode=True)
        inputSpecification.description = 'Base class for Step Manipulation algorithms in the GradientDescent Optimizer.'
        return inputSpecification

    @classmethod
    def getSolutionExportVariableNames(cls):
        """
        Provides the acceptable SolutionExport variable options; the base class
        offers none.
        @ In, None
        @ Out, vars, dict, acceptable variable names and descriptions
        """
        return dict()

    def __init__(self):
        """
        Constructor; sets the default values of the instance members.
        @ In, None
        @ Out, None
        """
        ## Instance Variable Initialization
        # _protected
        # names of the optimization variables (e.g. input space vars)
        self._optVars = None
        # public
        # if True, then this stepManip may need to modify opt point acceptance criteria
        self.needsAccessToAcceptance = False
        self.type = self.__class__.__name__
        # __private
        # additional methods

    def handleInput(self, specs):
        """
        Reads the input specs; nothing is read at the base-class level.
        @ In, specs, InputData.ParameterInput, parameter specs interpreted
        @ Out, None
        """
        return None

    def initialize(self, optVars, **kwargs):
        """
        Initializes this object by storing the optimization variable names.
        @ In, optVars, list(str), optimization variables (e.g. input space)
        @ In, kwargs, dict, additional arguments (not used by the base class)
        @ Out, None
        """
        self._optVars = optVars

    ###############
    # Run Methods #
    ###############
    @abc.abstractmethod
    def initialStepSize(self, **kwargs):
        """
        Computes the very first step size of an optimization path.
        @ In, kwargs, dict, keyword-based specifics as required by individual step sizers
        @ Out, stepSize, float, new step size
        """

    @abc.abstractmethod
    def step(self, prevOpt, **kwargs):
        """
        Computes the next step to take along the optimization path.
        @ In, prevOpt, dict, previous optimal point
        @ In, kwargs, dict, keyword-based specifics as required by individual step sizers
        @ Out, newOpt, dict, new optimal point
        @ Out, stepSize, float, new step size
        @ Out, stepInfo, dict, additional information about this step to store
        """

    @abc.abstractmethod
    def fixConstraintViolations(self, proposed, previous, fixInfo):
        """
        Updates the desired optimal point when constraint violations were found.
        @ In, proposed, dict, proposed new optimal point
        @ In, previous, dict, previous optimal point
        @ In, fixInfo, dict, contains record of progress in fixing search
        @ Out, proposed, new proposed point
        @ Out, stepSize, new step size taken
        @ Out, fixInfo, updated fixing info
        """

    @abc.abstractmethod
    def trajIsFollowing(self, traj, opt, info, data):
        """
        Establishes whether the given trajectory is following another trajectory.
        @ In, traj, int, integer identifier for trajectory that needs to be checked
        @ In, opt, dict, most recent optimal point for trajectory
        @ In, info, dict, additional information about optimal point
        @ In, data, DataObjects.DataSet, data collected through optimization so far (SolutionExport)
        """

    def modifyAcceptance(self, oldPoint, oldVal, newPoint, newVal):
        """
        Hook to modify the acceptance criteria; does nothing in the base class.
        Note this is only called if self.needsAccessToAcceptance is True.
        @ In, oldPoint, dict, old opt point
        @ In, oldVal, float, old objective function value
        @ In, newPoint, dict, new opt point
        @ In, newVal, float, new objective function value
        """
        return None

    def needDenormalized(self):
        """
        Tells whether this algorithm needs denormalized input spaces; the base
        class does not.
        @ In, None
        @ Out, needDenormalized, bool, True if normalizing should NOT be performed
        """
        return False

    def updateSolutionExport(self, stepHistory):
        """
        Provides information for the solution export; the base class contributes
        nothing. Overload in inheriting classes at will.
        @ In, stepHistory, list, (magnitude, versor, info) for each step entry
        @ Out, info, dict, realization of data to go in the solutionExport object
        """
        return dict()
class GradientApproximater(utils.metaclass_insert(abc.ABCMeta, object)):
    """
    GradientApproximators use provided information to both select points
    required to estimate gradients as well as calculate the estimates.
    """

    ##########################
    # Initialization Methods #
    ##########################
    @classmethod
    def getInputSpecification(cls):
        """
        Builds the input specification describing the data accepted by class cls,
        including the optional 'gradDistanceScalar' node.
        @ In, None
        @ Out, specs, InputData.ParameterInput, class to use for specifying input of cls.
        """
        specs = InputData.parameterInputFactory(cls.__name__, ordered=False, strictMode=True)
        specs.description = 'Base class for gradient approximation methods used in the GradientDescent Optimizer.'
        distanceScalar = InputData.parameterInputFactory(
            'gradDistanceScalar',
            contentType=InputTypes.FloatType,
            descr=r"""a scalar for the distance away from an optimal point candidate in the optimization search at which points should be evaluated to estimate the local gradient. This scalar is a multiplier for the step size used to reach this optimal point candidate from the previous optimal point, so this scalar should generally be a small percent. \default{0.01}""")
        specs.addSub(distanceScalar)
        return specs

    @classmethod
    def getSolutionExportVariableNames(cls):
        """
        Provides the acceptable SolutionExport variable options; the base class
        offers none.
        @ In, None
        @ Out, vars, dict, acceptable variable names and descriptions
        """
        return dict()

    def __init__(self):
        """
        Constructor; sets the default values of the instance members.
        @ In, None
        @ Out, None
        """
        ## Instance Variable Initialization
        # public
        # _protected
        self._optVars = None    # list(str) of opt variables
        self._proximity = 0.01  # float, scaling for perturbation distance
        self.N = None           # int, dimensionality
        # __private
        # additional methods

    def handleInput(self, specs):
        """
        Reads the input specs; the default perturbation scaling is kept unless a
        'gradDistanceScalar' node is present.
        @ In, specs, InputData.ParameterInput, parameter specs interpreted
        @ Out, None
        """
        scalarNode = specs.findFirst('gradDistanceScalar')
        if scalarNode is None:
            return
        self._proximity = scalarNode.value

    def initialize(self, optVars):
        """
        After construction, finishes initialization of this approximator by
        storing the optimization variables and their number.
        @ In, optVars, list(str), list of optimization variable names
        @ Out, None
        """
        self._optVars = optVars
        self.N = len(optVars)

    ###############
    # Run Methods #
    ###############
    @abc.abstractmethod
    def chooseEvaluationPoints(self, opt, stepSize):
        """
        Selects the new point(s) required to evaluate the gradient.
        @ In, opt, dict, current opt point (normalized)
        @ In, stepSize, float, distance from opt point to sample neighbors
        @ Out, evalPoints, list(dict), list of points that need sampling
        @ Out, evalInfo, list(dict), identifying information about points
        """

    @abc.abstractmethod
    def numGradPoints(self):
        """
        Returns the number of grad points required for the method
        """

    @abc.abstractmethod
    def evaluate(self, opt, grads, infos, objVar):
        """
        Estimates the gradient from the evaluated points.
        @ In, opt, dict, current opt point (normalized)
        @ In, grads, list(dict), evaluated neighbor points
        @ In, infos, list(dict), info about evaluated neighbor points
        @ In, objVar, string, objective variable
        @ Out, magnitude, float, magnitude of gradient
        @ Out, direction, dict, versor (unit vector) for gradient direction
        @ Out, foundInf, bool, if True then infinity calculations were used
        """

    def needDenormalized(self):
        """
        Tells whether this algorithm needs denormalized input spaces; the base
        class does not.
        @ In, None
        @ Out, needDenormalized, bool, True if normalizing should NOT be performed
        """
        return False

    def updateSolutionExport(self, grads, gradInfos):
        """
        Provides information for the solution export; the base class contributes
        nothing. Overload in inheriting classes at will.
        @ In, grads, list, list of gradient magnitudes and versors
        @ In, gradInfos, list, list of identifying information for each grad entry
        @ Out, info, dict, realization of data to go in the solutionExport object
        """
        return dict()
class supervisedLearningGate(utils.metaclass_insert(abc.ABCMeta, BaseType), MessageHandler.MessageUser):
    """
    This class represents an interface with all the supervised learning algorithms
    It is a utility class needed to hide the discernment between time-dependent and static
    surrogate models
    """

    def __init__(self, ROMclass, messageHandler, **kwargs):
        """
        A constructor that will appropriately initialize a supervised learning object
        (static or time-dependent)
        @ In, ROMclass, string, the surrogate model type
        @ In, messageHandler, MessageHandler object, it is in charge of raising errors, and printing messages
        @ In, kwargs, dict, an arbitrary list of kwargs
        @ Out, None
        """
        self.printTag = 'SupervisedGate'
        self.messageHandler = messageHandler
        self.initializationOptions = kwargs
        self.amITrained = False
        self.ROMclass = ROMclass
        # if ROM comes from a pickled rom, this gate is just a placeholder and the Targets check doesn't apply
        self.pickled = self.initializationOptions.pop('pickled', False)
        # check if pivotParameter is specified and in case store it
        self.pivotParameterId = self.initializationOptions.get("pivotParameter", 'time')
        # the ROM is instanced and initialized
        modelInstance = SupervisedLearning.returnInstance(ROMclass, self, **self.initializationOptions)
        # check if the model can autonomously handle the time-dependency
        # (if not and time-dep data are passed in, a list of ROMs are constructed)
        self.canHandleDynamicData = modelInstance.isDynamic()
        # is this ROM time-dependent ?
        self.isADynamicModel = False
        # if it is dynamic and time series are passed in, self.supervisedContainer is not going
        # to be expanded, else it is going to
        self.supervisedContainer = [modelInstance]
        self.historySteps = []

        ### ClusteredRom ###
        # if the ROM targeted by this gate is a cluster, create the cluster now!
        if 'Segment' in self.initializationOptions:
            nameToClass = {'segment': 'Segments', 'cluster': 'Clusters'}
            # read from specs directly
            segSpecs = self.initializationOptions['paramInput'].findFirst('Segment')
            # determine type of segment to load -> limited by InputData to specific options
            segType = segSpecs.parameterValues.get('grouping', 'segment')
            self.initializationOptions['modelInstance'] = modelInstance
            SVL = SupervisedLearning.returnInstance(nameToClass[segType], self, **self.initializationOptions)
            self.supervisedContainer = [SVL]

    def __getstate__(self):
        """
        This function return the state of the ROM.
        NOTE: as a side effect the 'paramInput' specs are removed from this gate and
        from the contained ROMs, since they should all have been read in by now.
        @ In, None
        @ Out, state, dict, it contains all the information needed by the ROM to be initialized
        """
        # clear input specs, as they should all be read in by now
        ## this isn't a great implementation; we should make paramInput picklable instead!
        self.initializationOptions.pop('paramInput', None)
        for eng in self.supervisedContainer:
            eng.initOptionDict.pop('paramInput', None)
        # capture what is normally pickled
        state = self.__dict__.copy()
        if not self.amITrained:
            # untrained ROMs are not stored; they get re-created in __setstate__
            state.pop("supervisedContainer")
        return state

    def __setstate__(self, newstate):
        """
        Initialize the ROM with the data contained in newstate
        @ In, newstate, dict, it contains all the information needed by the ROM to be initialized
        @ Out, None
        """
        self.__dict__.update(newstate)
        if not newstate['amITrained']:
            # NOTE this will fail if the ROM requires the paramInput spec! Fortunately, you shouldn't pickle untrained.
            modelInstance = SupervisedLearning.returnInstance(self.ROMclass, self, **self.initializationOptions)
            self.supervisedContainer = [modelInstance]

    def reset(self):
        """
        This method is aimed to reset the ROM
        @ In, None
        @ Out, None
        """
        for rom in self.supervisedContainer:
            rom.reset()
        self.amITrained = False

    def reseed(self, seed):
        """
        Used to reset the seed of the underlying ROMs.
        @ In, seed, int, new seed to use
        @ Out, None
        """
        for rom in self.supervisedContainer:
            rom.reseed(seed)

    def getInitParams(self):
        """
        This function is called from the base class to print some of the information inside the class.
        Whatever is permanent in the class and not inherited from the parent class should be mentioned here
        The information is passed back in the dictionary. No information about values that change during the
        simulation are allowed
        @ In, None
        @ Out, paramDict, dict, dictionary containing the parameter names as keys
               and each parameter's initial value as the dictionary values
        """
        return self.supervisedContainer[-1].returnInitialParameters()

    def train(self, trainingSet, assembledObjects=None):
        """
        This function train the ROM this gate is linked to. This method is aimed to agnostically understand if a
        "time-dependent-like" ROM needs to be constructed: that is the case when at least one entry of the
        training set is a list.
        @ In, trainingSet, dict, data used to train the ROM; if any of its values is a list a temporal ROM is generated.
        @ In, assembledObjects, dict, optional, objects that the ROM Model has assembled via the Assembler
        @ Out, None
        """
        # a real type check (the name of the type used to be searched as a substring of 'dict')
        if not isinstance(trainingSet, dict):
            self.raiseAnError(IOError, "The training set is not a dictionary!")
        if not trainingSet:
            self.raiseAnError(IOError, "The training set is empty!")

        # provide assembled objects to supervised container
        if assembledObjects is None:
            assembledObjects = {}
        self.supervisedContainer[0].setAssembledObjects(assembledObjects)

        # if training using ROMCollection, special treatment
        if isinstance(self.supervisedContainer[0], SupervisedLearning.Collection):
            self.supervisedContainer[0].train(trainingSet)
        # not a collection # TODO move time-dependent snapshots to collection!
        ## time-dependent or static ROM?
        elif any(isinstance(x, list) for x in trainingSet.values()):
            # we need to build a "time-dependent" ROM
            self.isADynamicModel = True
            if self.pivotParameterId not in trainingSet:
                self.raiseAnError(
                    IOError,
                    'The pivot parameter "{}" is not present in the training set.'.format(self.pivotParameterId),
                    'A time-dependent-like ROM cannot be created!')
            if not isinstance(trainingSet[self.pivotParameterId], list):
                self.raiseAnError(
                    IOError,
                    'The pivot parameter "{}" is not a list.'.format(self.pivotParameterId),
                    " Are you sure it is part of the output space of the training set?")
            self.historySteps = trainingSet[self.pivotParameterId][-1]
            # len() is used on purpose: the truth value of an array-like pivot could be ambiguous
            if len(self.historySteps) == 0:
                self.raiseAnError(IOError, "the training set is empty!")
            # intrinsically time-dependent or does the Gate need to handle it?
            if self.canHandleDynamicData:
                # the ROM is able to manage the time dependency on its own
                self.supervisedContainer[0].train(trainingSet)
            else:
                # TODO we can probably migrate this time-dependent handling to a type of ROMCollection!
                # we need to construct a chain of ROMs
                # the check on the number of time steps (consistency) is performed inside the historySnapShoots method
                # get the time slices
                newTrainingSet = mathUtils.historySnapShoots(trainingSet, len(self.historySteps))
                assert isinstance(newTrainingSet, list)
                # keep the original ROM as the template each time-slice ROM is copied from
                originalROM = self.supervisedContainer[0]
                # start creating and training the time-dep ROMs
                self.supervisedContainer = []
                for ts in range(len(self.historySteps)):
                    sliceROM = copy.deepcopy(originalROM)
                    self.supervisedContainer.append(sliceROM)
                    sliceROM.train(newTrainingSet[ts])
        else:
            # a static ROM
            self.supervisedContainer[0].train(trainingSet)
        self.amITrained = True

    @staticmethod
    def _accumulateSlice(accumulated, sliceEvaluation):
        """
        Merges the values computed by one ROM of the container into the accumulated ones.
        The first slice initializes the dictionary; the following ones are appended key by key.
        @ In, accumulated, dict, values collected so far (modified in place)
        @ In, sliceEvaluation, dict, values computed by a single ROM
        @ Out, None
        """
        if not accumulated:
            accumulated.update(sliceEvaluation)
        else:
            for key in accumulated:
                accumulated[key] = np.append(accumulated[key], sliceEvaluation[key])

    def confidence(self, request):
        """
        This is to get a value that is inversely proportional to the confidence that we have
        forecasting the target value for the given set of features. The reason to chose the inverse is because
        in case of normal distance this would be 1/distance that could be infinity
        @ In, request, dict, realizations request ({'feature1':np.array(n_realizations),'feature2',np.array(n_realizations)})
        @ Out, confidenceDict, dict, the dictionary where the confidence is stored for each target
        """
        if not self.amITrained:
            self.raiseAnError(
                RuntimeError,
                "ROM " + self.initializationOptions['name'] +
                " has not been trained yet and, consequentially, can not be evaluated!")
        confidenceDict = {}
        for rom in self.supervisedContainer:
            self._accumulateSlice(confidenceDict, rom.confidence(request))
        return confidenceDict

    def evaluate(self, request):
        """
        Method to perform the evaluation of a point or a set of points through the linked surrogate model
        @ In, request, dict, realizations request ({'feature1':np.array(n_realizations),'feature2',np.array(n_realizations)})
        @ Out, resultsDict, dict, dictionary of results ({target1:np.array,'target2':np.array}).
        """
        if self.pickled:
            self.raiseAnError(
                RuntimeError,
                'ROM "' + self.initializationOptions['name'] +
                '" has not been loaded yet!  Use an IOStep to load it.')
        if not self.amITrained:
            self.raiseAnError(
                RuntimeError,
                "ROM " + self.initializationOptions['name'] +
                " has not been trained yet and, consequentially, can not be evaluated!")
        # a ROM collection takes care of the whole evaluation on its own
        if isinstance(self.supervisedContainer[0], SupervisedLearning.Collection):
            return self.supervisedContainer[0].evaluate(request)
        resultsDict = {}
        for rom in self.supervisedContainer:
            self._accumulateSlice(resultsDict, rom.evaluate(request))
        return resultsDict
class Optimizer(utils.metaclass_insert(abc.ABCMeta, BaseType), Assembler): """ This is the base class for optimizers Optimizer is a special type of "samplers" that own the optimization strategy (Type) and they generate the input values to optimize a loss function. They do not have distributions inside!!!! --Instance-- myInstance = Optimizer() myInstance.XMLread(xml.etree.ElementTree.Element) This method generates all the information that will be permanent for the object during the simulation --usage-- myInstance = Optimizer() myInstance.XMLread(xml.etree.ElementTree.Element) This method generate all permanent information of the object from <Simulation> myInstance.whatDoINeed() -see Assembler class- myInstance.initialize() This method is called from the <Step> before the Step process start. myInstance.amIreadyToProvideAnInput Requested from <Step> used to verify that the optimizer is available to generate a new input for the model myInstance.generateInput(self,model,oldInput) Requested from <Step> to generate a new input. 
Generate the new values and request to model to modify according the input and returning it back --Other inherited methods-- myInstance.whoAreYou() -see BaseType class- myInstance.myCurrentSetting() -see BaseType class- --Adding a new Optimizer subclass-- <MyClass> should inherit at least from Optimizer or from another derived class already presents DO NOT OVERRIDE any of the class method that are not starting with self.local* ADD your class to the dictionary __InterfaceDict in the Factory submodule The following method overriding is MANDATORY: self.localGenerateInput(model,oldInput) : this is where the step happens, after this call the output is ready self._localGenerateAssembler(initDict) self._localWhatDoINeed() the following methods could be overrode: self.localInputAndChecks(xmlNode) self.localGetInitParams() self.localGetCurrentSetting() self.localInitialize() self.localStillReady(ready) self.localFinalizeActualSampling(jobObject,model,myInput) """ def __init__(self): """ Default Constructor that will initialize member variables with reasonable defaults or empty lists/dictionaries where applicable. @ In, None @ Out, None """ #FIXME: Since the similarity of this class with the base sampler, we should merge this BaseType.__init__(self) Assembler.__init__(self) self.counter = { } # Dict containing counters used for based and derived class self.counter[ 'mdlEval'] = 0 # Counter of the model evaluation performed (better the input generated!!!). It is reset by calling the function self.initialize self.counter[ 'varsUpdate'] = 0 # Counter of the optimization iteration. self.limit = {} # Dict containing limits for each counter self.limit[ 'mdlEval'] = sys.maxsize # Maximum number of the loss function evaluation self.limit[ 'varsUpdate'] = sys.maxsize # Maximum number of the optimization iteration. 
self.initSeed = None # Seed for random number generators self.optVars = None # Decision variables for optimization self.optVarsInit = { } # Dict containing upper/lower bounds and initial of each decision variables self.optVarsInit['upperBound'] = { } # Dict containing upper bounds of each decision variables self.optVarsInit['lowerBound'] = { } # Dict containing lower bounds of each decision variables self.optVarsInit['initial'] = { } # Dict containing initial values of each decision variables self.optVarsHist = { } # History of normalized decision variables for each iteration self.nVar = 0 # Number of decision variables self.objVar = None # Objective variable to be optimized self.optType = None # Either maximize or minimize self.optTraj = None # Identifiers of parallel optimization trajectories self.thresholdTrajRemoval = None # Threshold used to determine the convergence of parallel optimization trajectories self.paramDict = { } # Dict containing additional parameters for derived class self.absConvergenceTol = 0.0 # Convergence threshold (absolute value) self.relConvergenceTol = 1.e-3 # Convergence threshold (relative value) self.solutionExport = None #This is the data used to export the solution (it could also not be present) self.values = { } # for each variable the current value {'var name':value} self.inputInfo = { } # depending on the optimizer several different type of keywarded information could be present only one is mandatory, see below self.inputInfo[ 'SampledVars'] = self.values # this is the location where to get the values of the sampled variables self.constants = {} # dictionary of constants variables self.FIXME = False # FIXME flag self.printTag = self.type # prefix for all prints (optimizer type) self._endJobRunnable = sys.maxsize # max number of inputs creatable by the optimizer right after a job ends self.constraintFunction = None # External constraint function, could be not present self.mdlEvalHist = None # Containing information of all model 
evaluation self.objSearchingROM = None # ROM used internally for fast loss function evaluation self.addAssemblerObject('Restart', '-n', True) self.addAssemblerObject('TargetEvaluation', '1') self.addAssemblerObject('Function', '-1') def _localGenerateAssembler(self, initDict): """ It is used for sending to the instanciated class, which is implementing the method, the objects that have been requested through "whatDoINeed" method It is an abstract method -> It must be implemented in the derived class! @ In, initDict, dict, dictionary ({'mainClassName(e.g., Databases):{specializedObjectName(e.g.,DatabaseForSystemCodeNamedWolf):ObjectInstance}'}) @ Out, None """ ## FIX ME -- this method is inherited from sampler and may not be needed by optimizer ## Currently put here as a place holder pass def _localWhatDoINeed(self): """ This method is a local mirror of the general whatDoINeed method. It is implemented by the optimizers that need to request special objects @ In, None @ Out, needDict, dict, list of objects needed """ ## FIX ME -- this method is inherited from sampler and may not be needed by optimizer ## Currently put here as a place holder return {} def _readMoreXML(self, xmlNode): """ Function to read the portion of the xml input that belongs to this specialized class and initialize some stuff based on the inputs got @ In, xmlNode, xml.etree.ElementTree.Element, Xml element node @ Out, None """ Assembler._readMoreXML(self, xmlNode) self._readMoreXMLbase(xmlNode) self.localInputAndChecks(xmlNode) def _readMoreXMLbase(self, xmlNode): """ Function to read the portion of the xml input that belongs to the base optimizer only and initialize some stuff based on the inputs got @ In, xmlNode, xml.etree.ElementTree.Element, Xml element node1 @ Out, None """ for child in xmlNode: if child.tag == "variable": if self.optVars == None: self.optVars = [] varname = str(child.attrib['name']) self.optVars.append(varname) for childChild in child: if childChild.tag == "upperBound": 
self.optVarsInit['upperBound'][varname] = float( childChild.text) elif childChild.tag == "lowerBound": self.optVarsInit['lowerBound'][varname] = float( childChild.text) elif childChild.tag == "initial": self.optVarsInit['initial'][varname] = {} temp = childChild.text.split(',') for trajInd, initVal in enumerate(temp): try: self.optVarsInit['initial'][varname][ trajInd] = float(initVal) except ValueError: self.raiseAnError( ValueError, "Unable to convert to float the intial value for variable " + varname + " in trajectory " + str(trajInd)) if self.optTraj == None: self.optTraj = range( len(self.optVarsInit['initial'] [varname].keys())) elif child.tag == "constant": value = utils.partialEval(child.text) if value is None: self.raiseAnError( IOError, 'The body of "constant" XML block should be a number. Got: ' + child.text) try: self.constants[child.attrib['name']] = value except KeyError: self.raiseAnError( KeyError, child.tag + ' must have the attribute "name"!!!') elif child.tag == "objectVar": self.objVar = child.text elif child.tag == "initialization": self.initSeed = Distributions.randomIntegers(0, 2**31, self) for childChild in child: if childChild.tag == "limit": self.limit['mdlEval'] = int(childChild.text) elif childChild.tag == "type": self.optType = childChild.text if self.optType not in ['min', 'max']: self.raiseAnError( IOError, 'Unknown optimization type ' + childChild.text + '. 
Available: mix or max') elif childChild.tag == "initialSeed": self.initSeed = int(childChild.text) elif childChild.tag == 'thresholdTrajRemoval': self.thresholdTrajRemoval = float(childChild.text) else: self.raiseAnError( IOError, 'Unknown tag ' + childChild.tag + ' .Available: limit, type, initialSeed!') elif child.tag == "convergence": for childChild in child: if childChild.tag == "iterationLimit": self.limit['varsUpdate'] = int(childChild.text) if childChild.tag == "absoluteThreshold": self.absConvergenceTol = float(childChild.text) if childChild.tag == "relativeThreshold": self.relConvergenceTol = float(childChild.text) elif child.tag == "restartTolerance": self.restartTolerance = float(child.text) elif child.tag == 'parameter': for childChild in child: self.paramDict[childChild.tag] = childChild.text if self.optType == None: self.optType = 'min' if self.thresholdTrajRemoval == None: self.thresholdTrajRemoval = 0.05 if self.initSeed == None: self.initSeed = Distributions.randomIntegers(0, 2**31, self) if self.objVar == None: self.raiseAnError( IOError, 'Object variable is not specified for optimizer!') if self.optVars == None: self.raiseAnError( IOError, 'Decision variable is not specified for optimizer!') else: self.optVars.sort() if self.optTraj == None: self.optTraj = [0] for varname in self.optVars: if varname not in self.optVarsInit['upperBound'].keys(): self.raiseAnError( IOError, 'Upper bound for ' + varname + ' is not provided') if varname not in self.optVarsInit['lowerBound'].keys(): self.raiseAnError( IOError, 'Lower bound for ' + varname + ' is not provided') if varname not in self.optVarsInit['initial'].keys(): self.optVarsInit['initial'][varname] = {} for trajInd in self.optTraj: self.optVarsInit['initial'][varname][trajInd] = ( self.optVarsInit['upperBound'][varname] + self.optVarsInit['lowerBound'][varname]) / 2.0 else: for trajInd in self.optTraj: initVal = self.optVarsInit['initial'][varname][trajInd] if initVal < 
self.optVarsInit['lowerBound'][ varname] or initVal > self.optVarsInit[ 'upperBound'][varname]: self.raiseAnError( IOError, "The initial value for variable " + varname + " and trajectory " + str(trajInd) + " is outside the domain identified by the lower and upper bounds!" ) if len(self.optTraj) != len( self.optVarsInit['initial'][varname].keys()): self.raiseAnError( ValueError, 'Number of initial values does not equal to the number of parallel optimization trajectories' ) self.optTrajLive = copy.deepcopy(self.optTraj) def localInputAndChecks(self, xmlNode): """ Local method. Place here the additional reading, remember to add initial parameters in the method localGetInitParams @ In, xmlNode, xml.etree.ElementTree.Element, Xml element node @ Out, None """ pass # To be overwritten by subclass def endJobRunnable(self): """ Returns the maximum number of inputs allowed to be created by the optimizer right after a job ends @ In, None @ Out, endJobRunnable, int, number of runnable jobs at the end of each job """ return self._endJobRunnable def getInitParams(self): """ This function is called from the base class to print some of the information inside the class. Whatever is permanent in the class and not inherited from the parent class should be mentioned here The information is passed back in the dictionary. 
No information about values that change during the simulation are allowed @ In, None @ Out, paramDict, dict, dictionary containing the parameter names as keys and each parameter's initial value as the dictionary values """ paramDict = {} for variable in self.optVars: paramDict[variable] = 'is sampled as a decision variable' paramDict['limit_mdlEval'] = self.limit['mdlEval'] paramDict['limit_optIter'] = self.limit['varsUpdate'] paramDict['initial seed'] = self.initSeed paramDict.update(self.localGetInitParams()) return paramDict def localGetInitParams(self): """ Method used to export to the printer in the base class the additional PERMANENT your local class have @ In, None @ Out, paramDict, dict, dictionary containing the parameter names as keys and each parameter's initial value as the dictionary values """ return {} def getCurrentSetting(self): """ This function is called from the base class to print some of the information inside the class. Whatever is a temporary value in the class and not inherited from the parent class should be mentioned here The information is passed back in the dictionary @ In, None @ Out, paramDict, dict, dictionary containing the parameter names as keys and each parameter's initial value as the dictionary values """ paramDict = {} paramDict['counter_mdlEval'] = self.counter['mdlEval'] paramDict['counter_varsUpdate'] = self.counter['varsUpdate'] paramDict['initial seed'] = self.initSeed for key in self.inputInfo: if key != 'SampledVars': paramDict[key] = self.inputInfo[key] else: for var in self.inputInfo['SampledVars'].keys(): paramDict['Variable: ' + var + ' has value'] = paramDict[key][var] paramDict.update(self.localGetCurrentSetting()) return paramDict def localGetCurrentSetting(self): """ Returns a dictionary with class specific information regarding the current status of the object. 
@ In, None @ Out, paramDict, dict, dictionary containing the parameter names as keys and each parameter's initial value as the dictionary values """ return {} def initialize(self, externalSeeding=None, solutionExport=None): """ This function should be called every time a clean optimizer is needed. Called before takeAstep in <Step> @ In, externalSeeding, int, optional, external seed @ In, solutionExport, DataObject, optional, a PointSet to hold the solution @ Out, None """ self.counter['mdlEval'] = 0 self.counter['varsUpdate'] = [0] * len(self.optTraj) self.nVar = len(self.optVars) self.mdlEvalHist = self.assemblerDict['TargetEvaluation'][0][3] self.objSearchingROM = SupervisedLearning.returnInstance( 'SciKitLearn', self, **{ 'SKLtype': 'neighbors|KNeighborsRegressor', 'Features': ','.join(list(self.optVars)), 'Target': self.objVar, 'n_neighbors': 1, 'weights': 'distance' }) self.solutionExport = solutionExport if solutionExport != None and type( solutionExport).__name__ != "HistorySet": self.raiseAnError( IOError, 'solutionExport type is not a HistorySet. 
Got ' + type(solutionExport).__name__ + '!') if 'Function' in self.assemblerDict.keys(): self.constraintFunction = self.assemblerDict['Function'][0][3] if 'constrain' not in self.constraintFunction.availableMethods(): self.raiseAnError( IOError, 'the function provided to define the constraints must have an implemented method called "constrain"' ) if self.initSeed != None: Distributions.randomSeed(self.initSeed) # specializing the self.localInitialize() if solutionExport != None: self.localInitialize(solutionExport=solutionExport) else: self.localInitialize() def localInitialize(self, solutionExport=None): """ Use this function to add initialization features to the derived class it is call at the beginning of each step @ In, solutionExport, DataObject, optional, a PointSet to hold the solution @ Out, None """ pass # To be overwritten by subclass def amIreadyToProvideAnInput(self): #inLastOutput=None): """ This is a method that should be called from any user of the optimizer before requiring the generation of a new input. This method act as a "traffic light" for generating a new input. Reason for not being ready could be for example: exceeding number of model evaluation, convergence criteria met, etc. @ In, None @ Out, ready, bool, indicating the readiness of the optimizer to generate a new input. """ ready = True if self.counter['mdlEval'] < self.limit[ 'mdlEval'] else False convergence = self.checkConvergence() ready = self.localStillReady(ready, convergence) return ready def localStillReady(self, ready, convergence=False): #,lastOutput=None """ Determines if optimizer is ready to provide another input. If not, and if jobHandler is finished, this will end sampling. @ In, ready, bool, variable indicating whether the caller is prepared for another input. @ In, convergence, bool, optional, variable indicating whether the convergence criteria has been met. @ Out, ready, bool, variable indicating whether the caller is prepared for another input. 
""" return ready # To be overwritten by subclass def getLossFunctionGivenId(self, evaluationID): """ Method to get the Loss Function value given an evaluation ID @ In, evaluationID, string, the evaluation identifier (prefix) @ Out, functionValue, float, the loss function value """ objective = self.mdlEvalHist.getParametersValues( 'outputs', nodeId='RecontructEnding')[self.objVar] prefix = self.mdlEvalHist.getMetadata('prefix', nodeId='RecontructEnding') if len(prefix) > 0 and utils.returnIdSeparator() in prefix[0]: # ensemble model id modification # FIXME: Need to find a better way to handle this case prefix = [ key.split(utils.returnIdSeparator())[-1] for key in prefix ] search = dict(zip(prefix, objective)) functionValue = search.get(evaluationID, None) return functionValue def lossFunctionEval(self, optVars): """ Method to evaluate the loss function based on all model evaluation. @ In, optVars, dict, dictionary containing the values of decision variables to be evaluated optVars should have the form {varName1:[value11, value12,...value1n], varName2:[value21, value22,...value2n]...} @ Out, lossFunctionValue, numpy array, loss function values corresponding to each point in optVars """ tempDict = copy.copy( self.mdlEvalHist.getParametersValues('inputs', nodeId='RecontructEnding')) tempDict.update( self.mdlEvalHist.getParametersValues('outputs', nodeId='RecontructEnding')) for key in tempDict.keys(): tempDict[key] = np.asarray(tempDict[key]) self.objSearchingROM.train(tempDict) if self.gradDict['normalize']: optVars = self.denormalizeData(optVars) for key in optVars.keys(): optVars[key] = np.atleast_1d(optVars[key]) lossFunctionValue = self.objSearchingROM.evaluate(optVars)[self.objVar] if self.optType == 'min': return lossFunctionValue else: return lossFunctionValue * -1.0 def checkConstraint(self, optVars): """ Method to check whether a set of decision variables satisfy the constraint or not @ In, optVars, dict, dictionary containing the value of decision variables 
to be checked, in form of {varName: varValue} @ Out, satisfaction, tuple, (bool,list) => (variable indicating the satisfaction of constraints at the point optVars, list of the violated constrains) """ violatedConstrains = {'internal': [], 'external': []} if self.constraintFunction == None: satisfied = True else: satisfied = True if self.constraintFunction.evaluate( "constrain", optVars) == 1 else False if not satisfied: violatedConstrains['external'].append( self.constraintFunction.name) if self.gradDict['normalize']: optVars = self.denormalizeData(optVars) for var in optVars: if optVars[var] > self.optVarsInit['upperBound'][var] or optVars[ var] < self.optVarsInit['lowerBound'][var]: satisfied = False if optVars[var] > self.optVarsInit['upperBound'][var]: violatedConstrains['internal'].append( [var, self.optVarsInit['upperBound'][var]]) if optVars[var] < self.optVarsInit['lowerBound'][var]: violatedConstrains['internal'].append( [var, self.optVarsInit['lowerBound'][var]]) satisfied = self.localCheckConstraint(optVars, satisfied) satisfaction = satisfied, violatedConstrains return satisfaction @abc.abstractmethod def localCheckConstraint(self, optVars, satisfaction=True): """ Local method to check whether a set of decision variables satisfy the constraint or not @ In, optVars, dict, dictionary containing the value of decision variables to be checked, in form of {varName: varValue} @ In, satisfaction, bool, optional, variable indicating how the caller determines the constraint satisfaction at the point optVars @ Out, satisfaction, bool, variable indicating the satisfaction of constraints at the point optVars """ return satisfaction @abc.abstractmethod def checkConvergence(self): """ Method to check whether the convergence criteria has been met. @ In, none, @ Out, convergence, bool, variable indicating whether the convergence criteria has been met. 
""" def normalizeData(self, optVars): """ Method to normalize the data @ In, optVars, dict, dictionary containing the value of decision variables to be normalized, in form of {varName: varValue} @ Out, optVarsNorm, dict, dictionary containing the value of normalized decision variables, in form of {varName: varValue} """ optVarsNorm = {} for var in optVars.keys(): optVarsNorm[var] = (optVars[var] - self.optVarsInit['lowerBound'][var]) / ( self.optVarsInit['upperBound'][var] - self.optVarsInit['lowerBound'][var]) return optVarsNorm def denormalizeData(self, optVars): """ Method to normalize the data @ In, optVars, dict, dictionary containing the value of decision variables to be deormalized, in form of {varName: varValue} @ Out, optVarsDenorm, dict, dictionary containing the value of denormalized decision variables, in form of {varName: varValue} """ optVarsDenorm = {} for var in optVars.keys(): optVarsDenorm[var] = optVars[var] * ( self.optVarsInit['upperBound'][var] - self.optVarsInit['lowerBound'][var] ) + self.optVarsInit['lowerBound'][var] return optVarsDenorm def generateInput(self, model, oldInput): """ Method to generate input for model to run @ In, model, model instance, it is the instance of a RAVEN model @ In, oldInput, list, a list of the original needed inputs for the model (e.g. list of files, etc. 
etc) @ Out, generateInput, tuple(int,dict), (1,realization dictionary) """ self.counter[ 'mdlEval'] += 1 #since we are creating the input for the next run we increase the counter and global counter self.inputInfo['prefix'] = str(self.counter['mdlEval']) model.getAdditionalInputEdits(self.inputInfo) self.localGenerateInput(model, oldInput) #### CONSTANT VARIABLES #### if len(self.constants) > 0: self.values.update(self.constants) self.raiseADebug('Found new input to evaluate:', self.values) return 0, model.createNewInput(oldInput, self.type, **self.inputInfo) @abc.abstractmethod def localGenerateInput(self, model, oldInput): """ This class need to be overwritten since it is here that the magic of the optimizer happens. After this method call the self.inputInfo should be ready to be sent to the model @ In, model, model instance, it is the instance of a RAVEN model @ In, oldInput, list, a list of the original needed inputs for the model (e.g. list of files, etc. etc) @ Out, None """ pass def finalizeActualSampling(self, jobObject, model, myInput): """ This function is used by optimizers that need to collect information from a finished run. Provides a generic interface that all optimizers will use, for specifically handling any sub-class, the localFinalizeActualSampling should be overridden instead, as finalizeActualSampling provides only generic functionality shared by all optimizers and will in turn call the localFinalizeActualSampling before returning. @ In, jobObject, instance, an instance of a JobHandler @ In, model, model instance, it is the instance of a RAVEN model @ In, myInput, list, the generating input """ self.localFinalizeActualSampling(jobObject, model, myInput) def localFinalizeActualSampling(self, jobObject, model, myInput): """ Overwrite only if you need something special at the end of each run.... 
This function is used by optimizers that need to collect information from the just ended run @ In, jobObject, instance, an instance of a JobHandler @ In, model, model instance, it is the instance of a RAVEN model @ In, myInput, list, the generating input """ pass def handleFailedRuns(self, failedRuns): """ Collects the failed runs from the Step and allows optimizer to handle them individually if need be. @ In, failedRuns, list, list of JobHandler.ExternalRunner objects @ Out, None """ self.raiseADebug('===============') self.raiseADebug('| RUN SUMMARY |') self.raiseADebug('===============') if len(failedRuns) > 0: self.raiseAWarning( 'There were %i failed runs! Run with verbosity = debug for more details.' % (len(failedRuns))) for run in failedRuns: metadata = run.getMetadata() self.raiseADebug(' Run number %s FAILED:' % run.identifier, run.command) self.raiseADebug(' return code :', run.getReturnCode()) if metadata is not None: self.raiseADebug(' sampled vars:') for v, k in metadata['SampledVars'].items(): self.raiseADebug(' ', v, ':', k) else: self.raiseADebug('All runs completed without returning errors.') self._localHandleFailedRuns(failedRuns) self.raiseADebug('===============') self.raiseADebug(' END SUMMARY ') self.raiseADebug('===============') def _localHandleFailedRuns(self, failedRuns): """ Specialized method for optimizers to handle failed runs. Defaults to failing runs. @ In, failedRuns, list, list of JobHandler.ExternalRunner objects @ Out, None """ if len(failedRuns) > 0: self.raiseAnError(IOError, 'There were failed runs; aborting RAVEN.')
class Metric(utils.metaclass_insert(abc.ABCMeta, BaseType)):
    """
    This is the general interface to any RAVEN metric object.
    It contains an initialize, a _readMoreXML, and an evaluation (i.e., distance) methods
    """

    @classmethod
    def getInputSpecification(cls):
        """
        Method to get a reference to a class that specifies the input data for
        class cls.
        @ In, cls, the class for which we are retrieving the specification
        @ Out, inputSpecification, InputData.ParameterInput, class to use for
          specifying input of cls.
        """
        inputSpecification = super(Metric, cls).getInputSpecification()
        return inputSpecification

    def __init__(self):
        """
        This is the basic method initialize the metric object
        @ In, None
        @ Out, None
        """
        BaseType.__init__(self)
        self.type = self.__class__.__name__
        self.name = self.__class__.__name__
        # If True the metric needs to be able to handle (value,probability) where value and probability are lists
        self.acceptsProbability = False
        # If True the metric needs to be able to handle a passed in Distribution
        self.acceptsDistribution = False
        # If True the metric needs to be able to handle dynamic data
        self._dynamicHandling = False
        # If True the metric needs to be able to handle pairwise data
        self._pairwiseHandling = False

    def initialize(self, inputDict):
        """
        This method initialize each metric object
        @ In, inputDict, dict, dictionary containing initialization parameters
        @ Out, None
        """
        pass

    def _readMoreXML(self, xmlNode):
        """
        Method that reads the portion of the xml input that belongs to this specialized class
        and initialize internal parameters
        @ In, xmlNode, xml.etree.Element, Xml element node
        @ Out, None
        """
        self._localReadMoreXML(xmlNode)

    def evaluate(self, x, y, weights=None, axis=0, **kwargs):
        """
        This method compute the metric between x and y
        @ In, x, numpy.ndarray or instance of Distributions.Distribution, array containing data of x,
          or given distribution.
        @ In, y, numpy.ndarray, or instance of Distributions.Distribution, array containing data of y,
          or given distribution.
        @ In, weights, numpy.ndarray, optional, an array of weights associated with x
        @ In, axis, integer, optional, axis along which a metric is performed, default is 0,
          i.e. the metric will performed along the first dimension (the "rows").
          If metric postprocessor is used, the first dimension is the RAVEN_sample_ID,
          and the second dimension is the pivotParameter if HistorySet is provided.
        @ In, kwargs, dict, dictionary of parameters characteristic of each metric
        @ Out, value, float or numpy.array, metric results between x and y
        """
        # forward the caller's axis (it used to be silently replaced by 0)
        value = self.__evaluateLocal__(x, y, weights=weights, axis=axis, **kwargs)
        return value

    def isDynamic(self):
        """
        This method is utility function that tells if the metric is able to
        treat dynamic data on its own or not
        @ In, None
        @ Out, isDynamic, bool, True if the metric is able to treat dynamic data, False otherwise
        """
        return self._dynamicHandling

    def isPairwise(self):
        """
        This method is utility function that tells if the metric is able to
        treat pairwise data on its own or not
        @ In, None
        @ Out, isPairwise, bool, True if the metric is able to handle pairwise data, False otherwise
        """
        return self._pairwiseHandling

    @abc.abstractmethod
    def __evaluateLocal__(self, x, y, weights=None, axis=0, **kwargs):
        """
        Metric-specific computation, to be implemented by each concrete metric.
        It is invoked by evaluate, which forwards all of its arguments to it.
        @ In, x, same object received by evaluate as x
        @ In, y, same object received by evaluate as y
        @ In, weights, optional, same object received by evaluate as weights
        @ In, axis, integer, optional, same value received by evaluate as axis
        @ In, kwargs, dict, same keyword arguments received by evaluate
        @ Out, value, metric result, returned unchanged by evaluate
        """
class supervisedLearning(utils.metaclass_insert(abc.ABCMeta), MessageHandler.MessageUser): """ This is the general interface to any supervisedLearning learning method. Essentially it contains a train method and an evaluate method """ returnType = '' # this describe the type of information generated the possibility are 'boolean', 'integer', 'float' qualityEstType = [ ] # this describe the type of estimator returned known type are 'distance', 'probability'. The values are returned by the self.__confidenceLocal__(Features) ROMtype = '' # the broad class of the interpolator ROMmultiTarget = False # ROMtimeDependent = False # is this ROM able to treat time-like (any monotonic variable) explicitly in its formulation? @staticmethod def checkArrayConsistency(arrayIn, isDynamic=False): """ This method checks the consistency of the in-array @ In, arrayIn, object, It should be an array @ In, isDynamic, bool, optional, is Dynamic? @ Out, (consistent, 'error msg'), tuple, tuple[0] is a bool (True -> everything is ok, False -> something wrong), tuple[1], string ,the error mesg """ #checking if None provides a more clear message about the problem if arrayIn is None: return (False, ' The object is None, and contains no entries!') if type(arrayIn).__name__ == 'list': if isDynamic: for cnt, elementArray in enumerate(arrayIn): resp = supervisedLearning.checkArrayConsistency( elementArray) if not resp[0]: return (False, ' The element number ' + str(cnt) + ' is not a consistent array. Error: ' + resp[1]) else: return (False, ' The list type is allowed for dynamic ROMs only') else: if type(arrayIn).__name__ not in ['ndarray', 'c1darray']: return (False, ' The object is not a numpy array. Got type: ' + type(arrayIn).__name__) if len(np.asarray(arrayIn).shape) > 1: return (False, ' The array must be 1-d. 
Got shape: ' + str(np.asarray(arrayIn).shape)) return (True, '') def __init__(self, messageHandler, **kwargs): """ A constructor that will appropriately initialize a supervised learning object @ In, messageHandler, MessageHandler object, it is in charge of raising errors, and printing messages @ In, kwargs, dict, an arbitrary list of kwargs @ Out, None """ self.printTag = 'Supervised' self.messageHandler = messageHandler self._dynamicHandling = False self._assembledObjects = None # objects assembled by the ROM Model, passed through. self.numThreads = kwargs.pop('NumThreads', None) #booleanFlag that controls the normalization procedure. If true, the normalization is performed. Default = True if kwargs != None: self.initOptionDict = kwargs else: self.initOptionDict = {} if 'Features' not in self.initOptionDict.keys(): self.raiseAnError(IOError, 'Feature names not provided') if 'Target' not in self.initOptionDict.keys(): self.raiseAnError(IOError, 'Target name not provided') self.features = self.initOptionDict.pop('Features') self.target = self.initOptionDict.pop('Target') self.verbosity = self.initOptionDict[ 'verbosity'] if 'verbosity' in self.initOptionDict else None for target in self.target: if target in self.features: #self.features.count(target) > 0: self.raiseAnError( IOError, 'The target "' + target + '" is also in the features!') #average value and sigma are used for normalization of the feature data #a dictionary where for each feature a tuple (average value, sigma) self.muAndSigmaFeatures = {} #these need to be declared in the child classes!!!! 
self.amITrained = False self.kerasROMDict = self.initOptionDict.pop( 'KerasROMDict', None) # dictionary for ROM builded by Keras def __getstate__(self): """ This function return the state of the ROM @ In, None @ Out, state, dict, it contains all the information needed by the ROM to be initialized """ state = copy.copy(self.__dict__) state['initOptionDict'].pop('paramInput', None) ## capture what is normally pickled if not self.amITrained: supervisedEngineObj = state.pop("supervisedContainer", None) del supervisedEngineObj return state def __setstate__(self, d): """ Initialize the ROM with the data contained in newstate @ In, d, dict, it contains all the information needed by the ROM to be initialized @ Out, None """ self.__dict__.update(d) def initialize(self, idict): """ Initialization method @ In, idict, dict, dictionary of initialization parameters @ Out, None """ pass #Overloaded by (at least) GaussPolynomialRom def setAssembledObjects(self, assembledObjects): """ Allows providing entities from the Assembler to be used in supervised learning algorithms. @ In, assembledObjects, dict, assembled objects that the ROM model requested as an Assembler. @ Out, None """ self._assembledObjects = assembledObjects def readAssembledObjects(self): """ Collects the entities from the Assembler as needed. In general, SVL don't need any assembled objects. @ In, None @ Out, None """ pass def train(self, tdict): """ Method to perform the training of the supervisedLearning algorithm NB.the supervisedLearning object is committed to convert the dictionary that is passed (in), into the local format the interface with the kernels requires. So far the base class will do the translation into numpy @ In, tdict, dict, training dictionary @ Out, None """ if type(tdict) != dict: self.raiseAnError( TypeError, 'In method "train", the training set needs to be provided through a dictionary. 
Type of the in-object is ' + str(type(tdict))) names, values = list(tdict.keys()), list(tdict.values()) ## This is for handling the special case needed by SKLtype=*MultiTask* that ## requires multiple targets. targetValues = [] for target in self.target: if target in names: targetValues.append(values[names.index(target)]) else: self.raiseAnError( IOError, 'The target ' + target + ' is not in the training set') #FIXME: when we do not support anymore numpy <1.10, remove this IF STATEMENT if int(np.__version__.split('.')[1]) >= 10: targetValues = np.stack(targetValues, axis=-1) else: sl = (slice(None), ) * np.asarray(targetValues[0]).ndim + ( np.newaxis, ) targetValues = np.concatenate( [np.asarray(arr)[sl] for arr in targetValues], axis=np.asarray(targetValues[0]).ndim) # construct the evaluation matrixes featureValues = np.zeros(shape=(len(targetValues), len(self.features))) for cnt, feat in enumerate(self.features): if feat not in names: self.raiseAnError( IOError, 'The feature sought ' + feat + ' is not in the training set') else: valueToUse = values[names.index(feat)] resp = self.checkArrayConsistency(valueToUse, self.isDynamic()) if not resp[0]: self.raiseAnError( IOError, 'In training set for feature ' + feat + ':' + resp[1]) valueToUse = np.asarray(valueToUse) if len(valueToUse) != featureValues[:, 0].size: self.raiseAWarning('feature values:', featureValues[:, 0].size, tag='ERROR') self.raiseAWarning('target values:', len(valueToUse), tag='ERROR') self.raiseAnError( IOError, 'In training set, the number of values provided for feature ' + feat + ' are != number of target outcomes!') self._localNormalizeData(values, names, feat) # valueToUse can be either a matrix (for who can handle time-dep data) or a vector (for who can not) featureValues[:, cnt] = ( (valueToUse[:, 0] if len(valueToUse.shape) > 1 else valueToUse[:]) - self.muAndSigmaFeatures[feat][0] ) / self.muAndSigmaFeatures[feat][1] self.__trainLocal__(featureValues, targetValues) self.amITrained = True 
def _localNormalizeData(self, values, names, feat): """ Method to normalize data based on the mean and standard deviation. If undesired for a particular ROM, this method can be overloaded to simply pass (see, e.g., GaussPolynomialRom). @ In, values, list, list of feature values (from tdict) @ In, names, list, names of features (from tdict) @ In, feat, list, list of features (from ROM) @ Out, None """ self.muAndSigmaFeatures[feat] = mathUtils.normalizationFactors( values[names.index(feat)]) def confidence(self, edict): """ This call is used to get an estimate of the confidence in the prediction. The base class self.confidence will translate a dictionary into numpy array, then call the local confidence @ In, edict, dict, evaluation dictionary @ Out, confidence, float, the confidence """ if type(edict) != dict: self.raiseAnError( IOError, 'method "confidence". The inquiring set needs to be provided through a dictionary. Type of the in-object is ' + str(type(edict))) names, values = list(edict.keys()), list(edict.values()) for index in range(len(values)): resp = self.checkArrayConsistency(values[index], self.isDynamic()) if not resp[0]: self.raiseAnError( IOError, 'In evaluate request for feature ' + names[index] + ':' + resp[1]) featureValues = np.zeros(shape=(values[0].size, len(self.features))) for cnt, feat in enumerate(self.features): if feat not in names: self.raiseAnError( IOError, 'The feature sought ' + feat + ' is not in the evaluate set') else: resp = self.checkArrayConsistency(values[names.index(feat)], self.isDynamic()) if not resp[0]: self.raiseAnError( IOError, 'In training set for feature ' + feat + ':' + resp[1]) featureValues[:, cnt] = values[names.index(feat)] return self.__confidenceLocal__(featureValues) def evaluate(self, edict): """ Method to perform the evaluation of a point or a set of points through the previous trained supervisedLearning algorithm NB.the supervisedLearning object is committed to convert the dictionary that is passed (in), 
into the local format the interface with the kernels requires. @ In, edict, dict, evaluation dictionary @ Out, evaluate, dict, {target: evaluated points} """ if type(edict) != dict: self.raiseAnError( IOError, 'method "evaluate". The evaluate request/s need/s to be provided through a dictionary. Type of the in-object is ' + str(type(edict))) names, values = list(edict.keys()), list(edict.values()) for index in range(len(values)): resp = self.checkArrayConsistency(values[index], self.isDynamic()) if not resp[0]: self.raiseAnError( IOError, 'In evaluate request for feature ' + names[index] + ':' + resp[1]) # construct the evaluation matrix featureValues = np.zeros(shape=(values[0].size, len(self.features))) for cnt, feat in enumerate(self.features): if feat not in names: self.raiseAnError( IOError, 'The feature sought ' + feat + ' is not in the evaluate set') else: resp = self.checkArrayConsistency(values[names.index(feat)], self.isDynamic()) if not resp[0]: self.raiseAnError( IOError, 'In training set for feature ' + feat + ':' + resp[1]) featureValues[:, cnt] = ((values[names.index(feat)] - self.muAndSigmaFeatures[feat][0]) ) / self.muAndSigmaFeatures[feat][1] return self.__evaluateLocal__(featureValues) def reset(self): """ Reset ROM """ self.amITrained = False self.__resetLocal__() def returnInitialParameters(self): """ override this method to return the fix set of parameters of the ROM @ In, None @ Out, iniParDict, dict, initial parameter dictionary """ iniParDict = dict( list(self.initOptionDict.items()) + list({ 'returnType': self.__class__.returnType, 'qualityEstType': self.__class__.qualityEstType, 'Features': self.features, 'Target': self.target, 'returnType': self.__class__.returnType }.items()) + list(self.__returnInitialParametersLocal__().items())) return iniParDict def returnCurrentSetting(self): """ return the set of parameters of the ROM that can change during simulation @ In, None @ Out, currParDict, dict, current parameter dictionary """ 
currParDict = dict({'Trained': self.amITrained}.items() + self.__CurrentSettingDictLocal__().items()) return currParDict def writeXMLPreamble(self, writeTo, targets=None): """ Allows the SVE to put whatever it wants into an XML file only once (right before calling pringXML) Extend in subclasses. @ In, writeTo, xmlUtils.StaticXmlElement instance, Element to write to @ In, targets, list, list of targets for whom information should be written @ Out, None """ # different calls depending on if it's static or dynamic if isinstance(writeTo, xmlUtils.DynamicXmlElement): writeTo.addScalar('ROM', "type", self.printTag, None, general=True) else: writeTo.addScalar('ROM', "type", self.printTag) def writePointwiseData(self, *args): """ Allows the SVE to add data to a DataObject Overload in subclasses. @ In, args, list, unused arguments @ Out, None """ # by default, nothing to write! self.raiseAMessage( 'Writing ROM "{}", but no pointwise data found. Moving on ...') def writeXML(self, writeTo, targets=None, skip=None): """ Allows the SVE to put whatever it wants into an XML to print to file. Overload in subclasses. @ In, writeTo, xmlUtils.StaticXmlElement, StaticXmlElement to write to @ In, targets, list, optional, list of targets for whom information should be written @ In, skip, list, optional, list of targets to skip @ Out, None """ writeTo.addScalar('ROM', "noInfo", 'ROM has no special output options.') def isDynamic(self): """ This method is a utility function that tells if the relative ROM is able to treat dynamic data (e.g. time-series) on its own or not (Primarly called by LearningGate) @ In, None @ Out, isDynamic, bool, True if the ROM is able to treat dynamic data, False otherwise """ return self._dynamicHandling def reseed(self, seed): """ Used to reset the seed of the ROM. By default does nothing; overwrite in the inheriting classes as needed. 
@ In, seed, int, new seed to use @ Out, None """ return def setAdditionalParams(self, params): """ Sets parameters aside from initialization, such as during deserialization. @ In, params, dict, parameters to set (dependent on ROM) @ Out, None """ newMH = params.pop('messageHandler', None) if newMH: self.messageHandler = newMH # reseeding is common to many seed = params.pop('seed', None) if seed: self.reseed(seed) # overload this method in subclasses to load other parameters ### ROM Clustering (see ROMCollection.py) ### def isClusterable(self): """ Allows ROM to declare whether it has methods for clustring. Default is no. @ In, None @ Out, isClusterable, bool, if True then has clustering mechanics. """ # only true if overridden. return False def checkRequestedClusterFeatures(self, request): """ Takes the user-requested features (sometimes "all") and interprets them for this ROM. @ In, request, dict(list), as from ROMColletion.Cluster._extrapolateRequestedClusterFeatures @ Out, interpreted, dict(list), interpreted features """ self.raiseAnError( NotImplementedError, 'This ROM is not prepared to handle feature cluster requests!') def getLocalRomClusterFeatures(self, *args, **kwargs): """ Provides metrics aka features on which clustering compatibility can be measured. This is called on LOCAL subsegment ROMs, not on the GLOBAL template ROM @ In, featureTemplate, str, format for feature inclusion @ In, settings, dict, as per getGlobalRomSegmentSettings @ In, picker, slice, indexer for segmenting data @ In, kwargs, dict, arbitrary keyword arguments @ Out, features, dict, {target_metric: np.array(floats)} features to cluster on """ # TODO can we do a generic basic statistics clustering on mean, std for all roms? self.raiseAnError( NotImplementedError, 'Clustering capabilities not yet implemented for "{}" ROM!'.format( self.__class__.__name__)) def getGlobalRomSegmentSettings(self, trainingDict, divisions): """ Allows the ROM to perform some analysis before segmenting. 
Note this is called on the GLOBAL templateROM from the ROMcollection, NOT on the LOCAL subsegment ROMs! @ In, trainingDict, dict, data for training @ In, divisions, tuple, (division slice indices, unclustered spaces) @ Out, settings, object, arbitrary information about ROM clustering settings @ Out, trainingDict, dict, adjusted training data (possibly unchanged) """ # by default, do nothing return None, trainingDict def adjustLocalRomSegment(self, settings): """ Adjusts this ROM to account for it being a segment as a part of a larger ROM collection. Call this before training the subspace segment ROMs Note this is called on the LOCAL subsegment ROMs, NOT on the GLOBAL templateROM from the ROMcollection! @ In, settings, dict, as from getGlobalRomSegmentSettings @ Out, None """ # by default, do nothing pass def finalizeLocalRomSegmentEvaluation(self, settings, evaluation, picker): """ Allows global settings in "settings" to affect a LOCAL evaluation of a LOCAL ROM Note this is called on the LOCAL subsegment ROM and not the GLOBAL templateROM. @ In, settings, dict, as from getGlobalRomSegmentSettings @ In, evaluation, dict, preliminary evaluation from the local segment ROM as {target: [values]} @ In, picker, slice, indexer for data range of this segment @ Out, evaluation, dict, {target: np.ndarray} adjusted global evaluation """ return evaluation def finalizeGlobalRomSegmentEvaluation(self, settings, evaluation): """ Allows any global settings to be applied to the signal collected by the ROMCollection instance. Note this is called on the GLOBAL templateROM from the ROMcollection, NOT on the LOCAL supspace segment ROMs! 
@ In, evaluation, dict, {target: np.ndarray} evaluated full (global) signal from ROMCollection TODO finish docs @ Out, evaluation, dict, {target: np.ndarray} adjusted global evaluation """ return evaluation ### END ROM Clustering ### @abc.abstractmethod def __trainLocal__(self, featureVals, targetVals): """ Perform training on samples in featureVals with responses y. For an one-class model, +1 or -1 is returned. @ In, featureVals, {array-like, sparse matrix}, shape=[n_samples, n_features], an array of input feature values @ Out, targetVals, array, shape = [n_samples], an array of output target associated with the corresponding points in featureVals """ @abc.abstractmethod def __confidenceLocal__(self, featureVals): """ This should return an estimation of the quality of the prediction. This could be distance or probability or anything else, the type needs to be declared in the variable cls.qualityEstType @ In, featureVals, 2-D numpy array , [n_samples,n_features] @ Out, __confidenceLocal__, float, the confidence """ @abc.abstractmethod def __evaluateLocal__(self, featureVals): """ @ In, featureVals, np.array, 2-D numpy array [n_samples,n_features] @ Out, targetVals , np.array, 1-D numpy array [n_samples] """ @abc.abstractmethod def __resetLocal__(self): """ Reset ROM. After this method the ROM should be described only by the initial parameter settings @ In, None @ Out, None """ @abc.abstractmethod def __returnInitialParametersLocal__(self): """ Returns a dictionary with the parameters and their initial values @ In, None @ Out, params, dict, dictionary of parameter names and initial values """ @abc.abstractmethod def __returnCurrentSettingLocal__(self): """
class supervisedLearningGate(utils.metaclass_insert(abc.ABCMeta, BaseType), MessageHandler.MessageUser):
  """
    This class represents an interface with all the supervised learning algorithms.
    It is a utility class needed to hide the discernment between time-dependent and static
    surrogate models: the gate holds a list of ROMs ("supervisedContainer") that contains either
    a single ROM or one ROM per history step.
  """
  def __init__(self, ROMclass, messageHandler, **kwargs):
    """
      A constructor that will appropriately initialize a supervised learning object (static or time-dependent)
      @ In, ROMclass, string, the surrogate model type
      @ In, messageHandler, MessageHandler object, it is in charge of raising errors, and printing messages
      @ In, kwargs, dict, an arbitrary list of kwargs; the keys "pickled" and "pivotParameter" are
                          consumed (popped) by this constructor
      @ Out, None
    """
    self.printTag = 'SupervisedGate'
    self.messageHandler = messageHandler
    self.initializationOptions = kwargs
    self.amITrained = False
    self.ROMclass = ROMclass
    # if the ROM comes from a pickled rom, this gate is just a placeholder and the Targets check doesn't apply
    self.pickled = self.initializationOptions.pop('pickled', False)
    if not self.pickled and 'Target' not in self.initializationOptions:
      self.raiseAnError(IOError, 'No Targets specified!!!')
    # the ROM is instanced and initialized
    # NOTE(review): the instance is created for pickled placeholders as well, since the attributes
    #               set below are derived from it
    modelInstance = SupervisedLearning.returnInstance(ROMclass, self, **self.initializationOptions)
    # check if the model can autonomously handle the time-dependency
    # (if not and time-dep data are passed in, a list of ROMs is constructed)
    self.canHandleDynamicData = modelInstance.isDynamic()
    # is this ROM time-dependent? It is decided when "train" is called
    self.isADynamicModel = False
    # if the ROM handles dynamic data on its own, this container is never expanded; else "train" expands it
    self.supervisedContainer = [modelInstance]
    # check if pivotParameter is specified and in case store it (it is popped after the ROM construction,
    # hence the ROM receives it among its options)
    self.pivotParameterId = self.initializationOptions.pop("pivotParameter", 'time')
    # pivot values of the training histories, filled by "train" for time-dependent training sets
    self.historySteps = []

  def __getstate__(self):
    """
      This function return the state of the ROM
      @ In, None
      @ Out, state, dict, it contains all the information needed by the ROM to be initialized
    """
    # capture what is normally pickled
    state = self.__dict__.copy()
    if not self.amITrained:
      # an untrained ROM is not serialized: __setstate__ rebuilds it from the initialization options
      state.pop("supervisedContainer", None)
    return state

  def __setstate__(self, newstate):
    """
      Initialize the ROM with the data contained in newstate
      @ In, newstate, dict, it contains all the information needed by the ROM to be initialized
      @ Out, None
    """
    self.__dict__.update(newstate)
    if not self.amITrained:
      # the container was dropped by __getstate__, a fresh ROM instance is created
      modelInstance = SupervisedLearning.returnInstance(self.ROMclass, self, **self.initializationOptions)
      self.supervisedContainer = [modelInstance]

  def reset(self):
    """
      This method is aimed to reset the ROM
      @ In, None
      @ Out, None
    """
    for rom in self.supervisedContainer:
      rom.reset()
    self.amITrained = False

  def getInitParams(self):
    """
      This function is called from the base class to print some of the information inside the class.
      Whatever is permanent in the class and not inherited from the parent class should be mentioned here
      The information is passed back in the dictionary. No information about values that change during the simulation are allowed
      @ In, None
      @ Out, paramDict, dict, dictionary containing the parameter names as keys and each parameter's initial value as the dictionary values
    """
    # the last ROM of the container is used to collect the initial parameters
    paramDict = self.supervisedContainer[-1].returnInitialParameters()
    return paramDict

  def train(self, trainingSet):
    """
      This function train the ROM this gate is linked to. This method is aimed to agnostically understand if a "time-dependent-like" ROM needs to be constructed.
      @ In, trainingSet, dict, data used to train the ROM; if any of its values is a list, a temporal ROM is generated.
      @ Out, None
    """
    # an actual type check is used: comparing the type name with "not in 'dict'" is a substring test
    if not isinstance(trainingSet, dict):
      self.raiseAnError(IOError, "The training set is not a dictionary!")
    if len(trainingSet) == 0:
      self.raiseAnError(IOError, "The training set is empty!")

    if any(type(x).__name__ == 'list' for x in trainingSet.values()):
      # we need to build a "time-dependent" ROM
      self.isADynamicModel = True
      if self.pivotParameterId not in trainingSet:
        self.raiseAnError(IOError, "the pivot parameter " + self.pivotParameterId +
                          " is not present in the training set. A time-dependent-like ROM cannot be created!")
      if type(trainingSet[self.pivotParameterId]).__name__ != 'list':
        self.raiseAnError(IOError, "the pivot parameter " + self.pivotParameterId +
                          " is not a list. Are you sure it is part of the output space of the training set?")
      # the last entry of the pivot parameter list provides the history steps
      self.historySteps = trainingSet.get(self.pivotParameterId)[-1]
      if len(self.historySteps) == 0:
        self.raiseAnError(IOError, "the training set is empty!")
      if self.canHandleDynamicData:
        # the ROM is able to manage the time dependency on its own
        self.supervisedContainer[0].train(trainingSet)
      else:
        # we need to construct a chain of ROMs
        # the check on the number of time steps (consistency) is performed inside the historySnapShoots method
        # get the time slices
        newTrainingSet = mathUtils.historySnapShoots(trainingSet, len(self.historySteps))
        # anything but a list is handed to the error handler (presumably an error message -- TODO confirm)
        if type(newTrainingSet).__name__ != 'list':
          self.raiseAnError(IOError, newTrainingSet)
        # copy the original ROM
        originalROM = copy.deepcopy(self.supervisedContainer[0])
        # start creating and training the time-dep ROMs: one copy of the original ROM per history step
        self.supervisedContainer = []
        for ts in range(len(self.historySteps)):
          self.supervisedContainer.append(copy.deepcopy(originalROM))
          self.supervisedContainer[-1].train(newTrainingSet[ts])
    else:
      # static training set, the single ROM is trained directly
      self.supervisedContainer[0].train(trainingSet)
    self.amITrained = True

  @staticmethod
  def _stackSliceResults(sliceResults):
    """
      Merges the dictionaries produced by each ROM of the container into a single dictionary.
      The first non-empty state is taken as is; the values of the following slices are appended
      (np.append) key by key, using the keys collected so far.
      @ In, sliceResults, iterable of dict, results of each ROM ({target: values}), consumed lazily and in order
      @ Out, stacked, dict, merged results ({target: np.array})
    """
    stacked = {}
    for sliceResult in sliceResults:
      if not stacked:
        stacked.update(sliceResult)
      else:
        for key in stacked:
          stacked[key] = np.append(stacked[key], sliceResult[key])
    return stacked

  def confidence(self, request):
    """
      This is to get a value that is inversely proportional to the confidence that we have
      forecasting the target value for the given set of features. The reason to chose the inverse is because
      in case of normal distance this would be 1/distance that could be infinity
      @ In, request, dict, realizations request ({'feature1':np.array(n_realizations),'feature2',np.array(n_realizations)})
      @ Out, confidenceDict, dict, the dictionary where the confidence is stored for each target
    """
    if not self.amITrained:
      self.raiseAnError(RuntimeError, "ROM " + self.initializationOptions['name'] +
                        " has not been trained yet and, consequentially, can not be evaluated!")
    # the generator keeps the ROM calls interleaved with the merging, one ROM at a time
    confidenceDict = self._stackSliceResults(rom.confidence(request) for rom in self.supervisedContainer)
    return confidenceDict

  def evaluate(self, request):
    """
      Method to perform the evaluation of a point or a set of points through the linked surrogate model
      @ In, request, dict, realizations request ({'feature1':np.array(n_realizations),'feature2',np.array(n_realizations)})
      @ Out, resultsDict, dict, dictionary of results ({target1:np.array,'target2':np.array}).
    """
    if self.pickled:
      self.raiseAnError(RuntimeError, 'ROM "' + self.initializationOptions['name'] +
                        '" has not been loaded yet!  Use an IOStep to load it.')
    if not self.amITrained:
      self.raiseAnError(RuntimeError, "ROM " + self.initializationOptions['name'] +
                        " has not been trained yet and, consequentially, can not be evaluated!")
    # the generator keeps the ROM calls interleaved with the merging, one ROM at a time
    resultsDict = self._stackSliceResults(rom.evaluate(request) for rom in self.supervisedContainer)
    return resultsDict
class TimeSeriesAnalyzer(utils.metaclass_insert(abc.ABCMeta, object)):
  """
    Base class for the objects coordinating the time series analysis algorithms in RAVEN.
    These are not the ROM/SupervisedLearning objects; rather, they are used by those as well as
    by other algorithms throughout the code. The algorithms are meant to stay callable without
    accessing all of RAVEN.
  """
  # class attribute
  ## clusterable features of this trainer (none for the base class)
  _features = []

  @classmethod
  def getInputSpecification(cls):
    """
      Builds the input specification of class cls: the "target" (required) and "seed" (optional) parameters.
      @ In, None
      @ Out, inputSpecification, InputData.ParameterInput, class to use for specifying input of cls.
    """
    inputSpecification = InputData.parameterInputFactory(cls.__name__, ordered=False, strictMode=True)
    inputSpecification.description = 'Base class for time series analysis algorithms used in RAVEN.'
    inputSpecification.addParam(
        'target',
        param_type=InputTypes.StringListType,
        required=True,
        descr=r"""indicates the variables for which this algorithm will be used for characterization. """)
    inputSpecification.addParam(
        'seed',
        param_type=InputTypes.IntegerType,
        required=False,
        descr=r"""sets a seed for the underlying random number generator, if present.""")
    return inputSpecification

  ### INHERITED METHODS ###
  def __init__(self, *args, **kwargs):
    """
      Constructor; it only stores the name of the object, taken from the class name.
      @ In, args, list, an arbitrary list of positional values
      @ In, kwargs, dict, an arbitrary dictionary of keywords and values
      @ Out, None
    """
    # the name the class shall be known by during its RAVEN life
    self.name = type(self).__name__

  def handleInput(self, spec):
    """
      Reads user inputs into this object.
      @ In, spec, InputData.InputParams, input specifications
      @ Out, settings, dict, initialization settings for this algorithm
    """
    # "target" is mandatory, "seed" falls back to None when the user did not provide it
    settings = {
        'target': spec.parameterValues['target'],
        'seed': spec.parameterValues.get('seed', None),
    }
    return self.setDefaults(settings)

  def setDefaults(self, settings):
    """
      Fills the missing entries of settings with their default values.
      @ In, settings, dict, existing settings
      @ Out, settings, dict, modified settings
    """
    # only a missing seed is filled in, an existing one is left untouched
    settings.setdefault('seed', None)
    return settings

  @abc.abstractmethod
  def characterize(self, signal, pivot, targets, settings):
    """
      Characterizes the provided time series ("signal") using methods specific to this algorithm.
      @ In, signal, np.array, time-dependent series
      @ In, pivot, np.array, time-like parameter
      @ In, targets, list(str), names of targets
      @ In, settings, dict, additional settings specific to algorithm
      @ Out, params, dict, characterization of signal; structure as:
                           params[target variable][characteristic] = value
    """

  def getResidual(self, initial, params, pivot, settings):
    """
      Removes trained signal from data and find residual
      @ In, initial, np.array, original signal shaped [pivotValues, targets], targets MUST be in
                               same order as self.target
      @ In, params, dict, training parameters as from self.characterize
      @ In, pivot, np.array, time-like array values
      @ In, settings, dict, additional settings specific to algorithm
      @ Out, residual, np.array, reduced signal shaped [pivotValues, targets]
    """
    # DEFAULT IMPLEMENTATION: a single generated signal is subtracted from the given one
    # -> overload in inheritors to change behavior
    return initial - self.generate(params, pivot, settings)

  @abc.abstractmethod
  def generate(self, params, pivot, settings):
    """
      Generates a synthetic history from fitted parameters.
      @ In, params, dict, training parameters as from self.characterize
      @ In, pivot, np.array, time-like array values
      @ In, settings, dict, additional settings specific to algorithm
      @ Out, synthetic, np.array(float), synthetic signal
    """

  def writeXML(self, writeTo, params):
    """
      Allows the engine to put whatever it wants into an XML to print to file.
      The base implementation writes nothing; overwrite in subclasses if desired.
      @ In, writeTo, xmlUtils.StaticXmlElement, entity to write to
      @ In, params, dict, parameters from training this ROM
      @ Out, None
    """
    return None