Example 1
    def localGenerateInput(self, model, myInput):
        """
      Function to select the next most informative point for refining the limit
      surface search.
      After this method is called, the self.inputInfo should be ready to be sent
      to the model
      @ In, model, model instance, an instance of a model
      @ In, myInput, list, a list of the original needed inputs for the model (e.g. list of files, etc.)
      @ Out, None
    """
        try:
            pt, weight = self.sparseGrid[self.counter - 1]
        except IndexError:
            raise utils.NoMoreSamplesNeeded

        for v, varName in enumerate(self.sparseGrid.varNames):
            # compute the SampledVarsPb for 1-D distribution
            if self.variables2distributionsMapping[varName]['totDim'] == 1:
                for key in varName.strip().split(','):
                    self.values[key] = pt[v]
                self.inputInfo['SampledVarsPb'][varName] = self.distDict[
                    varName].pdf(pt[v])
                self.inputInfo['ProbabilityWeight-' + varName.replace(
                    ",", "-")] = self.inputInfo['SampledVarsPb'][varName]
            # compute the SampledVarsPb for N-D distribution
            # Assume only one N-D distribution is associated with sparse grid collocation method
            elif self.variables2distributionsMapping[varName][
                    'totDim'] > 1 and self.variables2distributionsMapping[
                        varName]['reducedDim'] == 1:
                dist = self.variables2distributionsMapping[varName]['name']
                ndCoordinates = np.zeros(
                    len(self.distributions2variablesMapping[dist]))
                positionList = self.distributions2variablesIndexList[dist]
                for varDict in self.distributions2variablesMapping[dist]:
                    var = utils.first(varDict.keys())
                    position = utils.first(varDict.values())
                    location = -1
                    for key in var.strip().split(','):
                        if key in self.sparseGrid.varNames:
                            location = self.sparseGrid.varNames.index(key)
                            break
                    if location > -1:
                        ndCoordinates[positionList.index(
                            position)] = pt[location]
                    else:
                        self.raiseAnError(
                            IOError, 'The variables ' + var +
                            ' are listed in the sparse grid collocation sampler but are not used in the ROM!'
                        )
                    for key in var.strip().split(','):
                        self.values[key] = pt[location]
                self.inputInfo['SampledVarsPb'][varName] = self.distDict[
                    varName].pdf(ndCoordinates)
                self.inputInfo['ProbabilityWeight-' + varName.replace(
                    ",", "!")] = self.inputInfo['SampledVarsPb'][varName]

        self.inputInfo['ProbabilityWeight'] = weight
        self.inputInfo['PointProbability'] = reduce(
            mul, self.inputInfo['SampledVarsPb'].values())
        self.inputInfo['SamplerType'] = 'Sparse Grid Collocation'
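
A note on the last lines above: 'PointProbability' is simply the product of the per-variable probabilities collected in 'SampledVarsPb', accumulated with functools.reduce and operator.mul (both imported elsewhere in the module). A minimal standalone sketch of that accumulation, with made-up numbers:

from functools import reduce
from operator import mul

sampledVarsPb = {"x": 0.5, "y": 0.2, "z": 0.1}   # illustrative per-variable pdf values
pointProbability = reduce(mul, sampledVarsPb.values())
print(pointProbability)  # 0.01 (up to floating-point rounding)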
Example 2
 def pcaTransform(self, varsDict, dist):
     """
   This method is used to map latent variables with respect to the model input variables
   both the latent variables and the model input variables will be stored in the dict: self.inputInfo['SampledVars']
   @ In, varsDict, dict, dictionary contains latent and manifest variables {'latentVariables':[latentVar1,latentVar2,...], 'manifestVariables':[var1,var2,...]}
   @ In, dist, string, the distribution name associated with given variable set
   @ Out, None
 """
     latentVariablesValues = []
     listIndex = []
     manifestVariablesValues = [None] * len(varsDict['manifestVariables'])
     for index, lvar in enumerate(varsDict['latentVariables']):
         for var, value in self.values.items():
             if lvar == var:
                 latentVariablesValues.append(value)
                 listIndex.append(varsDict['latentVariablesIndex'][index])
     varName = utils.first(
         utils.first(self.distributions2variablesMapping[dist]).keys())
     varsValues = self.distDict[varName].pcaInverseTransform(
         latentVariablesValues, listIndex)
     for index1, index2 in enumerate(varsDict['manifestVariablesIndex']):
         manifestVariablesValues[index2] = varsValues[index1]
     manifestVariablesDict = dict(
         zip(varsDict['manifestVariables'], manifestVariablesValues))
     self.values.update(manifestVariablesDict)
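
The heavy lifting in pcaTransform is delegated to pcaInverseTransform, whose implementation is not shown here. As a rough standalone sketch (an assumption about what a PCA inverse transform does, not the RAVEN API), the latent-to-manifest mapping reconstructs manifest values as the manifest means plus the loading matrix applied to the latent coordinates:

import numpy as np

mean = np.array([1.0, 2.0, 3.0])       # manifest-variable means (illustrative)
W = np.array([[0.5, 0.0],              # loading matrix, manifest x latent (illustrative)
              [0.0, 1.0],
              [0.5, 0.5]])
latent = np.array([2.0, -1.0])         # sampled latent coordinates

manifest = mean + W @ latent
print(dict(zip(["x1", "x2", "x3"], manifest.tolist())))  # {'x1': 2.0, 'x2': 1.0, 'x3': 3.5}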
Example 3
    def _computeWeightedPercentile(self, arrayIn, pbWeight, percent=0.5):
        """
      Method to compute the weighted percentile in a array of data
      @ In, arrayIn, list/numpy.array, the array of values from which the percentile needs to be estimated
      @ In, pbWeight, list/numpy.array, the reliability weights that correspond to the values in 'array'
      @ In, percent, float, the percentile that needs to be computed (between 0.01 and 1.0)
      @ Out, result, float, the percentile
    """

        idxs = np.argsort(np.asarray(list(zip(pbWeight, arrayIn)))[:, 1])
        # Inserting [0.0, arrayIn[idxs[0]]] is needed when few samples are generated and
        # a percentile smaller than the first pb weight is requested. Otherwise the median
        # would be returned instead.
        sortedWeightsAndPoints = np.insert(np.asarray(
            list(zip(pbWeight[idxs], arrayIn[idxs]))),
                                           0, [0.0, arrayIn[idxs[0]]],
                                           axis=0)
        weightsCDF = np.cumsum(sortedWeightsAndPoints[:, 0])
        # This step returns the first index at which the cumulative weight reaches the percentile;
        # because the insertion above creates an extra entry, this index shifts toward the larger side
        indexL = utils.first(np.asarray(weightsCDF >= percent).nonzero())[0]
        # This step returns the indices of the array whose cumulative weights are > the percentile
        indexH = utils.first(np.asarray(weightsCDF > percent).nonzero())
        try:
            # if such an index exists, the desired percentile lies between the two data points
            # with indices indexL and indexH[0]; take the midpoint of these two points
            result = 0.5 * (sortedWeightsAndPoints[indexL, 1] +
                            sortedWeightsAndPoints[indexH[0], 1])
        except IndexError:
            result = sortedWeightsAndPoints[indexL, 1]
        return result
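
For reference, the core idea of _computeWeightedPercentile can be reproduced with plain numpy. The sketch below is a simplified standalone variant (it returns the first value whose cumulative weight reaches the requested percentile, rather than averaging the two straddling points as the method above does); all names are illustrative:

import numpy as np

def weightedPercentile(values, weights, percent=0.5):
    # sort the values, accumulate the normalized weights, and pick the first value
    # whose cumulative weight reaches the requested percentile
    order = np.argsort(values)
    values, weights = np.asarray(values)[order], np.asarray(weights)[order]
    cdf = np.cumsum(weights) / np.sum(weights)
    return values[np.searchsorted(cdf, percent)]

print(weightedPercentile([3.0, 1.0, 2.0], [0.2, 0.5, 0.3], percent=0.5))  # -> 1.0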
Example 4
 def localGenerateInput(self, model, myInput):
     """
   Function to select the next most informative point
   After this method is called, the self.inputInfo should be ready to be sent
   to the model
   @ In, model, model instance, an instance of a model
   @ In, myInput, list, a list of the original needed inputs for the model (e.g. list of files, etc.)
   @ Out, None
 """
     self.inputInfo['ProbabilityWeight'] = 1.0
     pt = self.neededPoints.pop()
     self.submittedNotCollected.append(pt)
     for v, varName in enumerate(self.sparseGrid.varNames):
         # compute the SampledVarsPb for 1-D distribution
         if self.variables2distributionsMapping[varName]['totDim'] == 1:
             for key in varName.strip().split(','):
                 self.values[key] = pt[v]
             self.inputInfo['SampledVarsPb'][varName] = self.distDict[
                 varName].pdf(pt[v])
             self.inputInfo[
                 'ProbabilityWeight-' +
                 varName] = self.inputInfo['SampledVarsPb'][varName]
          # compute the SampledVarsPb for N-D distribution
         elif self.variables2distributionsMapping[varName][
                 'totDim'] > 1 and self.variables2distributionsMapping[
                     varName]['reducedDim'] == 1:
             dist = self.variables2distributionsMapping[varName]['name']
             ndCoordinates = np.zeros(
                 len(self.distributions2variablesMapping[dist]))
             positionList = self.distributions2variablesIndexList[dist]
             for varDict in self.distributions2variablesMapping[dist]:
                 var = utils.first(varDict.keys())
                 position = utils.first(varDict.values())
                 location = -1
                 for key in var.strip().split(','):
                     if key in self.sparseGrid.varNames:
                         location = self.sparseGrid.varNames.index(key)
                         break
                 if location > -1:
                     ndCoordinates[positionList.index(
                         position)] = pt[location]
                 else:
                      self.raiseAnError(
                          IOError, 'The variables ' + var +
                          ' are listed in the sparse grid collocation sampler but are not used in the ROM!'
                      )
                 for key in var.strip().split(','):
                     self.values[key] = pt[location]
             self.inputInfo['SampledVarsPb'][varName] = self.distDict[
                 varName].pdf(ndCoordinates)
             self.inputInfo['ProbabilityWeight-' +
                            dist] = self.inputInfo['SampledVarsPb'][varName]
             self.inputInfo['ProbabilityWeight'] *= self.inputInfo[
                 'ProbabilityWeight-' + dist]
     self.inputInfo['PointProbability'] = reduce(
         mul, self.inputInfo['SampledVarsPb'].values())
     self.inputInfo['SamplerType'] = self.type
Example 5
 def localGenerateInput(self, model, myInput):
     """
   Function to select the next most informative point
   @ In, model, model instance, an instance of a model
   @ In, myInput, list, a list of the original needed inputs for the model (e.g. list of files, etc.)
   @ Out, None
 """
     try:
         pt = self.pointsToRun[self.counter - 1]
     except IndexError:
         self.raiseADebug('All sparse grids are complete!  Moving on...')
         raise utils.NoMoreSamplesNeeded
     for v, varName in enumerate(self.features):
         # compute the SampledVarsPb for 1-D distribution
         if self.variables2distributionsMapping[varName]['totDim'] == 1:
             for key in varName.strip().split(','):
                 self.values[key] = pt[v]
             self.inputInfo['SampledVarsPb'][varName] = self.distDict[
                 varName].pdf(pt[v])
             self.inputInfo[
                 'ProbabilityWeight-' +
                 varName] = self.inputInfo['SampledVarsPb'][varName]
         # compute the SampledVarsPb for N-D distribution
         elif self.variables2distributionsMapping[varName][
                 'totDim'] > 1 and self.variables2distributionsMapping[
                     varName]['reducedDim'] == 1:
             dist = self.variables2distributionsMapping[varName]['name']
             ndCoordinates = np.zeros(
                 len(self.distributions2variablesMapping[dist]))
             positionList = self.distributions2variablesIndexList[dist]
             for varDict in self.distributions2variablesMapping[dist]:
                 var = utils.first(varDict.keys())
                 position = utils.first(varDict.values())
                 location = -1
                 for key in var.strip().split(','):
                     if key in self.features:
                         location = self.features.index(key)
                         break
                 if location > -1:
                     ndCoordinates[positionList.index(
                         position)] = pt[location]
                 else:
                      self.raiseAnError(
                          IOError, 'The variables ' + var +
                          ' are listed in the sobol sampler but are not used in the ROM!'
                      )
                 for key in var.strip().split(','):
                     self.values[key] = pt[location]
             self.inputInfo['SampledVarsPb'][varName] = self.distDict[
                 varName].pdf(ndCoordinates)
             self.inputInfo['ProbabilityWeight-' +
                            dist] = self.inputInfo['SampledVarsPb'][varName]
     self.inputInfo['PointProbability'] = reduce(
         mul, self.inputInfo['SampledVarsPb'].values())
     self.inputInfo['ProbabilityWeight'] = np.atleast_1d(
         1.0)  # weight has no meaning for sobol
     self.inputInfo['SamplerType'] = 'Sparse Grids for Sobol'
Example 6
 def localInitialize(self, solutionExport=None):
     """
   Will perform all initialization specific to this Sampler. For instance,
   creating an empty container to hold the identified surface points, error
   checking the optionally provided solution export and other preset values,
   and initializing the limit surface Post-Processor used by this sampler.
   @ In, solutionExport, DataObjects, optional, a PointSet to hold the solution (a list of limit surface points)
   @ Out, None
 """
     if self.detAdaptMode == 2:
         self.startAdaptive = True
     # we first initialize the LimitSurfaceSearch sampler
     LimitSurfaceSearch.localInitialize(self, solutionExport=solutionExport)
     if self.hybridDETstrategy is not None:
         # we are running an adaptive hybrid DET and not only an adaptive DET
         if self.hybridDETstrategy == 1:
             gridVector = self.limitSurfacePP.gridEntity.returnParameter(
                 "gridVectors")
              # construct a hybrid DET through an XML node
             distDict, xmlNode = {}, ET.fromstring(
                 '<InitNode> <HybridSampler type="Grid" name="none"/> </InitNode>'
             )
             for varName, dist in self.distDict.items():
                 if varName.replace('<distribution>',
                                    '') in self.epistemicVariables.keys():
                     # found an epistemic
                     varNode = ET.Element(
                         'Distribution'
                         if varName.startswith('<distribution>') else
                         'variable',
                         {'name': varName.replace('<distribution>', '')})
                     varNode.append(
                         ET.fromstring("<distribution>" +
                                       dist.name.strip() +
                                       "</distribution>"))
                     distDict[dist.name.strip()] = self.distDict[varName]
                     varNode.append(
                         ET.fromstring(
                             '<grid construction="custom" type="value">' +
                             ' '.join([
                                 str(elm)
                                 for elm in utils.first(gridVector.values())
                                 [varName.replace('<distribution>', '')]
                             ]) + '</grid>'))
                     xmlNode.find("HybridSampler").append(varNode)
             #TODO, need to pass real paramInput
             self._localInputAndChecksHybrid(xmlNode, paramInput=None)
             for hybridsampler in self.hybridStrategyToApply.values():
                 hybridsampler._generateDistributions(distDict, {})
     DynamicEventTree.localInitialize(self)
     if self.hybridDETstrategy == 2:
         self.actualHybridTree = utils.first(self.TreeInfo.keys())
     self._endJobRunnable = sys.maxsize
Example 7
 def _printState(self,which,toDoSub,poly):
   """
     Debugging tool.  Prints status of adaptive steps. Togglable in input by specifying logFile.
     @ In, which, string, the type of the next addition to make by the adaptive sampler: poly, or subset
     @ In, toDoSub, tuple(str), the next subset that will be resolved as part of the adaptive sampling
     @ In, poly, tuple(int), the polynomial within the next subset that will be added to resolve it
     @ Out, None
   """
   #print status, including error; next step to make; and existing, training, and expected values
   self.stateCounter+=1
   self.statesFile.writelines('==================== STEP %s ====================\n' %self.stateCounter)
   #write error, next adaptive move to make in this step
   self.statesFile.writelines('\n\nError: %1.9e\n' %self.error)
   self.statesFile.writelines('Next: %6s %8s %12s\n' %(which,','.join(toDoSub),str(poly)))
   #write a summary of the state of each subset sampler: existing points, training points, yet-to-try points, and their impacts on each target
   for sub in self.useSet.keys():
     self.statesFile.writelines('-'*50)
     self.statesFile.writelines('\nsubset %8s with impacts' %','.join(sub))
     for t in self.targets:
       self.statesFile.writelines(    ' [ %4s:%1.6e ] ' %(t,self.subsetImpact[t][sub]))
     self.statesFile.writelines('\n')
     #existing polynomials
     self.statesFile.writelines('ESTABLISHED:\n')
     self.statesFile.writelines('    %12s' %'polynomial')
     for t in self.targets:
       self.statesFile.writelines('  %12s' %t)
     self.statesFile.writelines('\n')
     for coeff in utils.first(self.romShell[sub].supervisedEngine.supervisedContainer[0].polyCoeffDict.values()).keys():
       self.statesFile.writelines('    %12s' %','.join(str(c) for c in coeff))
       for t in self.targets:
         self.statesFile.writelines('  %1.6e' %self.romShell[sub].supervisedEngine.supervisedContainer[0].polyCoeffDict[t][coeff])
       self.statesFile.writelines('\n')
     #polynomials in training
     if any(sub==item[1] for item in self.inTraining):
       self.statesFile.writelines('TRAINING:\n')
     for item in self.inTraining:
       if sub == item[1]:
         self.statesFile.writelines('    %12s %12s\n' %(sub,item[2]))
     #polynomials on the fringe that aren't being trained
     self.statesFile.writelines('EXPECTED:\n')
     for poly in utils.first(self.samplers[sub].expImpact.values()).keys():
       self.statesFile.writelines('    %12s' %','.join(str(c) for c in poly))
       self.statesFile.writelines('  %1.6e' %self.samplers[sub].expImpact[t][poly])
       self.statesFile.writelines('\n')
   self.statesFile.writelines('-'*50+'\n')
   #other subsets that haven't been started yet
   self.statesFile.writelines('EXPECTED SUBSETS\n')
   for sub,val in self.subsetExpImpact.items():
     self.statesFile.writelines('    %8s: %1.6e\n' %(','.join(sub),val))
   self.statesFile.writelines('\n==================== END STEP ====================\n')
Example 8
 def isRomConverged(self, outputDict):
     """
   This function will check the convergence of rom
   @ In, outputDict, dict, dictionary contains the metric information
     e.g. {targetName:{metricName:List of metric values}}, this dict is coming from results of cross validation
   @ Out, converged, bool, True if the rom is converged
 """
     converged = True
     # very temporary solution
     exploredTargets = []
     for cvKey, metricValues in outputDict.items():
         #for targetName, metricInfo in outputDict.items():
         # very temporary solution
         info = self.cvInstance.interface._returnCharacteristicsOfCvGivenOutputName(
             cvKey)
         if info['targetName'] in exploredTargets:
             self.raiseAnError(
                 IOError, "Multiple metrics are used in cross validation '",
                 self.cvInstance.name,
                 "'. Currently, this can not be processed by the HybridModel '",
                 self.name, "'!")
         exploredTargets.append(info['targetName'])
         name = utils.first(self.cvInstance.interface.metricsDict.keys())
         converged = self.checkErrors(info['metricType'], metricValues)
     return converged
Example 9
  def localInitialize(self):
    """
      Will perform all initialization specific to this Sampler.
      @ In, None
      @ Out, None
    """
    # check the source
    if self.assemblerDict['Source'][0][0] == 'Files':
      self.readingFrom = 'File'
      csvFile = self.assemblerDict['Source'][0][3]
      csvFile.open(mode='r')
      headers = [x.replace("\n","").strip() for x in csvFile.readline().split(",")]
      data = np.loadtxt(self.assemblerDict['Source'][0][3], dtype=float, delimiter=',', skiprows=1, ndmin=2)  # np.float was removed from recent NumPy; use the builtin float
      lenRlz = len(data)
      csvFile.close()
      for var in self.toBeSampled.keys():
        for subVar in var.split(','):
          subVar = subVar.strip()
          sourceName = self.nameInSource[subVar]
          if sourceName not in headers:
            self.raiseAnError(IOError, "variable "+ sourceName + " not found in the file "
                    + csvFile.getFilename())
          self.pointsToSample[subVar] = data[:,headers.index(sourceName)]
          subVarPb = 'ProbabilityWeight-'
          if subVarPb+sourceName in headers:
            self.infoFromCustom[subVarPb+subVar] = data[:, headers.index(subVarPb+sourceName)]
          else:
            self.infoFromCustom[subVarPb+subVar] = np.ones(lenRlz)
      if 'PointProbability' in headers:
        self.infoFromCustom['PointProbability'] = data[:,headers.index('PointProbability')]
      else:
        self.infoFromCustom['PointProbability'] = np.ones(lenRlz)
      if 'ProbabilityWeight' in headers:
        self.infoFromCustom['ProbabilityWeight'] = data[:,headers.index('ProbabilityWeight')]
      else:
        self.infoFromCustom['ProbabilityWeight'] = np.ones(lenRlz)

      self.limit = len(utils.first(self.pointsToSample.values()))
    else:
      self.readingFrom = 'DataObject'
      dataObj = self.assemblerDict['Source'][0][3]
      lenRlz = len(dataObj)
      dataSet = dataObj.asDataset()
      self.pointsToSample = dataObj.sliceByIndex(dataObj.sampleTag)
      for var in self.toBeSampled.keys():
        for subVar in var.split(','):
          subVar = subVar.strip()
          sourceName = self.nameInSource[subVar]
          if sourceName not in dataObj.getVars() + dataObj.getVars('indexes'):
            self.raiseAnError(IOError,"the variable "+ sourceName + " not found in "+ dataObj.type + " " + dataObj.name)
      self.limit = len(self.pointsToSample)
    # if "index" provided, limit sampling to those points
    if self.indexes is not None:
      self.limit = len(self.indexes)
      maxIndex = max(self.indexes)
      if maxIndex > len(self.pointsToSample) -1:
        self.raiseAnError(IndexError,'Requested index "{}" from custom sampler, but highest index sample is "{}"!'.format(maxIndex,len(self.pointsToSample)-1))
    #TODO: add restart capability here!
    if self.restartData:
      self.raiseAnError(IOError,"restart capability not implemented for CustomSampler yet!")
Example 10
 def readFromROM(self):
     """
   Reads in required information from ROM and returns a sample supervisedLearning object.
   @ In, None
   @ Out, SVL, supervisedLearning object, SVL object
 """
     self.ROM = self.assemblerDict['ROM'][0][3]
     SVLs = self.ROM.supervisedEngine.supervisedContainer
     SVL = utils.first(SVLs)
     self.features = SVL.features
     self.sparseGridType = SVL.sparseGridType.lower()
     return SVL
Example 11
 def localStillReady(self,ready): #, lastOutput= None
   """
      First performs some checks to understand what needs to be done and possibly performs an early return;
      the (possibly updated) ready flag is returned.
     @ In,  ready, bool, a boolean representing whether the caller is prepared for another input.
     @ Out, ready, bool, a boolean representing whether the caller is prepared for another input.
   """
   if self.counter == 0               : return     True
   if len(self.RunQueue['queue']) != 0: detReady = True
   else                               : detReady = False
   # since the RunQueue is empty, let's check if there are still branches running => if not => start the adaptive search
   self._checkIfStartAdaptive()
   if self.startAdaptive:
     #if self._endJobRunnable != 1: self._endJobRunnable = 1
     # retrieve the endHistory branches
     completedHistNames, finishedHistNames = [], []
     hybridTrees = self.TreeInfo.values() if self.hybridDETstrategy in [1,None] else [self.TreeInfo[self.actualHybridTree]]
     for treer in hybridTrees: # this needs to be solved
       for ending in treer.iterProvidedFunction(self._checkCompleteHistory):
         completedHistNames.append(self.lastOutput.getParam(typeVar='inout',keyword='none',nodeId=ending.get('name'),serialize=False))
         finishedHistNames.append(utils.first(completedHistNames[-1].keys()))
     # assemble a dictionary
     if len(completedHistNames) > self.completedHistCnt:
       # sort the list of histories
       self.sortedListOfHists.extend(list(set(finishedHistNames) - set(self.sortedListOfHists)))
       completedHistNames = [completedHistNames[finishedHistNames.index(elem)] for elem in self.sortedListOfHists]
       if len(completedHistNames[-1].values()) > 0:
         lastOutDict = {'inputs':{},'outputs':{}}
         for histd in completedHistNames:
            histdict = list(histd.values())[-1]  # dict views are not indexable in Python 3
           for key in histdict['inputs' ].keys():
             if key not in lastOutDict['inputs'].keys(): lastOutDict['inputs'][key] = np.atleast_1d(histdict['inputs'][key])
             else                                      : lastOutDict['inputs'][key] = np.concatenate((np.atleast_1d(lastOutDict['inputs'][key]),np.atleast_1d(histdict['inputs'][key])))
           for key in histdict['outputs'].keys():
             if key not in lastOutDict['outputs'].keys(): lastOutDict['outputs'][key] = np.atleast_1d(histdict['outputs'][key])
             else                                       : lastOutDict['outputs'][key] = np.concatenate((np.atleast_1d(lastOutDict['outputs'][key]),np.atleast_1d(histdict['outputs'][key])))
       else: self.raiseAWarning('No Completed HistorySet! Not possible to start an adaptive search! Something went wrong!')
     if len(completedHistNames) > self.completedHistCnt:
       actualLastOutput      = self.lastOutput
       self.lastOutput       = copy.deepcopy(lastOutDict)
       ready                 = LimitSurfaceSearch.localStillReady(self,ready)
       self.lastOutput       = actualLastOutput
       self.completedHistCnt = len(completedHistNames)
       self.raiseAMessage("Completed full histories are "+str(self.completedHistCnt))
     else: ready = False
     self.adaptiveReady = ready
     if ready or detReady: return True
     else                : return False
   return detReady
Example 12
 def _readdressEvaluateConstResponse(self, edict):
     """
    Method to re-address the evaluate base class method in order to avoid wasting time
    in case the training set has a unique response (e.g. if there are 10 points in the training set
    and the 10 outcomes are all equal to 1, this method returns that constant without the need for an
    evaluation)
   @ In, edict, dict, prediction request. Not used in this method (kept the consistency with evaluate method)
   @ Out, returnDict, dict, dictionary with the evaluation (in this case, the constant number)
 """
     returnDict = {}
     #get the number of inputs provided to this ROM to evaluate
     numInputs = len(utils.first(edict.values()))
     #fill the target values
     for index, target in enumerate(self.target):
         returnDict[target] = np.ones(numInputs) * self.myNumber[index]
     return returnDict
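
A minimal standalone sketch of the same constant-response shortcut, assuming every target's training outcomes were identical (names here are illustrative, not the supervisedLearning API):

import numpy as np

def evaluateConstResponse(edict, targets, constants):
    # number of requested evaluations = length of any input column
    numInputs = len(next(iter(edict.values())))
    # every target gets its constant value replicated; no real evaluation is needed
    return {target: np.ones(numInputs) * const for target, const in zip(targets, constants)}

print(evaluateConstResponse({"x": [0.1, 0.2, 0.3]}, ["ans"], [1.0]))
# {'ans': array([1., 1., 1.])}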
Example 13
    def _computeSortedWeightsAndPoints(self, arrayIn, pbWeight, percent):
        """
      Method to compute the sorted weights and points
      @ In, arrayIn, list/numpy.array, the array of values from which the percentile needs to be estimated
      @ In, pbWeight, list/numpy.array, the reliability weights that correspond to the values in 'array'
      @ In, percent, float, the percentile that needs to be computed (between 0.01 and 1.0)
      @ Out, sortedWeightsAndPoints, list/numpy.array, with [:,0] as the value of the probability density function at the bin, normalized, and [:,1] is the coresonding edge of the probability density function.
      @ Out, indexL, index of the lower quantile
    """

        idxs = np.argsort(np.asarray(list(zip(pbWeight, arrayIn)))[:, 1])
        sortedWeightsAndPoints = np.asarray(
            list(zip(pbWeight[idxs], arrayIn[idxs])))
        weightsCDF = np.cumsum(sortedWeightsAndPoints[:, 0])
        indexL = utils.first(np.asarray(weightsCDF >= percent).nonzero())[0]
        return sortedWeightsAndPoints, indexL
Example 14
 def collectOutput(self,finishedJob, output):
   """
     Function to place all of the computed data into the output object, i.e. Files
     @ In, finishedJob, object, JobHandler object that is in charge of running this postprocessor
     @ In, output, object, the object where we want to place our computed results
     @ Out, None
   """
   evaluation = finishedJob.getEvaluation()
   if isinstance(evaluation, Runners.Error):
     self.raiseAnError(RuntimeError, ' No available output to collect')
   outputDict = evaluation[1]
   if self.cvScore is not None:
     output.addRealization(outputDict)
   else:
     cvIDs = {self.cvID: np.atleast_1d(range(len(utils.first(outputDict.values()))))}
     outputDict.update(cvIDs)
     output.load(outputDict, style='dict')
Example 15
 def run(self, inputIn):
     """
   This method executes the postprocessor action.
   @ In,  inputIn, list, list of DataObjects
   @ Out, outputDict, dict, dictionary of outputs
 """
     inputDict = self.inputToInternal(inputIn)
     targetDict = inputDict['target']
     classifierDict = inputDict['classifier']
     outputDict = {}
     outputDict.update(inputDict['target']['data'])
     outputType = targetDict['type']
     numRlz = utils.first(targetDict['input'].values()).size
     outputDict[self.label] = []
     for i in range(numRlz):
         tempTargDict = {}
         for param, vals in targetDict['input'].items():
             tempTargDict[param] = vals[i]
         for param, vals in targetDict['output'].items():
             tempTargDict[param] = vals[i]
         tempClfList = []
         labelIndex = None
         for key, values in classifierDict['input'].items():
             calcVal = self.funcDict[key].evaluate("evaluate", tempTargDict)
             inds, = np.where(np.asarray(values) == calcVal)
             if labelIndex is None:
                 labelIndex = set(inds)
             else:
                 labelIndex = labelIndex & set(inds)
         if len(labelIndex) != 1:
             self.raiseAnError(
                 IOError, "The parameters", ",".join(tempTargDict.keys()),
                 "with values",
                 ",".join([str(el) for el in tempTargDict.values()]),
                 "could not be put in any class!")
         label = classifierDict['output'][self.label][list(labelIndex)[0]]
         if outputType == 'PointSet':
             outputDict[self.label].append(label)
         else:
             outputDict[self.label].append(
                 np.asarray([label] * targetDict['historySizes'][i]))
     outputDict[self.label] = np.asarray(outputDict[self.label])
     outputDict = {'data': outputDict, 'dims': inputDict['target']['dims']}
     return outputDict
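
The classification step above works by intersecting, over all classifier features, the sets of rows whose stored value matches the value computed for the current realization; exactly one row must survive. A toy standalone version of that set-intersection idiom (illustrative names, not the post-processor API):

import numpy as np

classifier = {"regionX": np.array([0, 0, 1, 1]),
              "regionY": np.array([0, 1, 0, 1])}
labels = np.array(["A", "B", "C", "D"])
computed = {"regionX": 1, "regionY": 0}   # values evaluated for one realization

matching = None
for key, column in classifier.items():
    inds = set(np.where(column == computed[key])[0])
    matching = inds if matching is None else matching & inds

assert len(matching) == 1, "the realization must fall in exactly one class"
print(labels[matching.pop()])  # -> C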
Example 16
 def _findHighestImpactIndex(self, returnValue=False):
     """
   Finds and returns the index with the highest average expected impact factor across all targets
   Can optionally return the value of the highest impact, as well.
   @ In, returnValue, bool, optional, returns the value of the index if True
   @ Out, point, tuple(int), polynomial index with greatest expected effect
 """
     point = None
     avg = 0
     for pt in utils.first(self.expImpact.values()).keys():
         new = sum(self.expImpact[t][pt]
                   for t in self.targets) / len(self.targets)
         if avg < new:
             avg = new
             point = pt
     self.raiseADebug('Highest impact point is', point,
                      'with expected average impact', avg)
     if returnValue:
         return point, avg
     else:
         return point
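
The selection logic reduces to an argmax over polynomial indices of the impact averaged across targets. A tiny standalone sketch with made-up numbers (not the sampler's internal data structures):

expImpact = {"t1": {(1, 0): 0.2, (0, 1): 0.5},
             "t2": {(1, 0): 0.4, (0, 1): 0.3}}   # target -> {poly index: expected impact}
targets = list(expImpact)

best, bestAvg = None, 0.0
for pt in next(iter(expImpact.values())):
    avg = sum(expImpact[t][pt] for t in targets) / len(targets)
    if avg > bestAvg:
        best, bestAvg = pt, avg
print(best, bestAvg)  # (0, 1) 0.4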
Example 17
 def _printToLog(self):
     """
   Prints adaptive state of this sampler to the log file.
   @ In, None
   @ Out, None
 """
     self.logCounter += 1
     pl = 4 * len(self.features) + 1
     f = open(self.logFile, 'a')
     f.writelines('===================== STEP %i =====================\n' %
                  self.logCounter)
     f.writelines('\nNumber of Runs: %i\n' %
                  len(self.pointsNeededToMakeROM))
     f.writelines('Error: %1.9e\n' % self.error)
     f.writelines('Features: %s\n' % ','.join(self.features))
     f.writelines('\nExisting indices:\n')
     f.writelines('    {:^{}}:'.format('poly', pl))
     for t in self.targets:
         f.writelines('  {:<16}'.format(t))
     f.writelines('\n')
     for idx in self.indexSet.points:
         f.writelines('    {:^{}}:'.format(idx, pl))
         for t in self.targets:
             f.writelines('  {:<9}'.format(self.actImpact[t][idx]))
         f.writelines('\n')
     f.writelines('\nPredicted indices:\n')
     f.writelines('    {:^{}}:'.format('poly', pl))
     for t in self.targets:
         f.writelines('  {:<16}'.format(t))
     f.writelines('\n')
     for idx in utils.first(self.expImpact.values()).keys():
         f.writelines('    {:^{}}:'.format(idx, pl))
         for t in self.targets:
             f.writelines('  {:<9}'.format(self.expImpact[t][idx]))
         f.writelines('\n')
     f.writelines('===================== END STEP =====================\n')
     f.close()
Example 18
 def localGenerateInput(self, model, myInput):
     """
   Function to select the next most informative point for refining the limit
   surface search.
   After this method is called, the self.inputInfo should be ready to be sent
   to the model
   @ In, model, model instance, an instance of a model
   @ In, myInput, list, a list of the original needed inputs for the model (e.g. list of files, etc.)
   @ Out, None
 """
     self.inputInfo['distributionName'] = {
     }  #Used to determine which distribution to change if needed.
     self.inputInfo['distributionType'] = {
     }  #Used to determine which distribution type is used
     weight = 1.0
     recastDict = {}
     for i in range(len(self.axisName)):
         varName = self.axisName[i]
         if self.gridInfo[varName] == 'CDF':
             if self.distDict[varName].getDimensionality() == 1:
                 recastDict[varName] = [self.distDict[varName].ppf]
             else:
                 recastDict[varName] = [
                     self.distDict[varName].inverseMarginalDistribution,
                     [
                         self.variables2distributionsMapping[varName]['dim']
                         - 1
                     ]
                 ]
         elif self.gridInfo[varName] != 'value':
              self.raiseAnError(
                  IOError, self.gridInfo[varName] +
                  ' is not a known keyword for the grid type. Sampler: ' +
                  self.name)
     if self.externalgGridCoord:
         currentIndexes = self.gridEntity.returnIteratorIndexesFromIndex(
             self.gridCoordinate)
         coordinates = self.gridEntity.returnCoordinateFromIndex(
             self.gridCoordinate, True, recastDict)
     else:
         currentIndexes = self.gridEntity.returnIteratorIndexes()
         coordinates = self.gridEntity.returnPointAndAdvanceIterator(
             True, recastDict)
      if coordinates is None:
         self.raiseADebug(
             'Grid finished with restart points!  Moving on...')
         raise utils.NoMoreSamplesNeeded
     coordinatesPlusOne = self.gridEntity.returnShiftedCoordinate(
         currentIndexes, dict.fromkeys(self.axisName, 1))
     coordinatesMinusOne = self.gridEntity.returnShiftedCoordinate(
         currentIndexes, dict.fromkeys(self.axisName, -1))
     for i in range(len(self.axisName)):
         varName = self.axisName[i]
         # compute the SampledVarsPb for 1-D distribution
         if ("<distribution>" in varName) or (
                 self.variables2distributionsMapping[varName]['totDim']
                 == 1):
             for key in varName.strip().split(','):
                 self.inputInfo['distributionName'][key] = self.toBeSampled[
                     varName]
                 self.inputInfo['distributionType'][key] = self.distDict[
                     varName].type
                 self.values[key] = coordinates[varName]
                 self.inputInfo['SampledVarsPb'][key] = self.distDict[
                     varName].pdf(self.values[key])
         # compute the SampledVarsPb for N-D distribution
         else:
             if self.variables2distributionsMapping[varName][
                     'reducedDim'] == 1:
                 # to avoid double count;
                 distName = self.variables2distributionsMapping[varName][
                     'name']
                 ndCoordinate = [0] * len(
                     self.distributions2variablesMapping[distName])
                 positionList = self.distributions2variablesIndexList[
                     distName]
                 for var in self.distributions2variablesMapping[distName]:
                     variable = utils.first(var.keys())
                     position = utils.first(var.values())
                     ndCoordinate[positionList.index(position)] = float(
                         coordinates[variable.strip()])
                     for key in variable.strip().split(','):
                         self.inputInfo['distributionName'][
                             key] = self.toBeSampled[variable]
                         self.inputInfo['distributionType'][
                             key] = self.distDict[variable].type
                         self.values[key] = coordinates[variable]
                 # Based on the discussion with Diego, we will use the following to compute SampledVarsPb.
                 self.inputInfo['SampledVarsPb'][varName] = self.distDict[
                     varName].pdf(ndCoordinate)
         # Compute the ProbabilityWeight
         if ("<distribution>" in varName) or (
                 self.variables2distributionsMapping[varName]['totDim']
                 == 1):
             if self.distDict[varName].getDisttype() == 'Discrete':
                 weight *= self.distDict[varName].pdf(coordinates[varName])
             else:
                 if self.gridInfo[varName] == 'CDF':
                     if coordinatesPlusOne[
                             varName] != sys.maxsize and coordinatesMinusOne[
                                 varName] != -sys.maxsize:
                         midPlusCDF = (coordinatesPlusOne[varName] +
                                       self.distDict[varName].cdf(
                                           self.values[key])) / 2.0
                         midMinusCDF = (coordinatesMinusOne[varName] +
                                        self.distDict[varName].cdf(
                                            self.values[key])) / 2.0
                         self.inputInfo[
                             'ProbabilityWeight-' + varName.replace(
                                 ",", "-")] = midPlusCDF - midMinusCDF
                         weight *= midPlusCDF - midMinusCDF
                     if coordinatesMinusOne[varName] == -sys.maxsize:
                         midPlusCDF = (coordinatesPlusOne[varName] +
                                       self.distDict[varName].cdf(
                                           self.values[key])) / 2.0
                         midMinusCDF = 0.0
                         self.inputInfo[
                             'ProbabilityWeight-' + varName.replace(
                                 ",", "-")] = midPlusCDF - midMinusCDF
                         weight *= midPlusCDF - midMinusCDF
                     if coordinatesPlusOne[varName] == sys.maxsize:
                         midPlusCDF = 1.0
                         midMinusCDF = (coordinatesMinusOne[varName] +
                                        self.distDict[varName].cdf(
                                            self.values[key])) / 2.0
                         self.inputInfo[
                             'ProbabilityWeight-' + varName.replace(
                                 ",", "-")] = midPlusCDF - midMinusCDF
                         weight *= midPlusCDF - midMinusCDF
                 else:
                     # Value
                     if coordinatesPlusOne[
                             varName] != sys.maxsize and coordinatesMinusOne[
                                 varName] != -sys.maxsize:
                         midPlusValue = (self.values[key] +
                                         coordinatesPlusOne[varName]) / 2.0
                         midMinusValue = (
                             self.values[key] +
                             coordinatesMinusOne[varName]) / 2.0
                         weight *= self.distDict[varName].cdf(
                             midPlusValue) - self.distDict[varName].cdf(
                                 midMinusValue)
                         self.inputInfo[
                             'ProbabilityWeight-' +
                             varName.replace(",", "-")] = self.distDict[
                                 varName].cdf(midPlusValue) - self.distDict[
                                     varName].cdf(midMinusValue)
                     if coordinatesMinusOne[varName] == -sys.maxsize:
                         midPlusValue = (self.values[key] +
                                         coordinatesPlusOne[varName]) / 2.0
                         self.inputInfo[
                             'ProbabilityWeight-' +
                             varName.replace(",", "-")] = self.distDict[
                                 varName].cdf(midPlusValue) - 0.0
                         weight *= self.distDict[varName].cdf(
                             midPlusValue) - 0.0
                     if coordinatesPlusOne[varName] == sys.maxsize:
                         midMinusValue = (
                             self.values[key] +
                             coordinatesMinusOne[varName]) / 2.0
                         self.inputInfo[
                             'ProbabilityWeight-' + varName.replace(
                                 ",", "-")] = 1.0 - self.distDict[
                                     varName].cdf(midMinusValue)
                         weight *= 1.0 - self.distDict[varName].cdf(
                             midMinusValue)
         # ND variable
         else:
             if self.variables2distributionsMapping[varName][
                     'reducedDim'] == 1:
                  # to avoid double counting the weight for the ND distribution; count only one variable instead of N
                 distName = self.variables2distributionsMapping[varName][
                     'name']
                 ndCoordinate = np.zeros(
                     len(self.distributions2variablesMapping[distName]))
                 dxs = np.zeros(
                     len(self.distributions2variablesMapping[distName]))
                 positionList = self.distributions2variablesIndexList[
                     distName]
                 for var in self.distributions2variablesMapping[distName]:
                     variable = utils.first(var.keys()).strip()
                     position = utils.first(var.values())
                     if self.gridInfo[variable] == 'CDF':
                         if coordinatesPlusOne[
                                 variable] != sys.maxsize and coordinatesMinusOne[
                                     variable] != -sys.maxsize:
                             up = self.distDict[
                                 variable].inverseMarginalDistribution(
                                     coordinatesPlusOne[variable],
                                     self.variables2distributionsMapping[
                                         variable]['dim'] - 1)
                             down = self.distDict[
                                 variable].inverseMarginalDistribution(
                                     coordinatesMinusOne[variable],
                                     self.variables2distributionsMapping[
                                         variable]['dim'] - 1)
                             dxs[positionList.index(
                                 position)] = (up - down) / 2.0
                             ndCoordinate[positionList.index(
                                 position
                             )] = coordinates[variable] - (
                                 coordinates[variable] - down) / 2.0 + dxs[
                                     positionList.index(position)] / 2.0
                         if coordinatesMinusOne[variable] == -sys.maxsize:
                             up = self.distDict[
                                 variable].inverseMarginalDistribution(
                                     coordinatesPlusOne[variable],
                                     self.variables2distributionsMapping[
                                         variable]['dim'] - 1)
                             dxs[positionList.index(position)] = (
                                 coordinates[variable.strip()] +
                                 up) / 2.0 - self.distDict[
                                     varName].returnLowerBound(
                                         positionList.index(position))
                             ndCoordinate[positionList.index(position)] = (
                                 (coordinates[variable.strip()] + up) / 2.0
                                 + self.distDict[varName].returnLowerBound(
                                     positionList.index(position))) / 2.0
                         if coordinatesPlusOne[variable] == sys.maxsize:
                             down = self.distDict[
                                 variable].inverseMarginalDistribution(
                                     coordinatesMinusOne[variable],
                                     self.variables2distributionsMapping[
                                         variable]['dim'] - 1)
                             dxs[positionList.index(
                                 position
                             )] = self.distDict[varName].returnUpperBound(
                                 positionList.index(position)) - (
                                     coordinates[variable.strip()] +
                                     down) / 2.0
                             ndCoordinate[positionList.index(position)] = (
                                 self.distDict[varName].returnUpperBound(
                                     positionList.index(position)) +
                                 (coordinates[variable.strip()] + down) /
                                 2.0) / 2.0
                     else:
                         if coordinatesPlusOne[
                                 variable] != sys.maxsize and coordinatesMinusOne[
                                     variable] != -sys.maxsize:
                             dxs[positionList.index(position)] = (
                                 coordinatesPlusOne[variable] -
                                 coordinatesMinusOne[variable]) / 2.0
                             ndCoordinate[positionList.index(
                                 position
                             )] = coordinates[variable.strip()] - (
                                 coordinates[variable.strip()] -
                                 coordinatesMinusOne[variable]) / 2.0 + dxs[
                                     positionList.index(position)] / 2.0
                         if coordinatesMinusOne[variable] == -sys.maxsize:
                             dxs[positionList.index(position)] = (
                                 coordinates[variable.strip()] +
                                 coordinatesPlusOne[variable]
                             ) / 2.0 - self.distDict[
                                 varName].returnLowerBound(
                                     positionList.index(position))
                             ndCoordinate[positionList.index(position)] = (
                                 (coordinates[variable.strip()] +
                                  coordinatesPlusOne[variable]) / 2.0 +
                                 self.distDict[varName].returnLowerBound(
                                     positionList.index(position))) / 2.0
                         if coordinatesPlusOne[variable] == sys.maxsize:
                             dxs[positionList.index(
                                 position
                             )] = self.distDict[varName].returnUpperBound(
                                 positionList.index(position)) - (
                                     coordinates[variable.strip()] +
                                     coordinatesMinusOne[variable]) / 2.0
                             ndCoordinate[positionList.index(position)] = (
                                 self.distDict[varName].returnUpperBound(
                                     positionList.index(position)) +
                                 (coordinates[variable.strip()] +
                                  coordinatesMinusOne[variable]) /
                                 2.0) / 2.0
                 self.inputInfo['ProbabilityWeight-' + varName.replace(
                     ",", "!")] = self.distDict[varName].cellIntegral(
                         ndCoordinate, dxs)
                 weight *= self.distDict[varName].cellIntegral(
                     ndCoordinate, dxs)
     self.inputInfo['PointProbability'] = reduce(
         mul, self.inputInfo['SampledVarsPb'].values())
     self.inputInfo['ProbabilityWeight'] = copy.deepcopy(weight)
     self.inputInfo['SamplerType'] = 'Grid'
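
For the 1-D 'value' grid above, each node's probability weight is the CDF mass between the midpoints toward its neighbours, with the distribution bounds used at the two ends of the grid. A compact standalone sketch of that weighting for a standard normal (scipy.stats.norm stands in for the sampler's distDict entry):

import numpy as np
from scipy.stats import norm

nodes = np.array([-1.0, 0.0, 1.0])                              # grid values for one variable
edges = np.concatenate(([-np.inf], (nodes[:-1] + nodes[1:]) / 2.0, [np.inf]))
weights = norm.cdf(edges[1:]) - norm.cdf(edges[:-1])            # mass between neighbouring midpoints
print(weights, weights.sum())  # ~[0.309 0.383 0.309], sums to 1.0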
Example 19
    def run(self, inputDic):
        """
      @ In, inputDic, list, list of dictionaries which contains the data inside the input DataObjects
      @ Out, outputDic, dict, dictionary which contains the data to be collected by output DataObject
    """
        if len(inputDic) > 1:
            self.raiseAnError(
                IOError,
                self.__class__.__name__ + ' Interfaced Post-Processor ' +
                str(self.name) + ' accepts only one dataObject')

        #get actual data
        inputDict = inputDic[0]['data']
        #identify features
        self.features = inputDic[0]['outVars']
        #don't keep the pivot parameter in the feature space
        if self.pivotParameter in self.features:
            self.features.remove(self.pivotParameter)

        #if output length (size of desired output history) not set, set it now
        if self.outputLen is None:
            self.outputLen = np.asarray(inputDict['output'][utils.first(
                inputDict['output'].keys())][self.pivotParameter])[-1]

        ## Check if data is synchronized
        referenceHistory = 0
        referenceTimeAxis = inputDict[self.pivotParameter][referenceHistory]
        for hist in range(inputDic[0]['numberRealizations']):
            if str(inputDict[self.pivotParameter][hist]) != str(
                    referenceTimeAxis):
                errorMessage = '{} Interfaced Post-Processor "{}": one or more histories in the historySet have different time scales (e.g., reference points: {} and {})'.format(
                    self.__class__.__name__, self.name, referenceHistory, hist)
                self.raiseAnError(IOError, errorMessage)

        # task: reshape the data into histories with the size of the output I'm looking for
        #data dictionaries have form {historyNumber:{VarName:[data], VarName:[data]}}
        reshapedData = {}
        newHistoryCounter = 0  #new history tracking labels
        for historyNumber in range(inputDic[0]['numberRealizations']):
            #array of the pivot values provided in the history
            pivotValues = np.asarray(
                inputDict[self.pivotParameter][historyNumber])
            #if the desired output pivot value length is (equal to or) longer than the provided history ...
            #   -> (i.e. I have a year and I want output of a year)
            if self.outputLen >= pivotValues[-1]:
                #don't change the shape of this history; it's fine as is
                reshapedData[newHistoryCounter] = self.retrieveHistory(
                    inputDict, historyNumber)
                newHistoryCounter += 1
            #if the provided history is longer than the requested output period
            #   -> (i.e., I have a year of data and I only want output of 1 year)
            else:
                #reshape the history into multiple histories to use
                startPivot = 0
                endPivot = self.outputLen
                # until you find the last observed pivot point...
                while endPivot <= pivotValues[-1]:
                    #create a storage place for each new usable history
                    reshapedData[newHistoryCounter] = {}
                    # acceptable is if the pivot value is greater than start and less than end
                    extractCondition = np.logical_and(
                        pivotValues >= startPivot, pivotValues <= endPivot)
                    # extract out the acceptable parts from the pivotValues, and reset the base pivot point to 0
                    reshapedData[newHistoryCounter][
                        self.pivotParameter] = np.extract(
                            extractCondition, pivotValues) - startPivot
                    # for each feature...
                    for feature in self.features:
                        # extract applicable information from the feature set
                        reshapedData[newHistoryCounter][feature] = np.extract(
                            extractCondition,
                            inputDict[feature][historyNumber])
                    #increment history counter
                    newHistoryCounter += 1
                    #update new start/end points for grabbing the next history
                    startPivot = endPivot
                    endPivot += self.outputLen

        inputDict['output'] = reshapedData
        self.numHistory = len(inputDict['output'].keys(
        ))  #should be same as newHistoryCounter - 1, if that's faster
        #update the set of pivot parameter values to match the first of the reshaped histories
        self.pivotValues = np.asarray(inputDict['output'][utils.first(
            inputDict['output'].keys())][self.pivotParameter])

        # task: split the history into multiple subsequences so that the typical history can be constructed
        #  -> i.e., split the year history into multiple months, so we get a typical January, February, ..., hence a typical year
        # start by identifying the subsequences within the histories
        self.subsequence = [
        ]  #list of start/stop pivot values for the subsequences
        startLocation = 0  #tracks the point in the history being evaluated
        n = 0  #counts the number of the subsequence
        # in this loop we collect the similar (in time) subsequences in each history
        while True:
            subsequenceLength = self.subseqLen[n % len(self.subseqLen)]
            # if the history is longer than the subsequence we need, take the whole subsequence
            if startLocation + subsequenceLength < self.pivotValues[-1]:
                self.subsequence.append(
                    [startLocation, startLocation + subsequenceLength])
            # otherwise, take only as much as the history has, and exit
            else:
                self.subsequence.append([startLocation, self.pivotValues[-1]])
                break  # TODO this could be made "while startLocation + subsequenceLength < self.pivotValues[-1]"
            # iterate forward
            startLocation += subsequenceLength
            n += 1
        numParallelSubsequences = len(self.subsequence)

        #now that the subsequences are identified, collect the data
        # for the record, defaultdict is a dict that auto-populates using the constructor given if an element isn't present
        subseqData = defaultdict(
            dict
        )  # eventually {'all':{feature:[[parallel output data]], feature:[[parallel output data]]},
        #                                    subseqIndex:{pivotParam:pivotValues[-1]},
        #                                                 feature:[[parallel data]]}
        # 'all' means all the feature data is included,
        #     while the subseqIndex dictionaries only contain the relevant subsequence data (i.e., the monthly data)
        # stack the similar histories in numpy arrays for full period (for example, by year)
        for feature in self.features:
            subseqData['all'][feature] = np.concatenate(
                list(inputDict['output'][h][feature]
                     for h in inputDict['output'].keys()))

        # gather feature data by subsequence (for example, by month)
        for index in range(numParallelSubsequences):
            extractCondition = np.logical_and(
                self.pivotValues >= self.subsequence[index][0],
                self.pivotValues < self.subsequence[index][1])
            subseqData[index][self.pivotParameter] = np.extract(
                extractCondition, self.pivotValues)
            #get the pivot parameter entries as well, but only do it once, at the end
            if self.pivotValues[-1] == self.subsequence[index][1]:
                subseqData[index][self.pivotParameter] = np.concatenate(
                    (subseqData[index][self.pivotParameter],
                     np.asarray([self.pivotValues[-1]])))
            #get the subsequence data for each feature, for each history
            for feature in self.features:
                subseqData[index][feature] = np.zeros(
                    shape=(self.numHistory,
                           len(subseqData[index][self.pivotParameter])))
                for h, historyNumber in enumerate(inputDict['output'].keys()):
                    if self.pivotValues[-1] == self.subsequence[index][1]:
                        #TODO this is doing the right action, but it's strange that we need to add one extra element.
                        #  Maybe this should be fixed where we set the self.subsequence[index][1] for the last index, instead of patched here
                        subseqData[index][feature][h, 0:-1] = np.extract(
                            extractCondition,
                            inputDict['output'][historyNumber][feature])
                        subseqData[index][feature][h, -1] = inputDict[
                            'output'][historyNumber][feature][-1]
                    else:
                        subseqData[index][feature][h, :] = np.extract(
                            extractCondition,
                            inputDict['output'][historyNumber][feature])

        # task: compare CDFs to find the nearest match to the collective time's standard CDF (see the paper ref'd in the manual)
        # start by building the CDFs in the same structure as subseqData
        # for the record, defaultdict is a dict that auto-populates using the constructor given if an element isn't present
        cdfData = defaultdict(
            dict
        )  # eventually {'all':{feature:[monotonically increasing floats], feature:[monotonically increasing floats]},
        #                                    subseqIndex:{pivotParam:pivotValues[-1]},
        #                                                 feature:[monotonically increasing floats]}
        # TODO there surely is a faster way to do this than triple-for-loops
        for feature in self.features:
            #construct reasonable bins for feature
            numBins, binEdges = mathUtils.numBinsDraconis(
                subseqData['all'][feature])
            #get the empirical CDF by bin for entire history (e.g., full year or even multiple years)
            cdfData['all'][feature] = self.__computeECDF(
                subseqData['all'][feature], binEdges)
            #get the empirical CDF by bin for subsequence (e.g., for a month)
            for index in range(numParallelSubsequences):
                cdfData[index][feature] = np.zeros(shape=(self.numHistory,
                                                          numBins))
                for h in range(self.numHistory):
                    cdfData[index][feature][h, :] = self.__computeECDF(
                        subseqData[index][feature][h, :], binEdges)

        # now determine which subsequences are the most typical, using the CDF
        # find the smallestDeltaCDF and its index so the typical data can be set
        # first, find and store them by history
        typicalDataHistories = {}
        for index in range(numParallelSubsequences):
            typicalDataHistories[index] = {}
            typicalDataHistories[index][
                self.pivotParameter] = subseqData[index][self.pivotParameter]
            smallestDeltaCDF = np.inf
            smallestDeltaIndex = numParallelSubsequences + 1  #initialized to an out-of-range index so a failure to update it surfaces as an error
            for h in range(
                    self.numHistory
            ):  # for h, historyNumber in enumerate(inputDict['output'].keys()):
                delta = sum(
                    self.__computeDist(cdfData['all'][feature], cdfData[index]
                                       [feature][h, :])
                    for feature in self.features)
                if delta < smallestDeltaCDF:
                    smallestDeltaCDF = delta
                    smallestDeltaIndex = h
            for feature in self.features:
                typicalDataHistories[index][feature] = subseqData[index][
                    feature][smallestDeltaIndex, :]
        # now collapse the data into the typical history
        typicalData = {}
        typicalData[self.pivotParameter] = np.concatenate(
            list(typicalDataHistories[index][self.pivotParameter]
                 for index in range(numParallelSubsequences)))
        for feature in self.features:
            typicalData[feature] = np.concatenate(
                list(typicalDataHistories[index][feature]
                     for index in range(numParallelSubsequences)))
        # sanity check, should probably be skipped for efficiency, as it looks like a debugging tool
        # preserved for now in case it was important for an undiscovered reason
        #   for t in range(1,len(typicalData[self.pivotParameter])):
        #      if typicalData[self.pivotParameter][t] < typicalData[self.pivotParameter][t-1]:
        #        self.raiseAnError(RuntimeError,'Something went wrong with the TypicalHistorySet!  Expected calculated data is missing.')

        # task: collect data as expected by RAVEN
        outputDict = {'data': {}}
        # typical history
        for var in typicalData.keys():
            outputDict['data'][var] = np.zeros(1, dtype=object)
            outputDict['data'][var][0] = typicalData[var]
        # preserve input data
        for var in inputDict['inpVars']:  # assuming the input variable names are listed under 'inpVars'
            outputDict['data'][var] = np.zeros(1, dtype=object)
            outputDict['data'][var][0] = inputDict[var][0]
        outputDict['dims'] = {}
        for var in self.features:
            outputDict['dims'][var] = [self.pivotParameter]
        return outputDict
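
Note: the helper methods __computeECDF and __computeDist used above are not shown in this example. A minimal standalone sketch of how an empirical CDF over fixed bin edges and an L1-style distance between two CDFs could be written (the function names and the distance metric are assumptions for illustration, not the RAVEN implementation):

import numpy as np

def compute_ecdf(data, bin_edges):
    """Empirical CDF of 'data' evaluated at the upper edge of each bin."""
    counts, _ = np.histogram(data, bins=bin_edges)
    return np.cumsum(counts) / float(len(data))

def cdf_distance(cdf_a, cdf_b):
    """Simple L1 distance between two CDFs sampled on the same bins."""
    return np.sum(np.abs(np.asarray(cdf_a) - np.asarray(cdf_b)))

# usage idea: pick the subsequence whose CDF is closest to the full-period CDF
# full   = compute_ecdf(all_data, edges)
# deltas = [cdf_distance(full, compute_ecdf(month, edges)) for month in months]
# typical_index = int(np.argmin(deltas))
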
Esempio n. 20
0
    def localGenerateInput(self, model, myInput):
        """
      Function to select the next most informative point for refining the limit
      surface search.
      After this method is called, the self.inputInfo should be ready to be sent
      to the model
      @ In, model, model instance, an instance of a model
      @ In, myInput, list, a list of the original needed inputs for the model (e.g. list of files, etc.)
      @ Out, None
    """
        # create values dictionary
        weight = 1.0
        for key in self.distDict:
            # check if the key is a comma separated list of strings
            # in this case, the user wants to sample the comma separated variables with the same sampled value => link the value to all comma separated variables

            dim = self.variables2distributionsMapping[key]['dim']
            totDim = self.variables2distributionsMapping[key]['totDim']
            dist = self.variables2distributionsMapping[key]['name']
            reducedDim = self.variables2distributionsMapping[key]['reducedDim']
            weight = 1.0
            if totDim == 1:
                for var in self.distributions2variablesMapping[dist]:
                    varID = utils.first(var.keys())
                    if self.samplingType == 'uniform':
                        distData = self.distDict[key].getCrowDistDict()
                        if ('xMin' not in distData.keys()) or (
                                'xMax' not in distData.keys()):
                            self.raiseAnError(
                                IOError,
                                "In the Monte-Carlo sampler a uniform sampling type has been chosen; however, one or more distributions have not specified either the lowerBound or the upperBound"
                            )
                        lower = distData['xMin']
                        upper = distData['xMax']
                        rvsnum = lower + (upper - lower) * randomUtils.random()
                        epsilon = (upper - lower) / self.limit
                        midPlusCDF = self.distDict[key].cdf(rvsnum + epsilon)
                        midMinusCDF = self.distDict[key].cdf(rvsnum - epsilon)
                        weight *= midPlusCDF - midMinusCDF
                    else:
                        rvsnum = self.distDict[key].rvs()
                    self.inputInfo['SampledVarsPb'][key] = self.distDict[
                        key].pdf(rvsnum)
                    for kkey in varID.strip().split(','):
                        self.values[kkey] = np.atleast_1d(rvsnum)[0]
                    self.inputInfo['ProbabilityWeight-' + varID] = 1.
            elif totDim > 1:
                if reducedDim == 1:
                    if self.samplingType is None:
                        rvsnum = self.distDict[key].rvs()
                        coordinate = np.atleast_1d(rvsnum).tolist()
                    else:
                        coordinate = np.zeros(totDim)
                        for i in range(totDim):
                            lower = self.distDict[key].returnLowerBound(i)
                            upper = self.distDict[key].returnUpperBound(i)
                            coordinate[i] = lower + (
                                upper - lower) * randomUtils.random()
                    if reducedDim > len(coordinate):
                        self.raiseAnError(
                            IOError,
                            "The dimension defined for variables drew from the multivariate normal distribution is exceeded by the dimension used in Distribution (MultivariateNormal) "
                        )
                    probabilityValue = self.distDict[key].pdf(coordinate)
                    self.inputInfo['SampledVarsPb'][key] = probabilityValue
                    for var in self.distributions2variablesMapping[dist]:
                        varID = utils.first(var.keys())
                        varDim = var[varID]
                        for kkey in varID.strip().split(','):
                            self.values[kkey] = np.atleast_1d(rvsnum)[varDim -
                                                                      1]
                    self.inputInfo['ProbabilityWeight-' + dist] = 1.
            else:
                self.raiseAnError(
                    IOError,
                    "Total dimension for given distribution should be >= 1")

        if len(self.inputInfo['SampledVarsPb'].keys()) > 0:
            self.inputInfo['PointProbability'] = reduce(
                mul, self.inputInfo['SampledVarsPb'].values())
        else:
            self.inputInfo['PointProbability'] = 1.0
        if self.samplingType == 'uniform':
            self.inputInfo['ProbabilityWeight'] = weight
        else:
            self.inputInfo[
                'ProbabilityWeight'] = 1.0  #MC weight is 1/N => weight is one
        self.inputInfo['SamplerType'] = 'MonteCarlo'
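
Note on the 'uniform' sampling branch above: the point is drawn uniformly in value space and then re-weighted by the probability mass of a small CDF window around it. A rough standalone sketch of that idea, using scipy.stats.norm as a stand-in for RAVEN's distribution wrappers (the bounds and sample count are illustrative):

import numpy as np
from scipy.stats import norm

rng = np.random.default_rng()

lower, upper, n_samples = -3.0, 3.0, 1000
dist = norm(loc=0.0, scale=1.0)

# draw uniformly in value space instead of sampling from the distribution
x = lower + (upper - lower) * rng.random()
# weight the sample by the probability mass in a small window around x
eps = (upper - lower) / n_samples
weight = dist.cdf(x + eps) - dist.cdf(x - eps)
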
Esempio n. 21
0
    def localGenerateInput(self, model, oldInput):
        """
      Function to select the next most informative point for refining the limit
      surface search.
      After this method is called, the self.inputInfo should be ready to be sent
      to the model
      @ In, model, model instance, an instance of a model
      @ In, oldInput, list, a list of the original needed inputs for the model (e.g. list of files, etc.)
      @ Out, None
    """
        #note: pointsNeeded is the collection of points needed by sampler,
        #      while neededPoints is just the reference point that needs running
        #if there's a point that THIS sampler needs, prioritize it
        if len(self.neededPoints) > 0:
            pt = self.neededPoints.pop()
        #otherwise, take from the highest-impact sampler's needed points
        else:
            #pointsNeeded is ordered from least to most impactful, so reverse the key order.
            subsets = list(self.pointsNeeded.keys())
            subsets.reverse()
            #now they're in order of impact.  Look for the next point to run.
            found = False
            for sub in subsets:
                for p in self.pointsNeeded[sub]:
                    pt = self._expandCutPoint(sub, p)
                    if pt not in self.submittedNotCollected:
                        self.submittedNotCollected.append(pt)
                        found = True
                        break
                if found:
                    break
            if not found:
                #this should not occur; if it does, it indicates something went wrong during development.
                self.raiseAnError(
                    RuntimeError,
                    'No point was found to generate!  This should not be possible...'
                )
        #add the number of necessary distinct points to a set (so no duplicates).
        self.distinctPoints.add(pt)
        for v, varName in enumerate(self.features):
            # compute the SampledVarsPb for 1-D distribution
            if self.variables2distributionsMapping[varName]['totDim'] == 1:
                for key in varName.strip().split(','):
                    self.values[key] = pt[v]
                self.inputInfo['SampledVarsPb'][varName] = self.distDict[
                    varName].pdf(pt[v])
                self.inputInfo['ProbabilityWeight-' + varName.replace(
                    ",", "-")] = self.inputInfo['SampledVarsPb'][varName]
            # compute the SampledVarsPb for N-D distribution
            elif self.variables2distributionsMapping[varName][
                    'totDim'] > 1 and self.variables2distributionsMapping[
                        varName]['reducedDim'] == 1:
                dist = self.variables2distributionsMapping[varName]['name']
                ndCoordinates = np.zeros(
                    len(self.distributions2variablesMapping[dist]))
                positionList = self.distributions2variablesIndexList[dist]
                for varDict in self.distributions2variablesMapping[dist]:
                    var = utils.first(varDict.keys())
                    position = utils.first(varDict.values())
                    location = -1
                    for key in var.strip().split(','):
                        if key in self.features:
                            location = self.features.index(key)
                            break
                    if location > -1:
                        ndCoordinates[positionList.index(
                            position)] = pt[location]
                    else:
                        self.raiseAnError(
                            IOError, 'The variable ' + var +
                            ' is listed in the adaptive Sobol sampler but is not used in the ROM!'
                        )
                    for key in var.strip().split(','):
                        self.values[key] = pt[location]
                self.inputInfo['SampledVarsPb'][varName] = self.distDict[
                    varName].pdf(ndCoordinates)
                self.inputInfo['ProbabilityWeight-' + varName.replace(
                    ",", "!")] = self.inputInfo['SampledVarsPb'][varName]

        self.inputInfo['PointProbability'] = reduce(
            mul, self.inputInfo['SampledVarsPb'].values())
        self.inputInfo['SamplerType'] = 'Adaptive Sparse Grids for Sobol'
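
Note: the samplers above rely on two recurring conventions: a comma-separated variable name receives one sampled value shared by all of its aliases, and the point probability is the product of the marginal probabilities. A tiny standalone sketch of those conventions (the variable names and numbers are illustrative):

from functools import reduce
from operator import mul

sampled = {'a,b': 0.7, 'c': 1.2}   # one value per (possibly grouped) variable
pdfs = {'a,b': 0.31, 'c': 0.12}    # corresponding marginal pdf values

values = {}
for name, value in sampled.items():
    # every alias in a comma-separated group receives the same sampled value
    for key in name.strip().split(','):
        values[key] = value

# point probability is the product of the marginal probabilities
point_probability = reduce(mul, pdfs.values())
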
Esempio n. 22
0
  def run(self, inputIn):
    """
      This method executes the postprocessor action. In this case it performs
      the action defined in the external pp
      @ In, inputIn, dict, dictionary of data to process
      @ Out, outputDict, dict, Dictionary containing the post-processed results
    """
    inputDict = self.inputToInternal(inputIn)
    outputDict = {}
    ## This will map the name to its appropriate interface and method
    ## in the case of a function being defined in two separate files, we
    ## qualify the output by appending the name of the interface from which it
    ## originates
    methodMap = {}

    ## First check all the requested methods are available and if there are
    ## duplicates then qualify their names for the user
    for method in self.methodsToRun:
      matchingInterfaces = []
      for interface in self.externalInterfaces:
        if method in interface.availableMethods():
          matchingInterfaces.append(interface)
      if len(matchingInterfaces) == 0:
        self.raiseAWarning(method + ' not found. I will skip it.')
      #elif len(matchingInterfaces) == 1:
      #  methodMap[method] = (matchingInterfaces[0], method)
      else:
        for interface in matchingInterfaces:
          methodName = interface.name + '_' + method
          methodMap[methodName] = (interface, method)

    ## Evaluate the method and add it to the outputDict, also if the method
    ## adjusts the input data, then you should update it as well.
    warningMessages = []
    for methodName, (interface, method) in methodMap.items():
      # The deep copy is needed since the interface postprocessor will change the values of inputDict
      tempInputDict = copy.deepcopy(inputDict)
      outputDict[methodName] = np.atleast_1d(copy.copy(interface.evaluate(method, tempInputDict)))
      if outputDict[methodName] is None:
        self.raiseAnError(Exception,"the method "+methodName+" has not produced any result. It needs to return a result!")
      for target in tempInputDict.keys():
        if hasattr(interface, target):
          #if target not in outputDict.keys():
          if target not in methodMap.keys():
            attributeInSelf = getattr(interface, target)
            if (np.atleast_1d(attributeInSelf)).shape != (np.atleast_1d(inputDict[target])).shape or (np.atleast_1d(attributeInSelf) - np.atleast_1d(inputDict[target])).all():
              if target in outputDict.keys():
                self.raiseAWarning("In Post-Processor "+ self.name +" the modified variable "+target+
                               " has the same name of a one already modified through another Function method." +
                               " This method overwrites the input DataObject variable value")
              outputDict[target] = np.atleast_1d(attributeInSelf)
          else:
            warningMessages.append("In Post-Processor "+ self.name +" the method "+method+
                               " has the same name of a variable contained in the input DataObject." +
                               " This method overwrites the input DataObject variable value")
    for msg in list(set(warningMessages)):
      self.raiseAWarning(msg)

    # TODO: We assume the structure of the input to the external pp is the same as the structure of the output of this external pp
    # An interface pp should be used if the user wants to merge two data objects, or change the structures of input data
    # objects.
    numRlz = len(utils.first(outputDict.values()))
    for val in outputDict.values():
      if len(val) != numRlz:
        self.raiseAnError(IOError, "The return results from the external functions have different number of realizations!"
                + " This postpocessor ", self.name, " requests all the returned values should have the same number of realizations.")
    for target in inputDict.keys():
      if target not in outputDict.keys():
        if len(inputDict[target]) != numRlz:
          self.raiseAWarning("Parameter ", target, " is available in the provided input DataObjects,"
                  + " but it has different length from the returned values from the external functions."
                  + " Thus this parameter will not be accessible by the output DataObjects!")
        else:
          outputDict[target] = np.atleast_1d(inputDict[target])

    return outputDict
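
Note on the method-mapping step above: every requested method is qualified with the name of the interface that provides it, so two files defining the same function name do not collide in outputDict. A toy sketch of that qualification (the Interface class here is a stand-in, not the RAVEN one):

class Interface:
    def __init__(self, name, methods):
        self.name = name
        self._methods = methods
    def availableMethods(self):
        return list(self._methods)

interfaces = [Interface('fileA', ['mean', 'peak']),
              Interface('fileB', ['mean'])]

methodMap = {}
for method in ['mean', 'peak', 'missing']:
    matches = [i for i in interfaces if method in i.availableMethods()]
    if not matches:
        print(method, 'not found, skipping')
        continue
    for interface in matches:
        # qualify the name so 'fileA_mean' and 'fileB_mean' can coexist
        methodMap[interface.name + '_' + method] = (interface, method)

# methodMap -> {'fileA_mean': ..., 'fileB_mean': ..., 'fileA_peak': ...}
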
Esempio n. 23
0
    def _checkClosestBranch(self):
        """
      Function that checks the closest branch already evaluated
      @ In, None
      @ Out, returnTuple, tuple, closest branch info:
        - if self.hybridDETstrategy and branch found         -> returnTuple = (valBranch,cdfValues,treer)
        - if self.hybridDETstrategy and branch not found     -> returnTuple = (None,cdfValues,treer)
        - if not self.hybridDETstrategy and branch found     -> returnTuple = (valBranch,cdfValues)
        - if not self.hybridDETstrategy and branch not found -> returnTuple = (None,cdfValues)
    """
        from sklearn import neighbors

        # compute cdf of sampled vars
        lowerCdfValues = {}
        cdfValues = {}
        self.raiseADebug("Check for closest branch:")
        self.raiseADebug("_" * 50)
        for key, value in self.values.items():
            self.raiseADebug("Variable name   : " + str(key))
            self.raiseADebug("Distrbution name: " + str(self.toBeSampled[key]))
            if key not in self.epistemicVariables.keys():
                cdfValues[key] = self.distDict[key].cdf(value)
                try:
                    index = utils.first(
                        np.atleast_1d(
                            np.asarray(self.branchProbabilities[key]) <=
                            cdfValues[key]).nonzero())[-1]
                    val = self.branchProbabilities[key][index]
                except (ValueError, IndexError):
                    val = None
                lowerCdfValues[key] = val
                self.raiseADebug("CDF value       : " + str(cdfValues[key]))
                self.raiseADebug("Lower CDF found : " +
                                 str(lowerCdfValues[key]))
            self.raiseADebug("_" * 50)
        #if hybrid DET, we need to find the correct tree that matches the values of the epistemic
        if self.hybridDETstrategy is not None:
            self.foundEpistemicTree, treer, compareDict = False, None, dict.fromkeys(
                self.epistemicVariables.keys(), False)
            for tree in self.TreeInfo.values():
                epistemicVars = tree.getrootnode().get(
                    "hybridsamplerCoordinate")[0]['SampledVars']
                for key in self.epistemicVariables.keys():
                    compareDict[key] = utils.compare(epistemicVars[key],
                                                     self.values[key])
                if all(compareDict.values()):
                    # we found the right epistemic tree
                    self.foundEpistemicTree, treer = True, tree
                    break
        else:
            treer = utils.first(self.TreeInfo.values())

        # check if in the adaptive points already explored (if not push into the grid)
        if not self.insertAdaptBPb:
            candidatesBranch = []
            # check if adaptive point is better choice -> TODO: improve efficiency
            for invPoint in self.investigatedPoints:
                pbth = [
                    invPoint[self.toBeSampled[key]]
                    for key in cdfValues.keys()
                ]
                if all(i <= pbth[cnt]
                       for cnt, i in enumerate(cdfValues.values())):
                    candidatesBranch.append(invPoint)
            if len(candidatesBranch) > 0:
                if None in lowerCdfValues.values():
                    lowerCdfValues = candidatesBranch[0]
                for invPoint in candidatesBranch:
                    pbth = [
                        invPoint[self.toBeSampled[key]]
                        for key in cdfValues.keys()
                    ]
                    if all(i >= pbth[cnt]
                           for cnt, i in enumerate(lowerCdfValues.values())):
                        lowerCdfValues = invPoint
        # Check if The adaptive point requested is outside the so far run grid; in case return None
        # In addition, if Adaptive Hybrid DET, if treer is None, we did not find any tree
        #              in the epistemic space => we need to create another one
        if None in lowerCdfValues.values() or treer is None:
            if self.hybridDETstrategy is not None:
                returnTuple = None, cdfValues, treer
            else:
                returnTuple = None, cdfValues
            return returnTuple

        nntrain, mapping = None, {}
        for ending in treer.iterProvidedFunction(self._checkEnded):
            #already-ended branches: build the training set for the nearest-neighbor algorithm (take coordinates <= cdfValues) -> TODO: improve efficiency
            pbth = [
                ending.get('SampledVarsPb')[key]
                for key in lowerCdfValues.keys()
            ]
            if all(pbth[cnt] <= i
                   for cnt, i in enumerate(lowerCdfValues.values())):
                if nntrain is None:
                    nntrain = np.zeros((1, len(cdfValues.keys())))
                    nntrain[0, :] = np.array(copy.copy(pbth))
                else:
                    nntrain = np.concatenate(
                        (nntrain, np.atleast_2d(np.array(copy.copy(pbth)))),
                        axis=0)
                mapping[nntrain.shape[0]] = ending
        if nntrain is not None:
            neigh = neighbors.NearestNeighbors(n_neighbors=len(mapping.keys()))
            neigh.fit(nntrain)
            valBranch = self._checkValidityOfBranch(
                neigh.kneighbors([list(lowerCdfValues.values())]), mapping)
            if self.hybridDETstrategy is not None:
                returnTuple = valBranch, cdfValues, treer
            else:
                returnTuple = valBranch, cdfValues
            return returnTuple
        else:
            returnTuple = (None, cdfValues,
                           treer) if self.hybridDETstrategy is not None else (
                               None, cdfValues)
            return returnTuple
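
Note: the closest-branch lookup above trains a scikit-learn NearestNeighbors model on the CDF coordinates of branches that have already ended. A stripped-down sketch of that step on made-up coordinates (the subsequent validity check on the returned neighbor is omitted):

import numpy as np
from sklearn.neighbors import NearestNeighbors

# CDF coordinates of branches that have already been run (one row per branch)
ended_branches = np.array([[0.10, 0.20],
                           [0.40, 0.55],
                           [0.70, 0.80]])

query = [[0.45, 0.50]]  # CDF coordinates of the new adaptive point

neigh = NearestNeighbors(n_neighbors=len(ended_branches))
neigh.fit(ended_branches)
distances, indices = neigh.kneighbors(query)
# indices[0][0] is the closest already-evaluated branch; the sampler then
# checks whether that branch is actually a valid restart point
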
Esempio n. 24
0
    def initialize(self, externalSeeding=None, solutionExport=None):
        """
      This function should be called every time a clean sampler is needed. Called before takeAstep in <Step>
      @ In, externalSeeding, int, optional, external seed
      @ In, solutionExport, DataObject, optional, in goal-oriented (a.k.a. adaptive) sampling, this is where the points satisfying the constraints are exported
      @ Out, None
    """
        if self.initSeed is None:
            self.initSeed = Distributions.randomIntegers(0, 2**31, self)
        self.counter = 0
        if not externalSeeding:
            Distributions.randomSeed(
                self.initSeed)  #use the sampler initialization seed
            self.auxcnt = self.initSeed
        elif externalSeeding == 'continue':
            pass  #in this case the random sequence needs to be preserved
        else:
            Distributions.randomSeed(
                externalSeeding)  #the external seeding is used
            self.auxcnt = externalSeeding
        #grab restart dataobject if it's available, then in localInitialize the sampler can deal with it.
        if 'Restart' in self.assemblerDict.keys():
            self.raiseADebug('Restart object: ' +
                             str(self.assemblerDict['Restart']))
            self.restartData = self.assemblerDict['Restart'][0][3]
            self.raiseAMessage('Restarting from ' + self.restartData.name)
            #check consistency of data
            try:
                rdata = self.restartData.getAllMetadata()['crowDist']
                sdata = self.inputInfo['crowDist']
                self.raiseAMessage('sampler inputs:')
                for sk, sv in sdata.items():
                    self.raiseAMessage('|   ' + str(sk) + ': ' + str(sv))
                for i, r in enumerate(rdata):
                    if type(r) != dict: continue
                    if not r == sdata:
                        self.raiseAMessage('restart inputs %i:' % i)
                        for rk, rv in r.items():
                            self.raiseAMessage('|   ' + str(rk) + ': ' +
                                               str(rv))
                        self.raiseAnError(
                            IOError,
                            'Restart "%s" data[%i] does not have same inputs as sampler!'
                            % (self.restartData.name, i))
            except KeyError as e:
                self.raiseAWarning(
                    "No CROW distribution available in restart -", e)
        else:
            self.raiseAMessage('No restart for ' + self.printTag)

        #load restart data into existing points
        if self.restartData is not None:
            if not self.restartData.isItEmpty():
                inps = self.restartData.getInpParametersValues()
                outs = self.restartData.getOutParametersValues()
                #FIXME there is no guarantee ordering is accurate between restart data and sampler
                inputs = list(v for v in inps.values())
                existingInps = zip(*inputs)
                outVals = zip(*list(v for v in outs.values()))
                self.existing = dict(zip(existingInps, outVals))

        #specializing the self.localInitialize() to account for adaptive sampling
        if solutionExport is not None:
            self.localInitialize(solutionExport=solutionExport)
        else:
            self.localInitialize()

        for distrib in self.NDSamplingParams:
            if distrib in self.distributions2variablesMapping:
                params = self.NDSamplingParams[distrib]
                temp = utils.first(
                    self.distributions2variablesMapping[distrib][0].keys())
                self.distDict[temp].updateRNGParam(params)
            else:
                self.raiseAnError(
                    IOError,
                    'Distribution "%s" specified in distInit block of sampler "%s" does not exist!'
                    % (distrib, self.name))

        # Store the transformation matrix in the metadata
        if self.variablesTransformationDict:
            self.entitiesToRemove = []
            for variable in self.variables2distributionsMapping.keys():
                distName = self.variables2distributionsMapping[variable][
                    'name']
                dim = self.variables2distributionsMapping[variable]['dim']
                totDim = self.variables2distributionsMapping[variable][
                    'totDim']
                if totDim > 1 and dim == 1:
                    transformDict = {}
                    transformDict['type'] = self.distDict[
                        variable.strip()].type
                    transformDict['transformationMatrix'] = self.distDict[
                        variable.strip()].transformationMatrix()
                    self.inputInfo['transformation-' +
                                   distName] = transformDict
                    self.entitiesToRemove.append('transformation-' + distName)
Esempio n. 25
0
    def _readMoreXMLbase(self, xmlNode):
        """
      Function to read the portion of the xml input that belongs to the base sampler only
      and initialize some stuff based on the inputs got
      The text is supposed to contain the info where and which variable to change.
      In case of a code the syntax is specified by the code interface itself
      @ In, xmlNode, xml.etree.ElementTree.Element, XML element node
      @ Out, None
    """
        for child in xmlNode:
            prefix = ""
            if child.tag == 'Distribution':
                for childChild in child:
                    if childChild.tag == 'distribution':
                        prefix = "<distribution>"
                        tobesampled = childChild.text
                self.toBeSampled[prefix + child.attrib['name']] = tobesampled
                #if child.attrib['name'] != tobesampled:self.raiseAnError(IOError,"name of the <Distribution> node and <distribution> mismatches for node named "+ child.attrib['name'])
            elif child.tag == 'variable':
                foundDistOrFunc = False
                for childChild in child:
                    if childChild.tag == 'distribution':
                        if not foundDistOrFunc: foundDistOrFunc = True
                        else:
                            self.raiseAnError(
                                IOError,
                                'A sampled variable cannot have both a distribution and a function!'
                            )
                        tobesampled = childChild.text
                        varData = {}
                        varData['name'] = childChild.text
                        if childChild.get('dim') is None:
                            dim = 1
                        else:
                            dim = childChild.attrib['dim']
                        varData['dim'] = int(dim)
                        self.variables2distributionsMapping[
                            child.attrib['name']] = varData
                        self.toBeSampled[prefix +
                                         child.attrib['name']] = tobesampled
                    elif childChild.tag == 'function':
                        if not foundDistOrFunc: foundDistOrFunc = True
                        else:
                            self.raiseAnError(
                                IOError,
                                'A sampled variable cannot have both a distribution and a function!'
                            )
                        tobesampled = childChild.text
                        self.dependentSample[
                            prefix + child.attrib['name']] = tobesampled
                if not foundDistOrFunc:
                    self.raiseAnError(
                        IOError, 'Sampled variable', child.attrib['name'],
                        'has neither a <distribution> nor <function> node specified!'
                    )
            elif child.tag == "variablesTransformation":
                transformationDict = {}
                listIndex = None
                for childChild in child:
                    if childChild.tag == "latentVariables":
                        transformationDict[childChild.tag] = list(
                            inp.strip()
                            for inp in childChild.text.strip().split(','))
                    elif childChild.tag == "manifestVariables":
                        transformationDict[childChild.tag] = list(
                            inp.strip()
                            for inp in childChild.text.strip().split(','))
                    elif childChild.tag == "manifestVariablesIndex":
                        # the index provided by the input file starts from 1, but the index used by the code starts from 0.
                        listIndex = list(
                            int(inp.strip()) - 1
                            for inp in childChild.text.strip().split(','))
                    elif childChild.tag == "method":
                        self.transformationMethod[
                            child.attrib['distribution']] = childChild.text
                if listIndex is None:
                    self.raiseAWarning(
                        'Index not provided for manifestVariables; the default index will be used instead!'
                    )
                    listIndex = range(
                        len(transformationDict["manifestVariables"]))
                transformationDict["manifestVariablesIndex"] = listIndex
                self.variablesTransformationDict[
                    child.attrib['distribution']] = transformationDict
            elif child.tag == "constant":
                value = utils.partialEval(child.text)
                if value is None:
                    self.raiseAnError(
                        IOError,
                        'The body of "constant" XML block should be a number. Got: '
                        + child.text)
                try:
                    self.constants[child.attrib['name']] = value
                except KeyError:
                    self.raiseAnError(
                        KeyError,
                        child.tag + ' must have the attribute "name"!')
            elif child.tag == "restartTolerance":
                self.restartTolerance = float(child.text)

        if len(self.constants) > 0:
            # check if constant variables are also part of the sampled space. In case, error out
            if not set(self.toBeSampled.keys()).isdisjoint(
                    self.constants.keys()):
                self.raiseAnError(
                    IOError,
                    "Some constant variables are also in the sampling space:" +
                    ' '.join([
                        i if i in self.toBeSampled.keys() else ""
                        for i in self.constants.keys()
                    ]))

        if self.initSeed is None:
            self.initSeed = Distributions.randomIntegers(0, 2**31, self)
        # Creation of the self.distributions2variablesMapping dictionary: {'distName': ({'variable_name1': dim1}, {'variable_name2': dim2})}
        for variable in self.variables2distributionsMapping.keys():
            distName = self.variables2distributionsMapping[variable]['name']
            dim = self.variables2distributionsMapping[variable]['dim']
            listElement = {}
            listElement[variable] = dim
            if (distName in self.distributions2variablesMapping.keys()):
                self.distributions2variablesMapping[distName].append(
                    listElement)
            else:
                self.distributions2variablesMapping[distName] = [listElement]

        # creation of the self.distributions2variablesIndexList dictionary:{'distName':[dim1,dim2,...,dimN]}
        self.distributions2variablesIndexList = {}
        for distName in self.distributions2variablesMapping.keys():
            positionList = []
            for var in self.distributions2variablesMapping[distName]:
                position = utils.first(var.values())
                positionList.append(position)
            positionList = list(set(positionList))
            positionList.sort()
            self.distributions2variablesIndexList[distName] = positionList

        for key in self.variables2distributionsMapping.keys():
            distName = self.variables2distributionsMapping[key]['name']
            dim = self.variables2distributionsMapping[key]['dim']
            reducedDim = self.distributions2variablesIndexList[distName].index(
                dim) + 1
            self.variables2distributionsMapping[key][
                'reducedDim'] = reducedDim  # the dimension of variable in the transformed space
            self.variables2distributionsMapping[key]['totDim'] = max(
                self.distributions2variablesIndexList[distName]
            )  # We will reset the value if the node <variablesTransformation> exist in the raven input file
            if not self.variablesTransformationDict and self.variables2distributionsMapping[
                    key]['totDim'] > 1:
                if self.variables2distributionsMapping[key]['totDim'] != len(
                        self.distributions2variablesIndexList[distName]):
                    self.raiseAnError(
                        IOError,
                        'The "dim" assigned to the variables insider Sampler are not correct! the "dim" should start from 1, and end with the full dimension of given distribution'
                    )

        #Checking the variables transformation
        if self.variablesTransformationDict:
            for dist, varsDict in self.variablesTransformationDict.items():
                maxDim = len(varsDict['manifestVariables'])
                listLatentElement = varsDict['latentVariables']
                if len(set(listLatentElement)) != len(listLatentElement):
                    dups = set(var for var in listLatentElement
                               if listLatentElement.count(var) > 1)
                    self.raiseAnError(
                        IOError,
                        'The following variables are duplicated in the latentVariables list: '
                        + str(dups))
                if len(set(varsDict['manifestVariables'])) != len(
                        varsDict['manifestVariables']):
                    dups = set(var for var in varsDict['manifestVariables']
                               if varsDict['manifestVariables'].count(var) > 1)
                    self.raiseAnError(
                        IOError,
                        'The following variables are duplicated in the manifestVariables list: '
                        + str(dups))
                if len(set(varsDict['manifestVariablesIndex'])) != len(
                        varsDict['manifestVariablesIndex']):
                    dups = set(
                        var + 1 for var in varsDict['manifestVariablesIndex']
                        if varsDict['manifestVariablesIndex'].count(var) > 1)
                    self.raiseAnError(
                        IOError,
                        'The following variable indices are duplicated in the manifestVariablesIndex list: '
                        + str(dups))
                listElement = self.distributions2variablesMapping[dist]
                for var in listElement:
                    self.variables2distributionsMapping[utils.first(var.keys())][
                        'totDim'] = maxDim  #reset the totDim to reflect the totDim of the original input space
                tempListElement = {
                    k.strip(): v
                    for x in listElement for ks, v in x.items()
                    for k in list(ks.strip().split(','))
                }
                listIndex = []
                for var in listLatentElement:
                    if var not in set(tempListElement.keys()):
                        self.raiseAnError(
                            IOError,
                            'The variable listed in latentVariables ' + var +
                            ' is not listed in the given distribution: ' +
                            dist)
                    listIndex.append(tempListElement[var] - 1)
                if max(listIndex) > maxDim:
                    self.raiseAnError(
                        IOError, 'The maximum dim = ' + str(max(listIndex)) +
                        ' defined for the latent variables exceeds the dimension of the problem '
                        + str(maxDim))
                if len(set(listIndex)) != len(listIndex):
                    dups = set(var + 1 for var in listIndex
                               if listIndex.count(var) > 1)
                    self.raiseAnError(
                        IOError,
                        'Each of the following dimensions is assigned to multiple latent variables in the Sampler: '
                        + str(dups))
                # update the index for latentVariables according to the 'dim' assigned for given var defined in Sampler
                self.variablesTransformationDict[dist][
                    'latentVariablesIndex'] = listIndex
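
Note: the <variable> branch of _readMoreXMLbase expects either a <distribution> or a <function> child, plus an optional dim attribute on the distribution. A small sketch of parsing that structure with xml.etree.ElementTree (the XML snippet and variable names are illustrative, not a full RAVEN input):

import xml.etree.ElementTree as ET

xml = """
<Sampler>
  <variable name="x">
    <distribution dim="2">multiNormal</distribution>
  </variable>
</Sampler>
"""

toBeSampled, varMapping = {}, {}
for child in ET.fromstring(xml):
    if child.tag != 'variable':
        continue
    name = child.attrib['name']
    distNode = child.find('distribution')
    if distNode is None:
        raise IOError('Sampled variable %s has no <distribution> node!' % name)
    dim = int(distNode.get('dim', 1))
    varMapping[name] = {'name': distNode.text.strip(), 'dim': dim}
    toBeSampled[name] = distNode.text.strip()

# varMapping -> {'x': {'name': 'multiNormal', 'dim': 2}}
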
Esempio n. 26
0
    def localGenerateInput(self, model, myInput):
        """
      Function to select the next most informative point for refining the limit
      surface search.
      After this method is called, the self.inputInfo should be ready to be sent
      to the model
      @ In, model, model instance, an instance of a model
      @ In, myInput, list, a list of the original needed inputs for the model (e.g. list of files, etc.)
      @ Out, None
    """
        varCount = 0
        self.inputInfo['distributionName'] = {
        }  #Used to determine which distribution to change if needed.
        self.inputInfo['distributionType'] = {
        }  #Used to determine which distribution type is used
        weight = 1.0
        for varName in self.axisName:
            # new implementation for ND LHS
            if not "<distribution>" in varName:
                if self.variables2distributionsMapping[varName][
                        'totDim'] > 1 and self.variables2distributionsMapping[
                            varName]['reducedDim'] == 1:
                    # to avoid double-counting the weight for an ND distribution, count only one variable instead of N
                    if self.variablesTransformationDict:
                        distName = self.variables2distributionsMapping[
                            varName]['name']
                        for distVarName in self.distributions2variablesMapping[
                                distName]:
                            for kkey in utils.first(
                                    distVarName.keys()).strip().split(','):
                                self.inputInfo['distributionName'][
                                    kkey] = self.toBeSampled[varName]
                                self.inputInfo['distributionType'][
                                    kkey] = self.distDict[varName].type
                        ndCoordinate = np.zeros(
                            len(self.distributions2variablesMapping[distName]))
                        dxs = np.zeros(
                            len(self.distributions2variablesMapping[distName]))
                        centerCoordinate = np.zeros(
                            len(self.distributions2variablesMapping[distName]))
                        positionList = self.distributions2variablesIndexList[
                            distName]
                        for var in self.distributions2variablesMapping[
                                distName]:
                            # if the varName is a comma separated list of strings the user wants to sample the comma separated variables with the same sampled value => link the value to all comma separated variables
                            variable = utils.first(var.keys()).strip()
                            position = utils.first(var.values())
                            upper = self.gridEntity.returnShiftedCoordinate(
                                self.gridEntity.returnIteratorIndexes(), {
                                    variable:
                                    self.sampledCoordinate[self.counter -
                                                           1][varCount] + 1
                                })[variable]
                            lower = self.gridEntity.returnShiftedCoordinate(
                                self.gridEntity.returnIteratorIndexes(), {
                                    variable:
                                    self.sampledCoordinate[self.counter -
                                                           1][varCount]
                                })[variable]
                            varCount += 1
                            if self.gridInfo[variable] == 'CDF':
                                coordinate = lower + (
                                    upper - lower) * Distributions.random()
                                ndCoordinate[positionList.index(
                                    position)] = self.distDict[
                                        variable].inverseMarginalDistribution(
                                            coordinate, variable)
                                dxs[positionList.index(
                                    position
                                )] = self.distDict[
                                    variable].inverseMarginalDistribution(
                                        max(upper, lower), variable
                                    ) - self.distDict[
                                        variable].inverseMarginalDistribution(
                                            min(upper, lower), variable)
                                centerCoordinate[positionList.index(
                                    position)] = (self.distDict[variable].
                                                  inverseMarginalDistribution(
                                                      upper, variable) +
                                                  self.distDict[variable].
                                                  inverseMarginalDistribution(
                                                      lower, variable)) / 2.0
                                for kkey in variable.strip().split(','):
                                    self.values[kkey] = ndCoordinate[
                                        positionList.index(position)]
                                    self.inputInfo['upper'][kkey] = self.distDict[
                                        variable].inverseMarginalDistribution(
                                            max(upper, lower), variable)
                                    self.inputInfo['lower'][kkey] = self.distDict[
                                        variable].inverseMarginalDistribution(
                                            min(upper, lower), variable)
                            elif self.gridInfo[variable] == 'value':
                                dxs[positionList.index(position)] = max(
                                    upper, lower) - min(upper, lower)
                                centerCoordinate[positionList.index(
                                    position)] = (upper + lower) / 2.0
                                coordinateCdf = self.distDict[
                                    variable].marginalCdf(lower) + (
                                        self.distDict[variable].marginalCdf(
                                            upper) -
                                        self.distDict[variable].marginalCdf(
                                            lower)) * Distributions.random()
                                coordinate = self.distDict[
                                    variable].inverseMarginalDistribution(
                                        coordinateCdf, variable)
                                ndCoordinate[positionList.index(
                                    position)] = coordinate
                                for kkey in variable.strip().split(','):
                                    self.values[kkey] = coordinate
                                    self.inputInfo['upper'][kkey] = max(
                                        upper, lower)
                                    self.inputInfo['lower'][kkey] = min(
                                        upper, lower)
                        self.inputInfo['ProbabilityWeight-' + varName.replace(
                            ",", "!")] = self.distDict[varName].cellIntegral(
                                centerCoordinate, dxs)
                        weight *= self.inputInfo['ProbabilityWeight-' +
                                                 varName.replace(",", "!")]
                        self.inputInfo['SampledVarsPb'][
                            varName] = self.distDict[varName].pdf(ndCoordinate)
                    else:
                        if self.gridInfo[varName] == 'CDF':
                            upper = self.gridEntity.returnShiftedCoordinate(
                                self.gridEntity.returnIteratorIndexes(), {
                                    varName:
                                    self.sampledCoordinate[self.counter -
                                                           1][varCount] + 1
                                })[varName]
                            lower = self.gridEntity.returnShiftedCoordinate(
                                self.gridEntity.returnIteratorIndexes(), {
                                    varName:
                                    self.sampledCoordinate[self.counter -
                                                           1][varCount]
                                })[varName]
                            varCount += 1
                            coordinate = lower + (
                                upper - lower) * Distributions.random()
                            gridCoordinate, distName = self.distDict[
                                varName].ppf(
                                    coordinate
                                ), self.variables2distributionsMapping[
                                    varName]['name']
                            for distVarName in self.distributions2variablesMapping[distName]:
                                varDim = utils.first(distVarName.values())
                                for kkey in utils.first(distVarName.keys()).strip().split(','):
                                    self.inputInfo['distributionName'][kkey] = self.toBeSampled[varName]
                                    self.inputInfo['distributionType'][kkey] = self.distDict[varName].type
                                    self.values[kkey] = np.atleast_1d(gridCoordinate)[varDim - 1]
                            # coordinate stores the cdf values, we need to compute the pdf for SampledVarsPb
                            self.inputInfo['SampledVarsPb'][
                                varName] = self.distDict[varName].pdf(
                                    np.atleast_1d(gridCoordinate).tolist())
                            weight *= max(upper, lower) - min(upper, lower)
                            self.inputInfo['ProbabilityWeight-' +
                                           varName.replace(",", "!")] = max(
                                               upper, lower) - min(
                                                   upper, lower)
                        else:
                            self.raiseAnError(
                                IOError,
                                "Since the globalGrid is defined, the Stratified Sampler is only working when the sampling is performed on a grid on a CDF. However, the user specifies the grid on "
                                + self.gridInfo[varName])
            if (
                    "<distribution>" in varName
            ) or self.variables2distributionsMapping[varName]['totDim'] == 1:
                # 1D variable
                # if the varName is a comma separated list of strings the user wants to sample the comma separated variables with the same sampled value => link the value to all comma separated variables
                upper = self.gridEntity.returnShiftedCoordinate(
                    self.gridEntity.returnIteratorIndexes(), {
                        varName:
                        self.sampledCoordinate[self.counter - 1][varCount] + 1
                    })[varName]
                lower = self.gridEntity.returnShiftedCoordinate(
                    self.gridEntity.returnIteratorIndexes(), {
                        varName:
                        self.sampledCoordinate[self.counter - 1][varCount]
                    })[varName]
                varCount += 1
                if self.gridInfo[varName] == 'CDF':
                    coordinate = lower + (upper -
                                          lower) * Distributions.random()
                    ppfValue = self.distDict[varName].ppf(coordinate)
                    ppfLower = self.distDict[varName].ppf(min(upper, lower))
                    ppfUpper = self.distDict[varName].ppf(max(upper, lower))
                    weight *= self.distDict[varName].cdf(
                        ppfUpper) - self.distDict[varName].cdf(ppfLower)
                    self.inputInfo['ProbabilityWeight-' + varName.replace(
                        ",", "-")] = self.distDict[varName].cdf(
                            ppfUpper) - self.distDict[varName].cdf(ppfLower)
                    self.inputInfo['SampledVarsPb'][varName] = self.distDict[
                        varName].pdf(ppfValue)
                elif self.gridInfo[varName] == 'value':
                    coordinateCdf = self.distDict[varName].cdf(
                        min(upper, lower)) + (self.distDict[varName].cdf(
                            max(upper, lower)) - self.distDict[varName].cdf(
                                min(upper, lower))) * Distributions.random()
                    if coordinateCdf == 0.0:
                        self.raiseAWarning(
                            IOError,
                            "The grid lower bound and upper bound in value will generate ZERO cdf value!!!"
                        )
                    coordinate = self.distDict[varName].ppf(coordinateCdf)
                    weight *= self.distDict[varName].cdf(max(
                        upper, lower)) - self.distDict[varName].cdf(
                            min(upper, lower))
                    self.inputInfo['ProbabilityWeight-' + varName.replace(
                        ",", "-")] = self.distDict[varName].cdf(
                            max(upper, lower)) - self.distDict[varName].cdf(
                                min(upper, lower))
                    self.inputInfo['SampledVarsPb'][varName] = self.distDict[
                        varName].pdf(coordinate)
                for kkey in varName.strip().split(','):
                    self.inputInfo['distributionName'][
                        kkey] = self.toBeSampled[varName]
                    self.inputInfo['distributionType'][kkey] = self.distDict[
                        varName].type
                    if self.gridInfo[varName] == 'CDF':
                        self.values[kkey] = ppfValue
                        self.inputInfo['upper'][kkey] = ppfUpper
                        self.inputInfo['lower'][kkey] = ppfLower
                    elif self.gridInfo[varName] == 'value':
                        self.values[kkey] = coordinate
                        self.inputInfo['upper'][kkey] = max(upper, lower)
                        self.inputInfo['lower'][kkey] = min(upper, lower)

        self.inputInfo['PointProbability'] = reduce(
            mul, self.inputInfo['SampledVarsPb'].values())
        self.inputInfo['ProbabilityWeight'] = weight
        self.inputInfo['SamplerType'] = 'Stratified'
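
The bookkeeping above reduces to a simple rule: each CDF-gridded variable contributes the probability mass of its stratum to ProbabilityWeight, while PointProbability is the product of the pdf values at the sampled point. A minimal sketch of that rule, assuming a standard normal in place of self.distDict[varName] and a plain dict of strata (hypothetical helper, not the RAVEN API):

import random
from functools import reduce
from operator import mul

from scipy.stats import norm  # stand-in for self.distDict[varName]

def stratifiedPoint(cdfBins):
    """cdfBins: dict varName -> (cdfLower, cdfUpper) of the selected stratum."""
    values, sampledVarsPb, weight = {}, {}, 1.0
    for var, (lower, upper) in cdfBins.items():
        u = lower + (upper - lower) * random.random()  # sample inside the stratum (CDF space)
        x = norm.ppf(u)                                # map back to value space
        values[var] = x
        sampledVarsPb[var] = norm.pdf(x)
        weight *= abs(upper - lower)                   # probability mass of the stratum
    pointProbability = reduce(mul, sampledVarsPb.values())
    return values, weight, pointProbability

# usage: two variables, each restricted to one CDF stratum
stratifiedPoint({'x1': (0.1, 0.3), 'x2': (0.3, 0.6)})
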
Esempio n. 27
0
 def collectOutputFromDataObject(self,exportDict,output):
   """
     Method to collect the output from a DataObject (if it is not a dataObject, it just returns a list with one single exportDict)
     @ In, exportDict, dict, the export dictionary
                              ({'inputSpaceParams':{var1:value1,var2:value2},
                                'outputSpaceParams':{outstreamName1:DataObject1,outstreamName2:DataObject2},
                                'metadata':{'metadataName1':value1,'metadataName2':value2}})
     @ Out, returnList, list, list of export dictionaries
   """
   returnList = []
   if utils.first(exportDict['outputSpaceParams'].values()).__class__.__base__.__name__ != 'Data':
     returnList.append(exportDict)
   else:
     # get the DataObject that is compatible with this output
     compatibleDataObject = None
     for dataObj in exportDict['outputSpaceParams'].values():
       if output.type == dataObj.type:
         compatibleDataObject = dataObj
         break
       if output.type == 'HDF5' and dataObj.type == 'HistorySet':
         compatibleDataObject = dataObj
         break
     if compatibleDataObject is None:
        # if none found (e.g. we are filling a HistorySet with a PointSet), we take the first one
       compatibleDataObject = utils.first(exportDict['outputSpaceParams'].values())
     # get the values
     inputs = compatibleDataObject.getParametersValues('inputs',nodeId = 'RecontructEnding')
     unstructuredInputs = compatibleDataObject.getParametersValues('unstructuredinputs',nodeId = 'RecontructEnding')
     outputs = compatibleDataObject.getParametersValues('outputs',nodeId = 'RecontructEnding')
     metadata = compatibleDataObject.getAllMetadata(nodeId = 'RecontructEnding')
     inputKeys = inputs.keys() if compatibleDataObject.type == 'PointSet' else utils.first(inputs.values()).keys()
     # expand inputspace of current RAVEN
     for i in range(len(compatibleDataObject)):
       appendDict = {'inputSpaceParams':{},'outputSpaceParams':{},'metadata':{}}
       appendDict['inputSpaceParams'].update(exportDict['inputSpaceParams'])
       appendDict['metadata'].update(exportDict['metadata'])
       if compatibleDataObject.type == 'PointSet':
         for inKey, value in inputs.items():
           appendDict['inputSpaceParams'][inKey] = value[i]
         for inKey, value in unstructuredInputs.items():
           appendDict['inputSpaceParams'][inKey] = value[i]
         for outKey, value in outputs.items():
           appendDict['outputSpaceParams'][outKey] = value[i]
        else:
          # dict views are not indexable in Python 3, so materialize them before indexing
          for inKey, value in list(inputs.values())[i].items():
            appendDict['inputSpaceParams'][inKey] = value
          if len(unstructuredInputs) > 0:
            for inKey, value in list(unstructuredInputs.values())[i].items():
              appendDict['inputSpaceParams'][inKey] = value
          for outKey, value in list(outputs.values())[i].items():
            appendDict['outputSpaceParams'][outKey] = value
       # add metadata for both dataobject types
       for metadataToExport in ['SampledVars','SampledVarsPb']:
         if metadataToExport in metadata:
           appendDict['metadata'][metadataToExport].update(metadata[metadataToExport][i])
       weightForVars = ['ProbabilityWeight-'+var.strip()  for var in inputKeys]
       for metadataToMerge in ['ProbabilityWeight', 'PointProbability']+weightForVars:
         if metadataToMerge in appendDict['metadata']:
           if metadataToMerge in metadata:
             appendDict['metadata'][metadataToMerge]*= metadata[metadataToMerge][i]
         else:
           if metadataToMerge in metadata:
             appendDict['metadata'][metadataToMerge] = metadata[metadataToMerge][i]
       returnList.append(appendDict)
   return returnList
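
The metadata merge in the loop above multiplies the OUTER weight already present in the export dictionary by the INNER per-realization weight, and simply copies the INNER value when the OUTER one is absent. A minimal sketch of that merge rule (hypothetical helper, not the RAVEN API):

def mergeWeights(appendMeta, innerMeta, index,
                 keys=('ProbabilityWeight', 'PointProbability')):
    """appendMeta: metadata of the export dict; innerMeta: per-realization arrays."""
    for key in keys:
        if key not in innerMeta:
            continue
        if key in appendMeta:
            appendMeta[key] *= innerMeta[key][index]  # combine OUTER and INNER weights
        else:
            appendMeta[key] = innerMeta[key][index]   # take the INNER weight as-is
    return appendMeta

# usage
mergeWeights({'ProbabilityWeight': 0.5},
             {'ProbabilityWeight': [0.2, 0.8], 'PointProbability': [0.1, 0.9]}, 1)
# -> {'ProbabilityWeight': 0.4, 'PointProbability': 0.9}
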
Esempio n. 28
0
  def evaluateSample(self, myInput, samplerType, kwargs):
    """
        This will evaluate an individual sample on this model. Note, parameters
        are needed by createNewInput and thus descriptions are copied from there.
        @ In, myInput, list, the inputs (list) to start from to generate the new one
        @ In, samplerType, string, is the type of sampler that is calling to generate a new input
        @ In, kwargs, dict,  is a dictionary that contains the information coming from the sampler,
           a mandatory key is the sampledVars that contains a dictionary {'name variable':value}
        @ Out, returnValue, tuple, This will hold two pieces of information,
          the first item will be the input data used to generate this sample,
          the second item will be the output of this model given the specified
          inputs
    """
    inputFiles = self.createNewInput(myInput, samplerType, **kwargs)
    self.currentInputFiles, metaData = (copy.deepcopy(inputFiles[0]),inputFiles[1]) if type(inputFiles).__name__ == 'tuple' else (inputFiles, None)
    returnedCommand = self.code.genCommand(self.currentInputFiles,self.executable, flags=self.clargs, fileArgs=self.fargs, preExec=self.preExec)

    ## Given that createNewInput can only return a tuple, I don't think these
    ## checks are necessary (keeping commented out until someone else can verify):
    # if type(returnedCommand).__name__ != 'tuple':
    #   self.raiseAnError(IOError, "the generateCommand method in code interface must return a tuple")
    # if type(returnedCommand[0]).__name__ != 'list':
    #   self.raiseAnError(IOError, "the first entry in tuple returned by generateCommand method needs to be a list of tuples!")
    executeCommand, self.outFileRoot = returnedCommand

    precommand = kwargs['precommand']
    postcommand = kwargs['postcommand']
    bufferSize = kwargs['bufferSize']
    fileExtensionsToDelete = kwargs['deleteOutExtension']
    deleteSuccessfulLogFiles = kwargs['delSucLogFiles']

    codeLogFile = self.outFileRoot
    if codeLogFile is None:
      codeLogFile = os.path.join(metaData['subDirectory'],'generalOut')

    ## Before we were temporarily changing directories in order to copy the
    ## correct directory to the subprocess. Instead, we can just set the
    ## directory after we copy it over. -- DPM 5/5/2017
    sampleDirectory = os.path.join(os.getcwd(),metaData['subDirectory'])
    localenv = dict(os.environ)
    localenv['PWD'] = str(sampleDirectory)
    outFileObject = open(os.path.join(sampleDirectory,codeLogFile), 'w', bufferSize)

    found = False
    for index, inputFile in enumerate(self.currentInputFiles):
      if inputFile.getExt() in self.code.getInputExtension():
        found = True
        break
    if not found:
      self.raiseAnError(IOError,'None of the input files has one of the extensions requested by code '
                                  + self.subType +': ' + ' '.join(self.code.getInputExtension()))
    commands=[]
    for runtype,cmd in executeCommand:
      newCommand=''
      if runtype.lower() == 'parallel':
        newCommand += precommand
        newCommand += cmd+' '
        newCommand += postcommand
        commands.append(newCommand)
      elif runtype.lower() == 'serial':
        commands.append(cmd)
      else:
        self.raiseAnError(IOError,'For execution command <'+cmd+'> the run type was neither "serial" nor "parallel"!  Instead received: ',runtype,'\nPlease check the code interface.')

    command = ' && '.join(commands)+' '

    command = command.replace("%INDEX%",kwargs['INDEX'])
    command = command.replace("%INDEX1%",kwargs['INDEX1'])
    command = command.replace("%CURRENT_ID%",kwargs['CURRENT_ID'])
    command = command.replace("%CURRENT_ID1%",kwargs['CURRENT_ID1'])
    command = command.replace("%SCRIPT_DIR%",kwargs['SCRIPT_DIR'])
    command = command.replace("%FRAMEWORK_DIR%",kwargs['FRAMEWORK_DIR'])
    ## Note this is the working directory that the subprocess will use, it is
    ## not the directory I am currently working. This bit me as I moved the code
    ## from the old ExternalRunner because in that case this was filled in after
    ## the process was submitted by the process itself. -- DPM 5/4/17
    command = command.replace("%WORKING_DIR%",sampleDirectory)
    command = command.replace("%BASE_WORKING_DIR%",kwargs['BASE_WORKING_DIR'])
    command = command.replace("%METHOD%",kwargs['METHOD'])
    command = command.replace("%NUM_CPUS%",kwargs['NUM_CPUS'])

    self.raiseAMessage('Execution command submitted:',command)
    if platform.system() == 'Windows':
      command = self._expandForWindows(command)
      self.raiseAMessage("modified command to", repr(command))
      for key, value in localenv.items():
        localenv[key]=str(value)
    elif not self.code.getRunOnShell():
      command = self._expandCommand(command)
    ## reset python path
    localenv.pop('PYTHONPATH',None)
    ## This code should be evaluated by the job handler, so it is fine to wait
    ## until the execution of the external subprocess completes.
    process = utils.pickleSafeSubprocessPopen(command, shell=self.code.getRunOnShell(), stdout=outFileObject, stderr=outFileObject, cwd=localenv['PWD'], env=localenv)
    if self.maxWallTime is not None:
      timeout = time.time() + self.maxWallTime
      while True:
        time.sleep(0.5)
        process.poll()
        if time.time() > timeout and process.returncode is None:
          self.raiseAWarning('walltime exceeded in run in working dir: '+str(metaData['subDirectory'])+'. Killing the run...')
          process.kill()
          process.returncode = -1
        if process.returncode is not None or time.time() > timeout:
          break
    else:
      process.wait()

    returnCode = process.returncode
    # procOutput = process.communicate()[0]

    ## If the returnCode is already non-zero, we should maintain our current
    ## value as it may have some meaning that can be parsed at some point, so
    ## only set the returnCode to -1 in here if we did not already catch the
    ## failure.
    if returnCode == 0 and 'checkForOutputFailure' in dir(self.code):
      codeFailed = self.code.checkForOutputFailure(codeLogFile, metaData['subDirectory'])
      if codeFailed:
        returnCode = -1
    # close the log file
    outFileObject.close()
    ## We should try and use the output the code interface gives us first, but
    ## in lieu of that we should fall back on the standard output of the code
    ## (Which was deleted above in some cases, so I am not sure if this was
    ##  an intentional design by the original developer or accidental and should
    ##  be revised).
    ## My guess is that every code interface implements this given that the code
    ## below always adds .csv to the filename and the standard output file does
    ## not have an extension. - (DPM 4/6/2017)
    outputFile = codeLogFile
    ## Initialize the RAVEN-interface flag here so it is defined even when the
    ## code interface does not implement "finalizeCodeOutput".
    ravenCase = False
    if 'finalizeCodeOutput' in dir(self.code) and returnCode == 0:
      finalCodeOutputFile = self.code.finalizeCodeOutput(command, codeLogFile, metaData['subDirectory'])
      ## Special case for RAVEN interface (a dict of DataObjects is returned) --ALFOA 09/17/17
      if type(finalCodeOutputFile).__name__ == 'dict':
        ravenCase = True
      if ravenCase and self.code.__class__.__name__ != 'RAVEN':
        self.raiseAnError(RuntimeError, 'The return argument from "finalizeCodeOutput" must be a str containing the new output file root!')
      if finalCodeOutputFile and not ravenCase:
        outputFile = finalCodeOutputFile

    ## If the run was successful
    if returnCode == 0:
      ## This may be a tautology at this point --DPM 4/12/17
      ## Special case for RAVEN interface. Added ravenCase flag --ALFOA 09/17/17
      if outputFile is not None and not ravenCase:
        outFile = Files.CSV()
        ## Should we be adding the file extension here?
        outFile.initialize(outputFile+'.csv',self.messageHandler,path=metaData['subDirectory'])

        csvLoader = CsvLoader.CsvLoader(self.messageHandler)
        # does this CodeInterface have sufficiently intense (or limited) CSV files that
        #   it needs to assume floats and use numpy, or can we use pandas?
        loadUtility = self.code.getCsvLoadUtil()
        csvData = csvLoader.loadCsvFile(outFile.getAbsFile(), nullOK=False, utility=loadUtility)
        returnDict = csvLoader.toRealization(csvData)

      if not ravenCase:
        self._replaceVariablesNamesWithAliasSystem(returnDict, 'inout', True)
        returnDict.update(kwargs)
        returnValue = (kwargs['SampledVars'],returnDict)
        exportDict = self.createExportDictionary(returnValue)
      else:
        # we have the DataObjects -> raven-runs-raven case only so far
        # we have two tasks to do: collect the input/output/meta/indexes from the INNER raven run, and ALSO the input from the OUTER raven run.
        #  -> in addition, we have to fix the probability weights.
        ## get the number of realizations
        ### we already checked consistency in the CodeInterface, so just get the length of the first data object
        numRlz = len(utils.first(finalCodeOutputFile.values()))
        ## set up the return container
        exportDict = {'RAVEN_isBatch':True,'realizations':[]}
        ## set up each realization
        for n in range(numRlz):
          rlz = {}
          ## collect the results from INNER, both point set and history set
          for dataObj in finalCodeOutputFile.values():
            # TODO FIXME check for overwriting data.  For now just replace data if it's duplicate!
            new = dict((var,np.atleast_1d(val)) for var,val in dataObj.realization(index=n,unpackXArray=True).items())
            rlz.update( new )
          ## add OUTER input space
          # TODO FIXME check for overwriting data.  For now just replace data if it's duplicate!
          new = dict((var,np.atleast_1d(val)) for var,val in kwargs['SampledVars'].items())
          rlz.update( new )
          ## combine ProbabilityWeights # TODO FIXME these are a rough attempt at getting it right!
          rlz['ProbabilityWeight'] = np.atleast_1d(rlz.get('ProbabilityWeight',1.0) * kwargs.get('ProbabilityWeight',1.0))
          rlz['PointProbability'] = np.atleast_1d(rlz.get('PointProbability',1.0) * kwargs.get('PointProbability',1.0))
          # FIXME: adding "_n" to Optimizer samples scrambles its ability to find evaluations!
          ## temporary fix: only append if there's multiple realizations, and error out if sampler is an optimizer.
          if numRlz > 1:
            if '_' in kwargs['prefix']:
              self.raiseAnError(RuntimeError,'OUTER RAVEN is using an OPTIMIZER, but INNER RAVEN is returning multiple realizations!')
            addon = '_{}'.format(n)
          else:
            addon = ''
          rlz['prefix'] = np.atleast_1d(kwargs['prefix']+addon)
          ## add the rest of the metadata # TODO slow
          for var,val in kwargs.items():
            if var not in rlz.keys():
              rlz[var] = np.atleast_1d(val)
          self._replaceVariablesNamesWithAliasSystem(rlz,'inout',True)
          exportDict['realizations'].append(rlz)

      ## The last thing before returning should be to delete the temporary log
      ## file and any other file the user requests to be cleared
      if deleteSuccessfulLogFiles:
        self.raiseAMessage(' Run "' +kwargs['prefix']+'" ended smoothly, removing log file!')
        codeLogFileFullPath = os.path.join(metaData['subDirectory'],codeLogFile)
        if os.path.exists(codeLogFileFullPath):
          os.remove(codeLogFileFullPath)

      ## Check if the user specified any file extensions for clean up
      for fileExt in fileExtensionsToDelete:
        fileList = [ os.path.join(metaData['subDirectory'],f) for f in os.listdir(metaData['subDirectory']) if f.endswith(fileExt) ]
        for f in fileList:
          os.remove(f)

      return exportDict

    else:
      self.raiseAMessage(" Process Failed "+str(command)+" returnCode "+str(returnCode))
      absOutputFile = os.path.join(sampleDirectory,outputFile)
      if os.path.exists(absOutputFile):
        self.raiseAMessage(repr(open(absOutputFile,"r").read()).replace("\\n","\n"))
      else:
        self.raiseAMessage(" No output " + absOutputFile)

      ## If you made it here, then the run must have failed
      return None
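
The execution command assembled above is a template: the %...% tokens are filled in with values passed by the job handler through kwargs before the subprocess is launched. A minimal sketch of that substitution step, using str.replace as the method does (hypothetical names, not the RAVEN API):

def expandCommand(template, context):
    """Replace each %KEY% token in the command template with str(context[KEY])."""
    for key, value in context.items():
        template = template.replace('%' + key + '%', str(value))
    return template

# usage
expandCommand('run.sh -i input_%INDEX% -d %WORKING_DIR% -n %NUM_CPUS%',
              {'INDEX': 3, 'WORKING_DIR': '/tmp/sample_3', 'NUM_CPUS': 4})
# -> 'run.sh -i input_3 -d /tmp/sample_3 -n 4'
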
Esempio n. 29
0
 def localGenerateInput(self, model, myInput):
     """
   Function to select the next most informative point for refining the limit
   surface search.
   After this method is called, the self.inputInfo should be ready to be sent
   to the model
   @ In, model, model instance, an instance of a model
   @ In, myInput, list, a list of the original needed inputs for the model (e.g. list of files, etc.)
   @ Out, None
 """
     if self.startAdaptive == True and self.adaptiveReady == True:
         LimitSurfaceSearch.localGenerateInput(self, model, myInput)
         #the adaptive sampler created the next point sampled vars
         #find the closest branch
         if self.hybridDETstrategy is not None:
             closestBranch, cdfValues, treer = self._checkClosestBranch()
         else:
             closestBranch, cdfValues = self._checkClosestBranch()
         if closestBranch is None:
              self.raiseADebug(
                  'A usable branch for the next candidate has not been found => create a parallel branch!'
              )
         # add pbthresholds in the grid
         investigatedPoint = {}
         for key, value in cdfValues.items():
             try:
                 ind = utils.first(
                     np.atleast_1d(
                         np.asarray(self.branchProbabilities[key]) <= value
                     ).nonzero())[-1]
             except (IndexError, ValueError):
                 ind = 0
             if value not in self.branchProbabilities[key]:
                 self.branchProbabilities[key].insert(ind, value)
                 self.branchValues[key].insert(
                     ind, self.distDict[key].ppf(value))
             investigatedPoint[key] = value
         # collect investigated point
         self.investigatedPoints.append(investigatedPoint)
         if closestBranch:
             info = self._retrieveBranchInfo(closestBranch)
             self._constructEndInfoFromBranch(model, myInput, info,
                                              cdfValues)
         else:
             # create a new tree, since there are no branches that are close enough to the adaptive request
             elm = ETS.HierarchicalNode(
                 self.messageHandler,
                 self.name + '_' + str(len(self.TreeInfo.keys()) + 1))
             elm.add('name',
                     self.name + '_' + str(len(self.TreeInfo.keys()) + 1))
             elm.add('startTime', 0.0)
             # Initialize the endTime to be equal to the start one...
             # It will be modified at the end of each branch
             elm.add('endTime', 0.0)
             elm.add('runEnded', False)
             elm.add('running', True)
             elm.add('queue', False)
             elm.add('completedHistory', False)
             branchedLevel = {}
             for key, value in cdfValues.items():
                 branchedLevel[key] = utils.first(
                     np.atleast_1d(
                         np.asarray(self.branchProbabilities[key]) ==
                         value).nonzero())[-1]
             # The dictionary branchedLevel is stored in the xml tree too. That's because
             # the advancement of the thresholds must follow the tree structure
             elm.add('branchedLevel', branchedLevel)
             if self.hybridDETstrategy is not None and not self.foundEpistemicTree:
                 # adaptive hybrid DET and not found a tree in the epistemic space
                 # take the first tree and modify the hybridsamplerCoordinate
                 hybridSampled = copy.deepcopy(
                     utils.first(self.TreeInfo.values()).getrootnode().get(
                         'hybridsamplerCoordinate'))
                 for hybridStrategy in hybridSampled:
                     for key in self.epistemicVariables.keys():
                         if key in hybridStrategy['SampledVars'].keys():
                             self.raiseADebug("epistemic var " + str(key) +
                                              " value = " +
                                              str(self.values[key]))
                             hybridStrategy['SampledVars'][key] = copy.copy(
                                 self.values[key])
                             hybridStrategy['SampledVarsPb'][
                                 key] = self.distDict[key].pdf(
                                     self.values[key])
                             hybridStrategy['prefix'] = len(
                                 self.TreeInfo.values()) + 1
                     # TODO: find a strategy to recompute the probability weight here (for now == PointProbability)
                     hybridStrategy['PointProbability'] = reduce(
                         mul, self.inputInfo['SampledVarsPb'].values())
                     hybridStrategy['ProbabilityWeight'] = reduce(
                         mul, self.inputInfo['SampledVarsPb'].values())
                 elm.add('hybridsamplerCoordinate', hybridSampled)
             self.inputInfo.update({
                 'ProbabilityWeight-' + key.strip(): value
                 for key, value in self.inputInfo['SampledVarsPb'].items()
             })
              # Here all the info regarding the DET is stored => we create the info for all the branchings and store them
             self.TreeInfo[self.name + '_' +
                           str(len(self.TreeInfo.keys()) +
                               1)] = ETS.HierarchicalTree(
                                   self.messageHandler, elm)
             self._createRunningQueueBeginOne(
                 self.TreeInfo[self.name + '_' +
                               str(len(self.TreeInfo.keys()))],
                 branchedLevel, model, myInput)
     return DynamicEventTree.localGenerateInput(self, model, myInput)
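
When a new CDF threshold is requested by the adaptive search, it must be inserted into the (sorted) list of branch probabilities together with the corresponding value-space threshold obtained through the inverse CDF. A minimal sketch of that insertion, using bisect instead of the nonzero() search above and a standard normal in place of self.distDict[key] (hypothetical helper, not the RAVEN API):

import bisect

from scipy.stats import norm  # stand-in for self.distDict[key]

def addThreshold(branchCdfs, branchValues, newCdf, dist=norm):
    """Insert newCdf (and its ppf) keeping both lists sorted and aligned."""
    if newCdf not in branchCdfs:
        ind = bisect.bisect_left(branchCdfs, newCdf)
        branchCdfs.insert(ind, newCdf)
        branchValues.insert(ind, dist.ppf(newCdf))
    return branchCdfs, branchValues

# usage
addThreshold([0.1, 0.5, 0.9], [-1.2816, 0.0, 1.2816], 0.75)
# -> ([0.1, 0.5, 0.75, 0.9], [-1.2816, 0.0, 0.6745, 1.2816])
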
Esempio n. 30
0
  def __runTemporalSciKitLearn(self, Input):
    """
      This method executes the postprocessor action. In this case, it loads the
      results into the specified dataObject. This is for temporalSciKitLearn
      @ In, Input, dict, dictionary of data to process
      @ Out, outputDict, dict, dictionary containing the post-processed results
    """
    self.unSupervisedEngine.features = Input['Features']
    self.unSupervisedEngine.pivotVariable = self.pivotVariable

    if not self.unSupervisedEngine.amITrained:
      self.unSupervisedEngine.train(Input['Features'])
    self.unSupervisedEngine.confidence()

    self.userInteraction()

    outputDict = self.unSupervisedEngine.outputDict

    numberOfHistoryStep = self.unSupervisedEngine.numberOfHistoryStep
    numberOfSample = self.unSupervisedEngine.numberOfSample

    if 'bicluster' == self.unSupervisedEngine.getDataMiningType():
      self.raiseAnError(RuntimeError, 'Bicluster has not yet been implemented.')

    ## Rename the algorithm output to point to the user-defined label feature

    if 'labels' in self.unSupervisedEngine.outputDict['outputs'].keys():
      labels = np.zeros(shape=(numberOfSample,numberOfHistoryStep))
      for t in range(numberOfHistoryStep):
        labels[:,t] = self.unSupervisedEngine.outputDict['outputs']['labels'][t]
      outputDict['outputs'][self.labelFeature] = labels
    elif 'embeddingVectors' in outputDict['outputs']:
      transformedData = outputDict['outputs'].pop('embeddingVectors')
      reducedDimensionality = utils.first(transformedData.values()).shape[1]

      for i in range(reducedDimensionality):
        dimensionI = np.zeros(shape=(numberOfSample,numberOfHistoryStep))
        newColumnName = self.labelFeature + str(i + 1)

        for t in range(numberOfHistoryStep):
          dimensionI[:, t] =  transformedData[t][:, i]
        outputDict['outputs'][newColumnName] = dimensionI

    if 'cluster' == self.unSupervisedEngine.getDataMiningType():
      ## SKL will always enumerate cluster centers starting from zero, if this
      ## is violated, then the indexing below will break.
      if 'clusterCentersIndices' in self.unSupervisedEngine.metaDict.keys():
        clusterCentersIndices = self.unSupervisedEngine.metaDict['clusterCentersIndices']

      if 'clusterCenters' in self.unSupervisedEngine.metaDict.keys():
        clusterCenters = self.unSupervisedEngine.metaDict['clusterCenters']
        # Output cluster centroid to solutionExport
        if self.solutionExport is not None:
          rlzDims = {}
          rlzs = {}
          clusterLabels = range(int(np.max(labels)) + 1)
          rlzs[self.labelFeature] = np.atleast_1d(clusterLabels)
          rlzs[self.pivotParameter] = self.pivotVariable
          ## We will process each cluster in turn
          for rlzIndex in clusterLabels:
            ## Now we will process each feature available
            ## TODO: Ensure user requests each of these
            for featureIdx, feat in enumerate(self.unSupervisedEngine.features):
              ## We will go through the time series and find every instance
              ## where this cluster exists, if it does not, then we put a NaN
              ## to signal that the information is missing for this timestep
              timeSeries = np.zeros(numberOfHistoryStep)
              for timeIdx in range(numberOfHistoryStep):
                ## Here we use the assumption that SKL provides clusters that
                ## are integer values beginning at zero, which make for nice
                ## indexes with no need to add another layer of obfuscation
                if rlzIndex in clusterCentersIndices[timeIdx]:
                  loc = clusterCentersIndices[timeIdx].index(rlzIndex)
                  timeSeries[timeIdx] = self.unSupervisedEngine.metaDict['clusterCenters'][timeIdx][loc,featureIdx]
                else:
                  timeSeries[timeIdx] = np.atleast_1d(np.nan)

              ## In summary, for each feature, we fill a temporary array and
              ## stuff it into the solutionExport, one question is how do we
              ## tell it which item we are exporting? I am assuming that if
              ## I add an input, then I need to do the corresponding
              ## updateOutputValue to associate everything with it? Once I
              ## call updateInputValue again, it will move the pointer? This
               ## needs to be verified
              if feat not in rlzs.keys():
                rlzs[feat] = np.zeros((len(clusterLabels), numberOfHistoryStep))
                rlzs[feat][rlzIndex] = copy.copy(timeSeries)
                rlzDims[feat] = [self.pivotParameter]
              else:
                rlzs[feat][rlzIndex] = copy.copy(timeSeries)
          self.solutionExport.load(rlzs, style='dict',dims=rlzDims)

      if 'inertia' in self.unSupervisedEngine.outputDict.keys():
        inertia = self.unSupervisedEngine.outputDict['inertia']

    elif 'mixture' == self.unSupervisedEngine.getDataMiningType():
      if 'covars' in self.unSupervisedEngine.metaDict.keys():
        mixtureCovars = self.unSupervisedEngine.metaDict['covars']
      else:
        mixtureCovars = None

      if 'precs' in self.unSupervisedEngine.metaDict.keys():
        mixturePrecs = self.unSupervisedEngine.metaDict['precs']
      else:
        mixturePrecs = None

      if 'componentMeanIndices' in self.unSupervisedEngine.metaDict.keys():
        componentMeanIndices = self.unSupervisedEngine.metaDict['componentMeanIndices']
      else:
        componentMeanIndices = None

      if 'means' in self.unSupervisedEngine.metaDict.keys():
        mixtureMeans = self.unSupervisedEngine.metaDict['means']
      else:
        mixtureMeans = None

      # Output cluster centroid to solutionExport
      if self.solutionExport is not None:
        ## We will process each cluster in turn
        rlzDims = {}
        rlzs = {}
        ## First store the label as the input for this cluster
        mixLabels = range(int(np.max(list(componentMeanIndices.values())))+1)
        rlzs[self.labelFeature] = np.atleast_1d(mixLabels)
        rlzs[self.pivotParameter] = self.pivotVariable
        for rlzIndex in mixLabels:
          ## Now we will process each feature available
          ## TODO: Ensure user requests each of these
          if mixtureMeans is not None:
            for featureIdx, feat in enumerate(self.unSupervisedEngine.features):
              ## We will go through the time series and find every instance
              ## where this cluster exists, if it does not, then we put a NaN
              ## to signal that the information is missing for this timestep
              timeSeries = np.zeros(numberOfHistoryStep)
              for timeIdx in range(numberOfHistoryStep):
                loc = componentMeanIndices[timeIdx].index(rlzIndex)
                timeSeries[timeIdx] = mixtureMeans[timeIdx][loc,featureIdx]
              ## In summary, for each feature, we fill a temporary array and
              ## stuff it into the solutionExport, one question is how do we
              ## tell it which item we are exporting? I am assuming that if
              ## I add an input, then I need to do the corresponding
              ## updateOutputValue to associate everything with it? Once I
              ## call updateInputValue again, it will move the pointer? This
               ## needs to be verified
              if feat not in rlzs.keys():
                rlzs[feat] = copy.copy(timeSeries)
                rlzDims[feat] = [self.pivotParameter]
              else:
                rlzs[feat] = np.vstack((rlzs[feat], copy.copy(timeSeries)))
          ## You may also want to output the covariances of each pair of
          ## dimensions as well
          if mixtureCovars is not None:
            for i,row in enumerate(self.unSupervisedEngine.features.keys()):
              for joffset,col in enumerate(list(self.unSupervisedEngine.features.keys())[i:]):
                j = i+joffset
                timeSeries = np.zeros(numberOfHistoryStep)
                for timeIdx in range(numberOfHistoryStep):
                  loc = componentMeanIndices[timeIdx].index(rlzIndex)
                  timeSeries[timeIdx] = mixtureCovars[timeIdx][loc][i,j]
                covPairName = 'cov_' + str(row) + '_' + str(col)
                if covPairName not in rlzs.keys():
                  rlzs[covPairName] = timeSeries
                  rlzDims[covPairName] = [self.pivotParameter]
                else:
                  rlzs[covPairName] = np.vstack((rlzs[covPairName], timeSeries))
        self.solutionExport.load(rlzs, style='dict',dims=rlzDims)
    elif 'decomposition' == self.unSupervisedEngine.getDataMiningType():
      if self.solutionExport is not None:
        solutionExportDict = self.unSupervisedEngine.metaDict
        ## Get the transformation matrix and push it to a SolutionExport
        ## data object.
        ## Can I be sure of the order of dimensions in the features dict, is
        ## the same order as the data held in the UnSupervisedLearning object?
        if 'components' in solutionExportDict:
          components = solutionExportDict['components']

          ## Note, this implies some data exists (Really this information should
          ## be stored in a dictionary to begin with)
          numComponents,numDimensions = components[0].shape

          componentsArray = np.zeros((numComponents, numberOfHistoryStep, numDimensions))
          evrArray = np.zeros((numComponents, numberOfHistoryStep))

          for timeIdx in range(numberOfHistoryStep):
            for componentIdx,values in enumerate(components[timeIdx]):
              componentsArray[componentIdx,timeIdx,:] = values
              evrArray[componentIdx, timeIdx] = solutionExportDict['explainedVarianceRatio'][timeIdx][componentIdx]

          rlzs = {}
          rlzDims = {}
          ## First store the dimension name as the input for this component
          rlzs[self.labelFeature] =  np.atleast_1d(range(1,numComponents+1))
          rlzs[self.pivotParameter] = self.pivotVariable
          for dimIdx,dimName in enumerate(self.unSupervisedEngine.features.keys()):
            values = componentsArray[:,:,dimIdx]
            rlzs[dimName] = values
            rlzDims[dimName] = [self.pivotParameter]
            if 'explainedVarianceRatio' in solutionExportDict:
              rlzs['ExplainedVarianceRatio'] = evrArray
              rlzDims['ExplainedVarianceRatio'] = [self.pivotParameter]
        self.solutionExport.load(rlzs, style='dict',dims=rlzDims)

    return outputDict
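
Both the cluster and mixture branches above assemble, for each label and feature, a time series of centroid values with NaN wherever the label is absent at a given time step. A minimal sketch of that assembly, with hypothetical shapes (per-step lists of label indices and per-step center arrays), not the RAVEN API:

import numpy as np

def centroidSeries(centersIndices, centers, clusterLabel, featureIdx):
    """centersIndices: per time step, the cluster ids present;
       centers: per time step, an array [nClustersPresent, nFeatures]."""
    nSteps = len(centersIndices)
    series = np.full(nSteps, np.nan)  # NaN marks time steps where the cluster is absent
    for t in range(nSteps):
        if clusterLabel in centersIndices[t]:
            loc = list(centersIndices[t]).index(clusterLabel)
            series[t] = centers[t][loc, featureIdx]
    return series

# usage: cluster 1 is missing at the second time step
centroidSeries([[0, 1], [0], [0, 1]],
               [np.array([[1., 2.], [3., 4.]]),
                np.array([[1.5, 2.5]]),
                np.array([[2., 3.], [4., 5.]])],
               clusterLabel=1, featureIdx=0)
# -> array([ 3., nan,  4.])
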