Example #1
def reseed(self, seed):
  """
    Used to set the underlying random seed.
    @ In, seed, int, new seed to use
    @ Out, None
  """
  randomUtils.randomSeed(seed)
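For orientation, here is a minimal, self-contained sketch of the kind of seedable wrapper that reseed delegates to; the _RandomUtilsStub class is a hypothetical stand-in for RAVEN's randomUtils module, which has a much richer API.

import numpy as np

class _RandomUtilsStub:
  # hypothetical stand-in for randomUtils, for illustration only
  def __init__(self):
    self._rng = np.random.RandomState()

  def randomSeed(self, seed):
    # reset the underlying generator to a known state
    self._rng = np.random.RandomState(seed)

  def random(self):
    # draw a float uniformly on [0, 1)
    return self._rng.random_sample()

randomUtils = _RandomUtilsStub()
randomUtils.randomSeed(42)
print(randomUtils.random())  # 0.3745401188..., reproducible on every run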
Example #2
  def initialize(self, externalSeeding=None, solutionExport=None):
    """
      This function should be called every time a clean optimizer is needed. Called before takeAstep in <Step>
      @ In, externalSeeding, int, optional, external seed
      @ In, solutionExport, DataObject, optional, a PointSet to hold the solution
      @ Out, None
    """
    AdaptiveSampler.initialize(self, externalSeeding=externalSeeding, solutionExport=solutionExport)
    # functional constraints
    for entry in self.assemblerDict.get('Constraint', []):
      self._constraintFunctions.append(entry[3])

    for entry in self.assemblerDict.get('ImplicitConstraint', []):
      self._impConstraintFunctions.append(entry[3])
    # sampler
    self._initializeInitSampler(externalSeeding)
    # seed
    if self._seed is not None:
      randomUtils.randomSeed(self._seed)
    # variable bounds
    self._variableBounds = {}
    for var in self.toBeSampled:
      dist = self.distDict[var]
      lower = dist.lowerBound if dist.lowerBound is not None else -np.inf
      upper = dist.upperBound if dist.upperBound is not None else np.inf
      self._variableBounds[var] = [lower, upper]
      self.raiseADebug('Set bounds for opt var "{}" to {}'.format(var, self._variableBounds[var]))
    # trajectory initialization
    for i, init in enumerate(self._initialValues):
      self._initialValues[i] = self.normalizeData(init)
      self.initializeTrajectory()
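The bound-defaulting pattern above is easy to see in isolation; the Dist class below is a hypothetical stand-in for RAVEN's distribution objects, whose lowerBound/upperBound attributes may be None.

import numpy as np

class Dist:
  # hypothetical distribution stub: bounds may be absent (None)
  def __init__(self, lowerBound=None, upperBound=None):
    self.lowerBound = lowerBound
    self.upperBound = upperBound

variableBounds = {}
for var, dist in {'x': Dist(0.0, 1.0), 'y': Dist()}.items():
  lower = dist.lowerBound if dist.lowerBound is not None else -np.inf
  upper = dist.upperBound if dist.upperBound is not None else np.inf
  variableBounds[var] = [lower, upper]
print(variableBounds)  # {'x': [0.0, 1.0], 'y': [-inf, inf]}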
Example #3
def _generateDistributions(self, availableDist, availableFunc):
    """
      Generates the distributions and functions.
      @ In, availableDist, dict, dict of distributions
      @ In, availableFunc, dict, dict of functions
      @ Out, None
    """
    if self.initSeed is not None:
        randomUtils.randomSeed(self.initSeed)
    for key in self.toBeSampled.keys():
        if self.toBeSampled[key] not in availableDist.keys():
            self.raiseAnError(
                IOError, 'Distribution ' + self.toBeSampled[key] +
                ' not found among available distributions (check input)!')
        self.distDict[key] = availableDist[self.toBeSampled[key]]
        self.inputInfo['crowDist'][key] = json.dumps(
            self.distDict[key].getCrowDistDict())
    for key, val in self.dependentSample.items():
        if val not in availableFunc.keys():
            self.raiseAnError(
                IOError, 'Function', val,
                'was not found among the available functions:',
                availableFunc.keys())
        self.funcDict[key] = availableFunc[val]
        # check if the correct method is present
        if "evaluate" not in self.funcDict[key].availableMethods():
            self.raiseAnError(
                IOError, 'Function ' + self.funcDict[key].name +
                ' does not contain a method named "evaluate". It must be present if this needs to be used in a Sampler!'
            )
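The method-presence check at the end of this example can be sketched standalone; the Function stub below is an assumption standing in for RAVEN's Functions entity, which discovers methods from user-supplied Python files.

class Function:
  # hypothetical stub for a RAVEN Functions entity
  def __init__(self, name, methods):
    self.name = name
    self._methods = methods

  def availableMethods(self):
    return self._methods

f = Function('myFunc', ['evaluate'])
if "evaluate" not in f.availableMethods():
  raise IOError('Function ' + f.name + ' does not contain a method named "evaluate"!')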
Example #4
def _incrementCounter(self):
  """
    Increments the counter and sets up the prefix.
    @ In, None
    @ Out, None
  """
  # since we are creating the input for the next run, we increase the counter and the global counter
  self.counter += 1
  self.auxcnt  += 1
  # exit if over the limit
  if self.counter > self.limit:
    self.raiseADebug('Exceeded number of points requested in sampling!  Moving on...')
  # FIXME: the following check makes sure the required info is only printed once when dumping metadata
  #   to xml; it should be removed once we have a better way to dump the metadata
  if self.counter > 1:
    for key in self.entitiesToRemove:
      self.inputInfo.pop(key, None)
  if self.reseedAtEachIteration:
    randomUtils.randomSeed(self.auxcnt - 1)
  self.inputInfo['prefix'] = str(self.counter)
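The reseed-per-iteration branch above means run k always starts from seed auxcnt-1, so any single run can be replayed in isolation. A minimal sketch of that property, using a numpy generator as an assumed stand-in for randomUtils:

import numpy as np

def runSample(auxcnt):
  # mirrors randomUtils.randomSeed(self.auxcnt - 1) before each run
  rng = np.random.RandomState(auxcnt - 1)
  return rng.random_sample()

first = [runSample(k) for k in range(1, 4)]
again = [runSample(k) for k in range(1, 4)]
assert first == again  # each run is independently reproducible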
Example #5
  else:
    if updateResults:
      results["pass"] += 1
    return True

### BEGIN TESTS
# NOTE that due to seeding, this test relies HEAVILY on not changing the order of calls to randomUtils!
# Reseed at the beginning of sections and add new tests to the end of sections.

# set the stochastic environment TODO check both someday?
# cannot pass the numpy as the stochasticEnv
randomUtils.stochasticEnv = 'crow'

eng = randomUtils.newRNG()
# randomSeed(), setting the random seed
randomUtils.randomSeed(42,engine=None)
randomUtils.randomSeed(42,engine=eng)
# check that seed is set
checkAnswer('First float from first seed for engine not provided',randomUtils.random(engine=None),0.374540118847)
checkAnswer('First float from first seed for local engine provided',randomUtils.random(engine=eng),0.374540118847)

# check resetting seed
randomUtils.randomSeed(12345,engine=None) #next float would be 0.95071430641 if seed didn't change
randomUtils.randomSeed(12345,engine=eng) #next float would be 0.95071430641 if seed didn't change
checkAnswer('First float from second seed for engine not provided',randomUtils.random(engine=None),0.929616092817)
checkAnswer('First float from second seed for local engine provided',randomUtils.random(engine=eng),0.929616092817)

### random(), sampling on [0,1]
## single sampling
randomUtils.randomSeed(42,engine=None)
randomUtils.randomSeed(42,engine=eng)
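The test relies on a checkAnswer helper whose tail is visible at the top of this example. A minimal sketch consistent with that tail, assuming a simple absolute-tolerance comparison (the real RAVEN test utility may differ in signature and reporting):

results = {"pass": 0, "fail": 0}

def checkAnswer(comment, value, expected, tol=1e-10, updateResults=True):
  # compare a sampled value against its expected value within a tolerance
  if abs(value - expected) > tol:
    print("checking answer", comment, '|', value, "!=", expected)
    if updateResults:
      results["fail"] += 1
    return False
  else:
    if updateResults:
      results["pass"] += 1
    return True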
Example #6
    def generateInput(self, model, oldInput):
        """
      This method has to be overwritten to provide the specialization for the specific sampler
      The model instance in might be needed since, especially for external codes,
      only the code interface possesses the dictionary for reading the variable definition syntax
      @ In, model, model instance, it is the instance of a RAVEN model
      @ In, oldInput, list, a list of the original needed inputs for the model (e.g. list of files, etc. etc)
      @ Out, generateInput, tuple(0,list), list contains the new inputs -in reality it is the model that returns this; the Sampler generates the value to be placed in the input of the model.
      The Out parameter depends on the results of generateInput
        If a new point is found, the default Out above is correct.
        If a restart point is found:
          @ Out, generateInput, tuple(int,dict), (1,realization dictionary)
    """
        self.counter += 1  #since we are creating the input for the next run we increase the counter and global counter
        self.auxcnt += 1
        #exit if over the limit
        if self.counter > self.limit:
            self.raiseADebug(
                'Exceeded number of points requested in sampling!  Moving on...'
            )
        #FIXME: the following check makes sure the required info is only printed once when dumping metadata to xml; this should be removed once we have a better way to dump the metadata
        if self.counter > 1:
            for key in self.entitiesToRemove:
                self.inputInfo.pop(key, None)
        if self.reseedAtEachIteration:
            randomUtils.randomSeed(self.auxcnt - 1)
        self.inputInfo['prefix'] = str(self.counter)
        model.getAdditionalInputEdits(self.inputInfo)
        self.localGenerateInput(model, oldInput)

        ##### TRANSFORMATION #####
        # add latent variables and original variables to self.inputInfo
        if self.variablesTransformationDict:
            for dist, var in self.variablesTransformationDict.items():
                if self.transformationMethod[dist] == 'pca':
                    self.pcaTransform(var, dist)
                else:
                    self.raiseAnError(
                        NotImplementedError,
                        'transformation method is not yet implemented for ' +
                        self.transformationMethod[dist] + ' method')
        ##### CONSTANT VALUES ######
        self._constantVariables()
        ##### REDUNDANT FUNCTIONALS #####
        # generate the function variable values
        for var in self.dependentSample.keys():
            test = self.funcDict[var].evaluate("evaluate", self.values)
            for corrVar in var.split(","):
                self.values[corrVar.strip()] = test
        ##### RESTART #####
        #check if point already exists
        if self.restartData is not None:
            inExisting = self.restartData.getMatchingRealization(
                self.values, tol=self.restartTolerance)
        else:
            inExisting = None
        #if not found or not restarting, we have a new point!
        if inExisting is None:
            self.raiseADebug('Found new point to sample:', self.values)
            ## The new info for the perturbed run will be stored in the sampler's
            ## inputInfo (I don't particularly like this, I think it should be
            ## returned here, but let's get this working and then we can decide how
            ## to best pass this information around. My reasoning is that returning
            ## it here means the sampler does not need to store it, and we can return
            ## a copy of the information, otherwise we have to be careful to create a
            ## deep copy of this information when we submit it to a job).
            ## -- DPM 4/18/17
            return 0, oldInput
        #otherwise, return the restart point
        else:
            self.raiseADebug('Point found in restart:', inExisting['inputs'])
            realization = {}
            realization['metadata'] = copy.deepcopy(self.inputInfo)
            realization['inputs'] = inExisting['inputs']
            realization['outputs'] = inExisting['outputs']
            realization['prefix'] = self.inputInfo['prefix']
            return 1, realization
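The restart branch above hinges on getMatchingRealization, which looks for a stored realization whose inputs match the new point within a tolerance. A simplified sketch of that matching, assuming realizations are plain dicts of scalars (RAVEN's DataObject handles typing, ordering, and units):

def getMatchingRealization(existing, values, tol=1e-8):
  # return the first stored realization whose inputs match within tol
  for rlz in existing:
    if all(abs(rlz['inputs'][k] - v) <= tol for k, v in values.items()):
      return rlz
  return None

stored = [{'inputs': {'x': 1.0}, 'outputs': {'y': 2.0}}]
print(getMatchingRealization(stored, {'x': 1.0 + 1e-12}))  # matched -> restart
print(getMatchingRealization(stored, {'x': 3.0}))          # None -> new point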
Example #7
    def initialize(self, externalSeeding=None, solutionExport=None):
        """
      This function should be called every time a clean sampler is needed. Called before takeAstep in <Step>
      @ In, externalSeeding, int, optional, external seed
      @ In, solutionExport, DataObject, optional, in goal oriented sampling (a.k.a. adaptive sampling this is where the space/point satisfying the constrains)
      @ Out, None
    """
        if self.initSeed == None:
            self.initSeed = randomUtils.randomIntegers(0, 2**31, self)
        self.counter = 0
        if not externalSeeding:
            randomUtils.randomSeed(
                self.initSeed)  #use the sampler initialization seed
            self.auxcnt = self.initSeed
        elif externalSeeding == 'continue':
            pass  #in this case the random sequence needs to be preserved
        else:
            randomUtils.randomSeed(
                externalSeeding)  #the external seeding is used
            self.auxcnt = externalSeeding
        #grab restart dataobject if it's available, then in localInitialize the sampler can deal with it.
        if 'Restart' in self.assemblerDict.keys():
            self.raiseADebug('Restart object: ' +
                             str(self.assemblerDict['Restart']))
            self.restartData = self.assemblerDict['Restart'][0][3]
            self.raiseAMessage('Restarting from ' + self.restartData.name)
            #check consistency of data
            try:
                rdata = self.restartData.getAllMetadata()['crowDist']
                sdata = self.inputInfo['crowDist']
                self.raiseAMessage('sampler inputs:')
                for sk, sv in sdata.items():
                    self.raiseAMessage('|   ' + str(sk) + ': ' + str(sv))
                for i, r in enumerate(rdata):
                    if type(r) != dict:
                        continue
                    if not r == sdata:
                        self.raiseAMessage('restart inputs %i:' % i)
                        for rk, rv in r.items():
                            self.raiseAMessage('|   ' + str(rk) + ': ' +
                                               str(rv))
                        self.raiseAnError(
                            IOError,
                            'Restart "%s" data[%i] does not have same inputs as sampler!'
                            % (self.restartData.name, i))
            except KeyError as e:
                self.raiseAWarning(
                    "No CROW distribution available in restart -", e)
        else:
            self.raiseAMessage('No restart for ' + self.printTag)

        #load restart data into existing points
        if self.restartData is not None:
            if not self.restartData.isItEmpty():
                inps = self.restartData.getInpParametersValues()
                outs = self.restartData.getOutParametersValues()
                #FIXME there is no guarantee ordering is accurate between restart data and sampler
                inputs = list(v for v in inps.values())
                existingInps = zip(*inputs)
                outVals = zip(*list(v for v in outs.values()))
                self.existing = dict(zip(existingInps, outVals))

        #specializing the self.localInitialize() to account for adaptive sampling
        if solutionExport is not None:
            self.localInitialize(solutionExport=solutionExport)
        else:
            self.localInitialize()

        for distrib in self.NDSamplingParams:
            if distrib in self.distributions2variablesMapping:
                params = self.NDSamplingParams[distrib]
                temp = utils.first(
                    self.distributions2variablesMapping[distrib][0].keys())
                self.distDict[temp].updateRNGParam(params)
            else:
                self.raiseAnError(
                    IOError,
                    'Distribution "%s" specified in distInit block of sampler "%s" does not exist!'
                    % (distrib, self.name))

        # Store the transformation matrix in the metadata
        if self.variablesTransformationDict:
            self.entitiesToRemove = []
            for variable in self.variables2distributionsMapping.keys():
                distName = self.variables2distributionsMapping[variable][
                    'name']
                dim = self.variables2distributionsMapping[variable]['dim']
                totDim = self.variables2distributionsMapping[variable][
                    'totDim']
                if totDim > 1 and dim == 1:
                    transformDict = {}
                    transformDict['type'] = self.distDict[
                        variable.strip()].type
                    transformDict['transformationMatrix'] = self.distDict[
                        variable.strip()].transformationMatrix()
                    self.inputInfo['transformation-' +
                                   distName] = transformDict
                    self.entitiesToRemove.append('transformation-' + distName)
Example #8
  def initialize(self,externalSeeding=None,solutionExport=None):
    """
      This function should be called every time a clean sampler is needed. Called before takeAstep in <Step>
      @ In, externalSeeding, int, optional, external seed
      @ In, solutionExport, DataObject, optional, in goal oriented sampling (a.k.a. adaptive sampling this is where the space/point satisfying the constrains)
      @ Out, None
    """
    if self.initSeed == None:
      self.initSeed = randomUtils.randomIntegers(0,2**31,self)
    self.counter = 0
    if not externalSeeding:
      randomUtils.randomSeed(self.initSeed)       #use the sampler initialization seed
      self.auxcnt = self.initSeed
    elif externalSeeding=='continue':
      pass        #in this case the random sequence needs to be preserved
    else:
      randomUtils.randomSeed(externalSeeding)     #the external seeding is used
      self.auxcnt = externalSeeding
    #grab restart dataobject if it's available, then in localInitialize the sampler can deal with it.
    if 'Restart' in self.assemblerDict.keys():
      self.raiseADebug('Restart object: '+str(self.assemblerDict['Restart']))
      self.restartData = self.assemblerDict['Restart'][0][3]
      # check the right variables are in the restart
      need = set(list(self.toBeSampled.keys()) + list(self.dependentSample.keys()))  # list() so dict views concatenate under Python 3
      if not need.issubset(set(self.restartData.getVars())):
        missing = need - set(self.restartData.getVars())
        #TODO this could be a warning, instead, but user wouldn't see it until the run was deep in
        self.raiseAnError(KeyError,'Restart data object "{}" is missing the following variables: "{}". No restart can be performed.'.format(self.restartData.name,', '.join(missing)))
      else:
        self.raiseAMessage('Restarting from '+self.restartData.name)
      # we used to check distribution consistency here, but we want to give more flexibility to using
      #   restart data, so do NOT check distributions of restart data.
    else:
      self.raiseAMessage('No restart for '+self.printTag)

    #load restart data into existing points
    # TODO do not copy data!  Read directly from restart.
    #if self.restartData is not None:
    #  if len(self.restartData) > 0:
    #    inps = self.restartData.getInpParametersValues()
    #    outs = self.restartData.getOutParametersValues()
    #    #FIXME there is no guarantee ordering is accurate between restart data and sampler
    #    inputs = list(v for v in inps.values())
    #    existingInps = zip(*inputs)
    #    outVals = zip(*list(v for v in outs.values()))
    #    self.existing = dict(zip(existingInps,outVals))

    #specializing the self.localInitialize() to account for adaptive sampling
    if solutionExport is not None:
      self.localInitialize(solutionExport=solutionExport)
    else:
      self.localInitialize()

    for distrib in self.NDSamplingParams:
      if distrib in self.distributions2variablesMapping:
        params = self.NDSamplingParams[distrib]
        temp = utils.first(self.distributions2variablesMapping[distrib][0].keys())
        self.distDict[temp].updateRNGParam(params)
      else:
        self.raiseAnError(IOError,'Distribution "%s" specified in distInit block of sampler "%s" does not exist!' %(distrib,self.name))

    # Store the transformation matrix in the metadata
    if self.variablesTransformationDict:
      self.entitiesToRemove = []
      for variable in self.variables2distributionsMapping.keys():
        distName = self.variables2distributionsMapping[variable]['name']
        dim      = self.variables2distributionsMapping[variable]['dim']
        totDim   = self.variables2distributionsMapping[variable]['totDim']
        if totDim > 1 and dim  == 1:
          transformDict = {}
          transformDict['type'] = self.distDict[variable.strip()].type
          transformDict['transformationMatrix'] = self.distDict[variable.strip()].transformationMatrix()
          self.inputInfo['transformation-'+distName] = transformDict
          self.entitiesToRemove.append('transformation-'+distName)

    # Register expected metadata
    meta = ['ProbabilityWeight','prefix','PointProbability']
    for var in self.toBeSampled.keys():
      meta +=  ['ProbabilityWeight-'+ key for key in var.split(",")]
    self.addMetaKeys(*meta)
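The seeding logic at the top of initialize() distinguishes three externalSeeding modes: falsy (use the sampler's own initSeed), the string 'continue' (leave the current random sequence alone), and an explicit integer (override). A small sketch of just that decision, with a hypothetical helper name:

def resolveSeed(initSeed, externalSeeding=None):
  # hypothetical helper mirroring the branch structure of initialize()
  if not externalSeeding:
    return initSeed          # fall back to the sampler's own seed
  elif externalSeeding == 'continue':
    return None              # None here meaning: do not reseed at all
  else:
    return externalSeeding   # an explicit external seed wins

assert resolveSeed(42) == 42
assert resolveSeed(42, 'continue') is None
assert resolveSeed(42, 777) == 777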
Example #9
        return False
    else:
        if updateResults:
            results["pass"] += 1
        return True


### BEGIN TESTS
# NOTE that due to seeding, this test relies HEAVILY on not changing the order of calls to randomUtils!
# Reseed at the beginning of sections and add new tests to the end of sections.

# set the stochastic environment TODO check both someday?
randomUtils.stochasticEnv = 'crow'

# randomSeed(), setting the random seed
randomUtils.randomSeed(42)
# check that seed is set
checkAnswer('First float from first seed', randomUtils.random(),
            0.374540118847)
# check resetting seed
randomUtils.randomSeed(
    12345)  #next float would be 0.95071430641 if seed didn't change
checkAnswer('First float from second seed', randomUtils.random(),
            0.929616092817)

### random(), sampling on [0,1]
## single sampling
randomUtils.randomSeed(42)
vals = np.array([randomUtils.random() for _ in range(int(1e5))])
mean = np.average(vals)
stdv = np.std(vals)
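The natural checks on these statistics (elided in this excerpt) are against the known moments of U[0,1]: mean 1/2 and standard deviation 1/sqrt(12) ≈ 0.2887. A quick standalone verification, with numpy as an assumed stand-in for the sampling engine:

import numpy as np

rng = np.random.RandomState(42)
vals = rng.random_sample(int(1e5))
print(abs(vals.mean() - 0.5))               # small: E[U(0,1)] = 1/2
print(abs(vals.std() - np.sqrt(1.0 / 12)))  # small: std[U(0,1)] = 1/sqrt(12)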
Example #10
    def initialize(self, externalSeeding=None, solutionExport=None):
        """
      This function should be called every time a clean optimizer is needed. Called before takeAstep in <Step>
      @ In, externalSeeding, int, optional, external seed
      @ In, solutionExport, DataObject, optional, a PointSet to hold the solution
      @ Out, None
    """
        for entry in self.assemblerDict.get('Preconditioner', []):
            cls, typ, name, model = entry
            if cls != 'Models' or typ != 'ExternalModel':
                self.raiseAnError(
                    IOError,
                    'Currently only "ExternalModel" models can be used as preconditioners! Got "{}.{}" for "{}".'
                    .format(cls, typ, name))
            self.preconditioners[name] = model
            model.initialize({}, [])

        for entry in self.assemblerDict.get('Sampler', []):
            cls, typ, name, sampler = entry
            forwardSampler = False
            for baseClass in sampler.__class__.__bases__:
                if "ForwardSampler" in baseClass.__name__:
                    forwardSampler = True
                    break
            if not forwardSampler:
                self.raiseAnError(
                    IOError,
                    'Only "ForwardSampler"s (e.g. MonteCarlo, Grid, etc.) can be used for initializing the trajectories in the Optimizer! Got "{}.{}" for "{}".'
                    .format(cls, typ, name))
            self.initializationSampler = sampler
            initDict = {}
            for entity in ['Distributions', 'Functions', 'DataObjects']:
                initDict[entity] = dict(
                    (entry[2], entry[3])
                    for entry in self.assemblerDict.get(entity, []))
            self.initializationSampler._localGenerateAssembler(initDict)
            for key in self.initializationSampler.getInitParams().keys():
                if key.startswith("sampled variable:"):
                    var = key.replace("sampled variable:", "").strip()
                    # check if the sampled variables are among the optimization parameters
                    if var not in self.getOptVars():
                        self.raiseAnError(
                            IOError, 'The variable "' + var +
                            '" sampled by the initialization Sampler "' +
                            self.initializationSampler.name +
                            '" is not among the optimization parameters!')
                    # check if the sampled variables have been already initialized in the optimizer (i.e. <initial>)
                    if self.optVarsInitialized[var]:
                        self.raiseAnError(
                            IOError, 'The variable "' + var +
                            '" sampled by the initialization Sampler "' +
                            self.initializationSampler.name +
                            '" has been already initialized in the Optimizer block. Remove <initial> XML node in Optimizer or the <variable> XML node in the Sampler!'
                        )
            # generate the initial coordinates by the sampler and check if they are inside the boundaries
            self.initializationSampler.initialize(externalSeeding)
            # check the number of trajectories (i.e. self.initializationSample.limit in the Sampler)
            currentNumberTrajectories = len(self.optTraj)
            if currentNumberTrajectories > 1:
                if currentNumberTrajectories != self.initializationSampler.limit:
                    self.raiseAnError(
                        IOError,
                        "The number of samples generated by the initialization Sampler is different "
                        "from the number specified in the Optimizer (via the variables where the <initial> XML block was provided)"
                    )
            else:
                self.optTraj = list(range(self.initializationSampler.limit))
                for varName in self.optVarsInit['initial'].keys():
                    self.optVarsInit['initial'][varName] = dict.fromkeys(
                        self.optTraj, self.optVarsInit['initial'][varName][0])
            while self.initializationSampler.amIreadyToProvideAnInput():
                self.initializationSampler.localGenerateInput(None, None)
                self.initializationSampler.inputInfo[
                    'prefix'] = self.initializationSampler.counter
                sampledVars = self.initializationSampler.inputInfo[
                    'SampledVars']
                for varName, value in sampledVars.items():
                    self.optVarsInit['initial'][varName][
                        self.initializationSampler.counter] = np.atleast_1d(
                            value)
                self.initializationSampler.counter += 1

        # NOTE: counter['varsUpdate'] needs to be set AFTER self.optTraj length is set by the sampler (if used exclusively)
        self.counter['mdlEval'] = 0
        self.counter['varsUpdate'] = [0] * len(self.optTraj)
        self.optTrajLive = copy.deepcopy(self.optTraj)

        self.mdlEvalHist = self.assemblerDict['TargetEvaluation'][0][3]
        # check if the TargetEvaluation feature and target spaces are consistent
        ins = self.mdlEvalHist.getVars("input")
        outs = self.mdlEvalHist.getVars("output")
        for varName in self.fullOptVars:
            if varName not in ins:
                self.raiseAnError(
                    RuntimeError, "the optimization variable " + varName +
                    " is not contained in the TargetEvaluation object " +
                    self.mdlEvalHist.name)
        if self.objVar not in outs:
            self.raiseAnError(
                RuntimeError,
                "the optimization objective variable " + self.objVar +
                " is not contained in the TargetEvaluation object " +
                self.mdlEvalHist.name)
        self.objSearchingROM = SupervisedLearning.returnInstance(
            'SciKitLearn', self, **{
                'SKLtype': 'neighbors|KNeighborsRegressor',
                'Features': ','.join(list(self.fullOptVars)),
                'Target': self.objVar,
                'n_neighbors': 1,
                'weights': 'distance'
            })
        self.solutionExport = solutionExport
        if self.solutionExport is None:
            self.raiseAnError(
                IOError,
                'The results of optimization cannot be obtained without a SolutionExport defined in the Step!'
            )

        if type(solutionExport).__name__ not in ["PointSet", "DataSet"]:
            self.raiseAnError(IOError,'solutionExport type must be a PointSet or DataSet. Got '+\
                                       type(solutionExport).__name__+ '!')

        if 'Function' in self.assemblerDict.keys():
            self.constraintFunction = self.assemblerDict['Function'][0][3]
            if 'constrain' not in self.constraintFunction.availableMethods():
                self.raiseAnError(
                    IOError,
                    'the function provided to define the constraints must have an implemented method called "constrain"'
                )

        # initialize dictionary entries
        # TODO a bunch of the gradient-level trajectory initializations should be moved here.
        for traj in self.optTraj:
            self.optVars[traj] = self.getOptVars()
            self.submissionQueue[traj] = deque()

        #check initial point array consistency
        rightLen = len(self.optTraj)  #the hypothetical correct length
        for var in self.getOptVars():
            haveLen = len(self.optVarsInit['initial'][var])
            if haveLen != rightLen:
                self.raiseAnError(
                    RuntimeError,
                    'The number of trajectories for variable "{}" is incorrect!  Got {} but expected {}!  Check the <initial> block.'
                    .format(var, haveLen, rightLen))

        # check the constraint here to check if the initial values violate it
        varK = {}
        for trajInd in self.optTraj:
            for varName in self.getOptVars():
                varK[varName] = self.optVarsInit['initial'][varName][trajInd]
            satisfied, _ = self.checkConstraint(varK)
            if not satisfied:
                # get a random value between the lower and upper bounds
                self.raiseAWarning(
                    "the initial values specified for trajectory " +
                    str(trajInd) +
                    " do not satisfy the constraints. Picking random ones!")
                randomGuessesCnt = 0
                while not satisfied and randomGuessesCnt < self.constraintHandlingPara['innerLoopLimit']:
                    for varName in self.getOptVars():
                        varK[varName] = (self.optVarsInit['lowerBound'][varName] +
                                         randomUtils.random() * self.optVarsInit['ranges'][varName])
                        self.optVarsInit['initial'][varName][trajInd] = varK[varName]
                    satisfied, _ = self.checkConstraint(varK)
                    randomGuessesCnt += 1  # count the attempt so innerLoopLimit can actually bound the loop
                if not satisfied:
                    self.raiseAnError(
                        Exception,
                        "It was not possible to find any initial values that could satisfy the constraints for trajectory "
                        + str(trajInd))

        # extend multivalue variables (aka vector variables, or variables with "shape")
        ## TODO someday take array of initial values from a DataSet
        for var, shape in self.variableShapes.items():
            if np.prod(shape) > 1:
                for traj in self.optTraj:
                    baseVal = self.optVarsInit['initial'][var][traj]
                    newVal = np.ones(shape) * baseVal
                    self.optVarsInit['initial'][var][traj] = newVal

        if self.initSeed is not None:
            randomUtils.randomSeed(self.initSeed)

        self.localInitialize(solutionExport=solutionExport)
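The constraint-repair loop near the end of this example is bounded rejection sampling: redraw uniformly inside the box until the constraint holds or the attempt budget runs out. A standalone sketch with a toy constraint in place of checkConstraint:

import numpy as np

rng = np.random.RandomState(42)
lower = {'x': 0.0, 'y': 0.0}
ranges = {'x': 1.0, 'y': 1.0}
satisfies = lambda v: v['x'] + v['y'] <= 1.0  # toy stand-in for checkConstraint
point = {'x': 0.9, 'y': 0.9}                  # initial guess violates it

tries, innerLoopLimit = 0, 1000
while not satisfies(point) and tries < innerLoopLimit:
  point = {k: lower[k] + rng.random_sample() * ranges[k] for k in point}
  tries += 1
if not satisfies(point):
  raise RuntimeError('no feasible initial point found within the attempt budget')
print(point, 'after', tries, 'redraws')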
Example #11
File: ARMA.py Project: HCBINL/raven
  def characterize(self, signal, pivot, targets, settings):
    """
      Determines the characteristics of the signal based on this algorithm.
      @ In, signal, np.ndarray, time series with dims [time, target]
      @ In, pivot, np.1darray, time-like parameter values
      @ In, targets, list(str), names of targets in same order as signal
      @ In, settings, dict, settings for this ROM
      @ Out, params, dict, characteristic parameters
    """
    # lazy import statsmodels
    import statsmodels.api
    # settings:
    #   P: number of AR terms to use (signal lag)
    #   Q: number of MA terms to use (noise lag)
    #   gaussianize: whether to "whiten" noise before training
    # set seed for training
    seed = settings['seed']
    if seed is not None:
      randomUtils.randomSeed(seed, engine=settings['engine'], seedBoth=True)

    params = {}
    for tg, target in enumerate(targets):
      params[target] = {}
      history = signal[:, tg]
      if settings.get('gaussianize', True):
        # Transform data to obtain a normally distributed series. See
        # J.M.Morales, R.Minguez, A.J.Conejo "A methodology to generate statistically dependent wind speed scenarios,"
        # Applied Energy, 87(2010) 843-855
        # -> then train independent ARMAs
        params[target]['cdf'] = mathUtils.characterizeCDF(history, binOps=2, minBins=self._minBins)
        normed = mathUtils.gaussianize(history, params[target]['cdf'])
      else:
        normed = history
      # TODO correlation (VARMA) as well as singular -> maybe should be independent TSA algo?
      P = settings['P']
      Q = settings['Q']
      d = settings.get('d', 0)
      # TODO just use SARIMAX?
      model = statsmodels.tsa.arima.model.ARIMA(normed, order=(P, d, Q))
      res = model.fit(low_memory=settings['reduce_memory'])
      # NOTE on low_memory use, test using SyntheticHistory.ARMA test:
      #   case    | time used (s) | memory used (MiB)
      #   low mem | 2.570851      | 0.5
      #   no arg  | 2.153929      | 2.8
      #   using low_memory, fit() takes an extra 0.4 seconds and uses 2 MB less
      # NOTE additional interesting arguments to model.fit:
      # -> method_kwargs passes arguments to scipy.optimize.fmin_l_bfgs_b() as kwargs
      #   -> disp: int, 0 or 50 or 100, in order of increasing verbosity for fit solve
      #   -> pgtol: gradient norm tolerance before quitting solve (usually not the limiter)
      #   -> factr: "factor" for exiting solve, roughly as f_new - f_old / scaling <= factr * eps
      #              default is 1e10 (loose solve), medium is 1e7, extremely tight is 1e1
      #   e.g. method_kwargs={'disp': 1, 'pgtol': 1e-9, 'factr': 10.0})
      ## get initial state distribution stats
      # taken from old statsmodels.tsa.statespace.kalman_filter.KalmanFilter.simulate
      smoother = model.ssm
      initMean = np.linalg.solve(np.eye(smoother.k_states) - smoother['transition',:,:,0], smoother['state_intercept',:,0])
      r = smoother['selection',:,:,0]
      q = smoother['state_cov',:,:,0]
      selCov = r.dot(q).dot(r.T)
      initCov = sp.linalg.solve_discrete_lyapunov(smoother['transition',:,:,0], selCov)
      initDist = {'mean': initMean, 'cov': initCov}
      params[target]['arma'] = {'const': res.params[0], # exog/intercept/constant
                                'ar': res.arparams,     # AR
                                'ma': res.maparams,     # MA
                                'var': res.params[-1],  # variance
                                'initials': initDist,   # characteristics for sampling initial states
                                'model': model}
      if not settings['reduce_memory']:
        params[target]['arma']['results'] = res
    return params
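The initial-state statistics computed above are the stationary mean and covariance of the state-space form: the mean m solves (I - T) m = c for transition T and state intercept c, and the covariance P solves the discrete Lyapunov equation P = T P T' + R Q R'. A standalone sketch with small illustrative matrices (the numbers are made up, not from any RAVEN model):

import numpy as np
import scipy.linalg

T = np.array([[0.5, 0.1],
              [0.0, 0.3]])   # transition
c = np.array([1.0, 0.0])     # state intercept
R = np.eye(2)                # selection
Q = 0.1 * np.eye(2)          # state noise covariance

initMean = np.linalg.solve(np.eye(2) - T, c)             # solves (I - T) m = c
selCov = R.dot(Q).dot(R.T)
initCov = scipy.linalg.solve_discrete_lyapunov(T, selCov)  # solves P = T P T' + selCov
print('mean:', initMean)
print('cov:', initCov)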