def evaluate(self, opt, grads, infos, objVar):
  """
    Approximates gradient based on evaluated points.
    @ In, opt, dict, current opt point (normalized)
    @ In, grads, list(dict), evaluated neighbor points
    @ In, infos, list(dict), info about evaluated neighbor points
    @ In, objVar, string, objective variable
    @ Out, magnitude, float, magnitude of gradient
    @ Out, direction, dict, versor (unit vector) for gradient direction
    @ Out, foundInf, bool, if True then infinity calculations were used
  """
  gradient = {}
  # each neighbor carries its own info record telling which variable was
  # perturbed ('optVar') and by how much ('delta')
  for neighbor, info in zip(grads, infos):
    # infinity-safe difference between neighbor and opt objective values
    lossDiff = np.atleast_1d(mathUtils.diffWithInfinites(neighbor[objVar], opt[objVar]))
    # one-sided finite-difference estimate along the perturbed variable
    gradient[info['optVar']] = lossDiff / info['delta']
  # split the raw gradient into a magnitude and a unit direction vector
  magnitude, direction, foundInf = mathUtils.calculateMagnitudeAndVersor(list(gradient.values()))
  direction = {var: float(direction[v]) for v, var in enumerate(gradient)}
  return magnitude, direction, foundInf
def localEvaluateGradient(self, traj, gradHist = False):
  """
    Local method to evaluate gradient.
    @ In, traj, int, the trajectory id
    @ In, gradHist, bool, optional, whether store self.counter['gradientHistory'] in this step.
    @ Out, gradient, dict, dictionary containing gradient estimation. gradient should have the form {varName: gradEstimation}
  """
  # this method used to take a gradient estimation. Nothing actually used it, though. - PWT, 2018-10
  # denoising has already been performed upstream, so just read the results
  optPoint = self.realizations[traj]['denoised']['opt'][0]    # opt point is only one point
  gradPoint = self.realizations[traj]['denoised']['grad'][0]  # SPSA CURRENTLY only has one grad point
  # the sign of the objective change decides the direction; the magnitude is
  # deliberately discarded (SPSA only needs the +/- 1)
  rawDiff = mathUtils.diffWithInfinites(gradPoint[self.objVar], optPoint[self.objVar])
  sign = 1.0 if rawDiff > 0.0 else -1.0
  if self.optType == 'max':
    # force gradient descent: maximization is descent on the negated objective
    sign = -sign
  # per-variable displacement times the sign yields the estimate;
  # atleast_1d keeps dimensionality consistent (always at least 1D)
  return {var: np.atleast_1d(sign * (gradPoint[var] - optPoint[var])) for var in self.getOptVars()}
def localEvaluateGradient(self, traj):
  """
    Local method to evaluate gradient.
    @ In, traj, int, the trajectory id
    @ Out, gradient, dict, dictionary containing gradient estimation. gradient should have the form {varName: gradEstimation}
  """
  gradient = {}
  optVars = self.getOptVars()
  optPoint = self.realizations[traj]['denoised']['opt'][0]
  # one perturbed point per input variable, stored in the same order as the variables
  for index, gradPoint in enumerate(self.realizations[traj]['denoised']['grad']):
    var = optVars[index]
    # unlike SPSA, keep the loss diff magnitude so we get the exact right direction
    lossDiff = mathUtils.diffWithInfinites(gradPoint[self.objVar], optPoint[self.objVar])
    if self.optType == 'max':
      # maximization handled as descent on the negated objective
      lossDiff *= -1.0
    dh = gradPoint[var] - optPoint[var]
    if abs(dh) < 1e-15:
      self.raiseADebug('Checking Var:', var)
      self.raiseADebug('Opt point :', optPoint)
      self.raiseADebug('Grad point :', gradPoint)
      self.raiseAnError(RuntimeError,
                        'While calculating the gradArray a "dh" very close to zero was found for var:', var)
    gradient[var] = np.atleast_1d(lossDiff / dh)
  return gradient
def localEvaluateGradient(self, optVarsValues, traj, gradient=None):
  """
    Local method to evaluate gradient.
    @ In, optVarsValues, dict, dictionary containing perturbed points.
      optVarsValues should have the form {pertIndex: {varName: [varValue1 varValue2]}}
      Therefore, each optVarsValues[pertIndex] should return a dict of variable values
      that is sufficient for gradient evaluation for at least one variable
      (depending on specific optimization algorithm)
    @ In, traj, int, the trajectory id
    @ In, gradient, dict, optional, dictionary containing gradient estimation by the caller.
      gradient should have the form {varName: gradEstimation}
    @ Out, gradient, dict, dictionary containing gradient estimation. gradient should have the form {varName: gradEstimation}
  """
  gradArray = {}
  optVars = self.getOptVars(traj=traj)
  # number of denoising repetitions used to average each gradient sample
  numRepeats = self.gradDict['numIterForAve']
  for var in optVars:
    # dtype=object so entries can hold infinities from diffWithInfinites without overflow issues
    gradArray[var] = np.zeros(self.gradDict['numIterForAve'], dtype=object)
  # optVarsValues layout:
  # - the first <numRepeats> entries are the opt point (from 0 to numRepeats-1)
  # - the next <numRepeats> entries are one each in each direction in turns (dx1, dy1, dx2, dy2, etc)
  #   dx are [lastOpt +1, lastOpt +3, lastOpt +5, etc] -> [lastOpt + <#var>*<index repeat>+1]
  #   dy are [lastOpt +2, lastOpt +4, lastOpt +6, etc]
  # Evaluate gradient at each point
  for i in range(numRepeats):
    opt = optVarsValues[i] #the latest opt point
    for j in range(self.paramDict['pertSingleGrad']): # AKA for each input variable
      # loop over the perturbation to construct the full gradient
      ## first numRepeats are all the opt point, not the perturbed point
      ## then, need every Nth entry, where N is the number of variables
      pert = optVarsValues[numRepeats + i * len(optVars) + j] #the perturbed point
      #calculate grad(F) wrt each input variable (infinity-safe difference)
      lossDiff = mathUtils.diffWithInfinites(pert['output'], opt['output'])
      #cover "max" problems
      # TODO it would be good to cover this in the base class somehow, but in the previous implementation this
      # sign flipping was only called when evaluating the gradient.
      if self.optType == 'max':
        lossDiff *= -1.0
      var = optVars[j]
      # gradient is calculated in normalized space
      dh = pert['inputs'][var] - opt['inputs'][var]
      if abs(dh) < 1e-15:
        # a (near-)zero perturbation would blow up the quotient; report and abort
        self.raiseADebug('Values:', pert['inputs'][var], opt['inputs'][var])
        self.raiseAnError(RuntimeError,
                          'While calculating the gradArray a "dh" very close to zero was found for var:', var)
      gradArray[var][i] = lossDiff / dh
  gradient = {}
  for var in optVars:
    # average the repeated samples; keep dimensionality at least 1D
    gradient[var] = np.atleast_1d(gradArray[var].mean())
  return gradient
def localEvaluateGradient(self, optVarsValues, traj, gradient=None):
  """
    Local method to evaluate gradient.
    @ In, optVarsValues, dict, dictionary containing perturbed points.
      optVarsValues should have the form {pertIndex: {varName: [varValue1 varValue2]}}
      Therefore, each optVarsValues[pertIndex] should return a dict of variable values
      that is sufficient for gradient evaluation for at least one variable
      (depending on specific optimization algorithm)
    @ In, traj, int, the trajectory id
    @ In, gradient, dict, optional, dictionary containing gradient estimation by the caller.
      gradient should have the form {varName: gradEstimation}
    @ Out, gradient, dict, dictionary containing gradient estimation. gradient should have the form {varName: gradEstimation}
  """
  gradArray = {}
  optVars = self.getOptVars(traj=traj)
  # number of denoising repetitions; the first <numRepeats> entries of optVarsValues
  # are opt points, the remaining entries are the perturbed points grouped per repeat
  numRepeats = self.gradDict['numIterForAve']
  for var in optVars:
    gradArray[var] = np.zeros(numRepeats)
  # Evaluate gradient at each point
  for i in range(numRepeats):
    opt = optVarsValues[i] #the latest opt point
    for j in range(self.paramDict['pertSingleGrad']):
      # loop over the perturbation to construct the full gradient.
      # Index math: skip the <numRepeats> opt points, then each repeat "i" owns a
      # contiguous group of len(optVars) perturbed points, one per variable "j".
      # (The previous "numRepeats + i + j" made distinct (i, j) pairs collide on the
      # same perturbed point whenever more than one variable is active.)
      pert = optVarsValues[numRepeats + i * len(optVars) + j] #the perturbed point
      #calculate grad(F) wrt each input variable (infinity-safe difference)
      lossDiff = mathUtils.diffWithInfinites(pert['output'], opt['output'])
      #cover "max" problems
      # TODO it would be good to cover this in the base class somehow, but in the previous implementation this
      # sign flipping was only called when evaluating the gradient.
      if self.optType == 'max':
        lossDiff *= -1.0
      var = optVars[j]
      # gradient is calculated in normalized space
      dh = pert['inputs'][var] - opt['inputs'][var]
      if abs(dh) < 1e-15:
        # a (near-)zero perturbation would blow up the quotient; abort with context
        self.raiseAnError(RuntimeError,
                          'While calculating the gradArray a "dh" very close to zero was found for var:', var)
      gradArray[var][i] = lossDiff / dh
  gradient = {}
  for var in optVars:
    # average the repeated samples; keep dimensionality at least 1D
    gradient[var] = np.atleast_1d(gradArray[var].mean())
  return gradient
def localEvaluateGradient(self, optVarsValues, traj, gradient=None):
  """
    Local method to evaluate gradient.
    @ In, optVarsValues, dict, dictionary containing perturbed points.
      optVarsValues should have the form {pertIndex: {varName: [varValue1 varValue2]}}
      Therefore, each optVarsValues[pertIndex] should return a dict of variable values
      that is sufficient for gradient evaluation for at least one variable
      (depending on specific optimization algorithm)
    @ In, traj, int, the trajectory id
    @ In, gradient, dict, optional, dictionary containing gradient estimation by the caller.
      gradient should have the form {varName: gradEstimation}
    @ Out, gradient, dict, dictionary containing gradient estimation. gradient should have the form {varName: gradEstimation}
  """
  optVars = self.getOptVars(traj=traj)
  # number of gradient evaluations to consider (denoising or resampling)
  numGrads = self.gradDict['numIterForAve']
  # one sample slot per repetition per variable; object dtype tolerates infinities
  gradArray = dict((var, np.zeros(numGrads, dtype=object)) for var in optVars)
  # Evaluate gradient at each point
  for i in range(numGrads):
    optPoint = optVarsValues[i]              # the latest opt point (i-th repetition)
    pertPoint = optVarsValues[i + numGrads]  # the matching perturbed point
    # calculate grad(F) wrt each input variable; fix infinities!
    lossDiff = mathUtils.diffWithInfinites(pertPoint['output'], optPoint['output'])
    #cover "max" problems
    # TODO it would be good to cover this in the base class somehow, but in the previous implementation this
    # sign flipping was only called when evaluating the gradient.
    # Perhaps the sign should flip when evaluating the next point to take, instead of forcing gradient descent
    if self.optType == 'max':
      lossDiff *= -1.0
    for var in optVars:
      # NOTE: gradient is calculated in normalized space
      dh = pertPoint['inputs'][var] - optPoint['inputs'][var]
      # a sample so close cannot be taken without violating minimum step, so a near-zero
      # "dh" check is unnecessary here (a RuntimeError used to be raised; left for reference)
      gradArray[var][i] = lossDiff / dh
  # average the per-repetition samples, keeping at least 1D
  return dict((var, np.atleast_1d(gradArray[var].mean())) for var in optVars)
def evaluate(self, opt, grads, infos, objVar):
  """
    Approximates gradient based on evaluated points.
    @ In, opt, dict, current opt point (normalized)
    @ In, grads, list(dict), evaluated neighbor points
    @ In, infos, list(dict), info about evaluated neighbor points
    @ In, objVar, string, objective variable
    @ Out, magnitude, float, magnitude of gradient
    @ Out, direction, dict, versor (unit vector) for gradient direction
  """
  # a single simultaneous perturbation point is used, so only grads[0] matters
  neighbor = grads[0]
  # infinity-safe objective difference, shared by every variable
  lossDiff = np.atleast_1d(mathUtils.diffWithInfinites(neighbor[objVar], opt[objVar]))
  gradient = {}
  for var in self._optVars:
    # don't assume delta is unchanged; recompute it from the sampled points
    gradient[var] = lossDiff / (neighbor[var] - opt[var])
  # reduce the raw gradient to a magnitude plus a unit direction vector
  magnitude, direction, foundInf = mathUtils.calculateMagnitudeAndVersor(list(gradient.values()))
  direction = {var: float(direction[v]) for v, var in enumerate(gradient)}
  return magnitude, direction, foundInf
def localEvaluateGradient(self, traj, gradHist = False):
  """
    Local method to evaluate gradient.
    @ In, traj, int, the trajectory id
    @ In, gradHist, bool, optional, whether store self.counter['gradientHistory'] in this step.
    @ Out, gradient, dict, dictionary containing gradient estimation. gradient should have the form {varName: gradEstimation}
  """
  gradient = {}
  inVars = self.getOptVars()
  opt = self.realizations[traj]['denoised']['opt'][0]
  allGrads = self.realizations[traj]['denoised']['grad']
  # gi[var] counts the samples already folded into the running mean, minus one
  gi = {}
  for g, pert in enumerate(allGrads):
    # perturbed points cycle through the input variables in order
    varId = g % len(inVars)
    var = inVars[varId]
    # unlike SPSA, keep the loss diff magnitude so we get the exact right direction
    lossDiff = mathUtils.diffWithInfinites(pert[self.objVar], opt[self.objVar])
    if self.optType == 'max':
      # maximization handled as descent on the negated objective
      lossDiff *= -1.0
    dh = pert[var] - opt[var]
    if abs(dh) < 1e-15:
      self.raiseADebug('Checking Var:', var)
      self.raiseADebug('Opt point :', opt)
      self.raiseADebug('Grad point :', pert)
      self.raiseAnError(RuntimeError, 'While calculating the gradArray a "dh" very close to zero was found for var:', var)
    sample = np.atleast_1d(lossDiff / dh)
    # use "is None" instead of "== None": comparing an ndarray with "==" is elementwise
    if gradient.get(var) is None:
      # first sample for this variable
      gi[var] = 0
      gradient[var] = sample
    else:
      # incremental mean: new_mean = (old_mean * n + sample) / (n + 1).
      # (The previous form "(old + sample) * n / (n + 1)" only averaged
      # correctly when exactly two samples per variable were collected.)
      gi[var] += 1
      gradient[var] = (gradient[var] * gi[var] + sample) / (gi[var] + 1)
  if gradHist:
    # shift the newest gradient into slot 0, keeping the previous one in slot 1
    try:
      self.counter['gradientHistory'][traj][1] = self.counter['gradientHistory'][traj][0]
    except IndexError:
      pass # don't have a history on the first pass
    self.counter['gradientHistory'][traj][0] = gradient
  return gradient
def _updateConvergenceVector(self, traj, varsUpdate, currentLossVal):
  """
    Local method to update convergence vector.
    @ In, traj, int, identifier of the trajector to update
    @ In, varsUpdate, int, current variables update iteration number
    @ In, currentLossVal, float, current loss function value
    @ Out, None
  """
  # first, check if we're at varsUpdate 0 (first entry); if so, we are at our first point
  if varsUpdate == 0:
    # we don't have enough points to decide to accept or reject the new point, so accept it as the initial point
    self.raiseADebug('Accepting first point, since we have no rejection criteria.')
    self.status[traj]['reason'] = 'found new opt point'
    return
  #otherwise, we need to accept/reject point and check convergence
  # NOTE(review): currentInputDenorm is computed but never read below -- possibly left over
  # from debugging; confirm before removing
  currentInputDenorm = self.denormalizeData(self.optVarsHist[traj][self.counter['varsUpdate'][traj]])
  ## first, determine if we want to keep the new point
  # obtain the old loss value
  oldLossVal = self.counter['recentOptHist'][traj][0][self.objVar]
  # see if new point is better than old point
  newerIsBetter = self.checkIfBetter(currentLossVal, oldLossVal)
  # if this was a recommended preconditioning point, we should not be converged.
  pointFromRecommendation = self.status[traj]['reason'] == 'received recommended point'
  # if improved, keep it and move forward; otherwise, reject it and recommend cutting step size
  if newerIsBetter:
    self.status[traj]['reason'] = 'found new opt point'
    self.raiseADebug('Accepting potential opt point for improved loss value. Diff: {}, New: {}, Old: {}'
                     .format(abs(currentLossVal - oldLossVal), currentLossVal, oldLossVal))
    #TODO REWORK this belongs in the base class optimizer; grad shouldn't know about multilevel!!
    # -> this parameter is how multilevel knows that a successful perturbation of an outer loop has been performed
    # maybe implement a "acceptPoint" method in base class?
    self.mlActiveSpaceSteps[traj] += 1
  else:
    self.status[traj]['reason'] = 'rejecting bad opt point'
    self.raiseADebug('Rejecting potential opt point for worse loss value. old: "{}", new: "{}"'
                     .format(oldLossVal, currentLossVal))
    # cut the next step size to hopefully stay in the valley instead of climb up the other side
    self.recommendToGain[traj] = 'cut'
  ## determine convergence
  if pointFromRecommendation:
    # preconditioning points never count toward convergence
    self.raiseAMessage('Setting convergence for Trajectory "{}" to "False" because of preconditioning.'
                       .format(traj))
    converged = False
  else:
    self.raiseAMessage('Checking convergence for Trajectory "{}":'.format(traj))
    self.convergenceProgress[traj] = {} # tracks progress for grad norm, abs, rel tolerances
    converged = False # updated for each individual criterion using "or" (pass one, pass all)
    #printing utility
    printString = ' {:<21}: {:<5}'
    printVals = printString + ' (check: {:>+9.2e} < {:>+9.2e}, diff: {:>9.2e})'
    def printProgress(name, boolCheck, test, gold):
      """
        Consolidates a commonly-used print statement to prevent errors and improve readability.
        @ In, name, str, printed name of convergence check
        @ In, boolCheck, bool, boolean convergence results for this check
        @ In, test, float, value of check at current opt point
        @ In, gold, float, convergence threshold value
        @ Out, None
      """
      self.raiseAMessage(printVals.format(name, str(boolCheck), test, gold, abs(test - gold)))
    # "min step size" and "gradient norm" are both always valid checks, whether rejecting or accepting new point
    # min step size check
    try:
      lastStep = self.counter['lastStepSize'][traj]
      minStepSizeCheck = lastStep <= self.minStepSize
    except KeyError:
      #we reset the step size, so we don't have a value anymore
      lastStep = np.nan
      minStepSizeCheck = False
    printProgress('Min step size', minStepSizeCheck, lastStep, self.minStepSize)
    converged = converged or minStepSizeCheck
    # gradient norm
    if len(self.counter['gradientHistory'][traj][0]) > 0:
      gradNorm = self.counter['gradNormHistory'][traj][0]
      self.convergenceProgress[traj]['grad'] = gradNorm
      gradientNormCheck = gradNorm <= self.gradientNormTolerance
    else:
      # no gradient has been recorded yet for this trajectory
      gradNorm = np.nan
      gradientNormCheck = False
    printProgress('Gradient magnitude', gradientNormCheck, gradNorm, self.gradientNormTolerance)
    converged = converged or gradientNormCheck
    # if accepting new point, then "same coordinate" and "abs" and "rel" checks are also valid reasons to converge
    if newerIsBetter:
      #absolute tolerance
      absLossDiff = abs(mathUtils.diffWithInfinites(currentLossVal, oldLossVal))
      self.convergenceProgress[traj]['abs'] = absLossDiff
      absTolCheck = absLossDiff <= self.absConvergenceTol
      printProgress('Absolute Loss Diff', absTolCheck, absLossDiff, self.absConvergenceTol)
      converged = converged or absTolCheck
      #relative tolerance
      relLossDiff = mathUtils.relativeDiff(currentLossVal, oldLossVal)
      self.convergenceProgress[traj]['rel'] = relLossDiff
      relTolCheck = relLossDiff <= self.relConvergenceTol
      printProgress('Relative Loss Diff', relTolCheck, relLossDiff, self.relConvergenceTol)
      converged = converged or relTolCheck
      #same coordinate check
      sameCoordinateCheck = True
      for var, values in self.optVarsHist[traj][varsUpdate].items():
        # don't check constants, of course they're the same
        if var in self.constants:
          continue
        # differentiate vectors and scalars for checking
        if hasattr(values, '__len__'):
          if any(values != self.counter['recentOptHist'][traj][0][var]):
            sameCoordinateCheck = False
            break
        else:
          if values != self.counter['recentOptHist'][traj][0][var]:
            sameCoordinateCheck = False
            break
      self.raiseAMessage(printString.format('Same coordinate check', str(sameCoordinateCheck)))
      converged = converged or sameCoordinateCheck
  if converged:
    # update number of successful convergences
    self.counter['persistence'][traj] += 1
    # check if we've met persistence requirement; if not, keep going
    if self.counter['persistence'][traj] >= self.convergencePersistence:
      self.raiseAMessage(' ... Trajectory "{}" converged {} times consecutively!'
                         .format(traj, self.counter['persistence'][traj]))
      self.convergeTraj[traj] = True
      self.removeConvergedTrajectory(traj)
    else:
      self.raiseAMessage(' ... converged Traj "{}" {} times, required persistence is {}.'
                         .format(traj, self.counter['persistence'][traj], self.convergencePersistence))
  else:
    # any miss resets the consecutive-convergence counter
    self.counter['persistence'][traj] = 0
    self.raiseAMessage(' ... continuing trajectory "{}".'.format(traj))
#check hyperrectangle diagonal on several dimensions ## 2d sideLengths = [3, 4] checkAnswer('2D hyperdiagonal', mathUtils.hyperdiagonal(sideLengths), 5) ## 3d sideLengths.append(12) checkAnswer('3D hyperdiagonal', mathUtils.hyperdiagonal(sideLengths), 13) ## 3d sideLengths.append(84) checkAnswer('4D hyperdiagonal', mathUtils.hyperdiagonal(sideLengths), 85) # check diffWithInfinites i = float('inf') n = np.inf checkAnswer('InfDiff inf - inf', mathUtils.diffWithInfinites(n, i), 0) checkAnswer('InfDiff inf - finite', mathUtils.diffWithInfinites(n, 0), i) checkAnswer('InfDiff inf - (-inf)', mathUtils.diffWithInfinites(n, -n), i) checkAnswer('InfDiff finite - inf', mathUtils.diffWithInfinites(0, n), -i) checkAnswer('InfDiff finite - finite', mathUtils.diffWithInfinites(3, 2), 1) checkAnswer('InfDiff finite - (-inf)', mathUtils.diffWithInfinites(0, -n), i) checkAnswer('InfDiff -inf - inf', mathUtils.diffWithInfinites(-n, n), -i) checkAnswer('InfDiff -inf - finite', mathUtils.diffWithInfinites(-n, 0), -i) checkAnswer('InfDiff -inf - (-inf)', mathUtils.diffWithInfinites(-n, -n), 0) print(results) sys.exit(results["fail"]) """ <TestInfo> <name>framework.mathUtils</name>