def evaluateSample(self, myInput, samplerType, kwargs):
  """
    This will evaluate an individual sample on this model. Note, parameters
    are needed by createNewInput and thus descriptions are copied from there.
    @ In, myInput, list, the inputs (list) to start from to generate the new one
    @ In, samplerType, string, is the type of sampler that is calling to generate a new input
    @ In, kwargs, dict, is a dictionary that contains the information coming from the sampler,
        a mandatory key is 'SampledVars' that contains a dictionary {'name variable':value}
    @ Out, exportDict, dict, the dictionary containing the inputs used to generate this sample
        and the outputs of the model for those inputs, or None if the run failed
  """
  inputFiles = self.createNewInput(myInput, samplerType, **kwargs)
  self.currentInputFiles, metaData = (copy.deepcopy(inputFiles[0]), inputFiles[1]) if type(inputFiles).__name__ == 'tuple' else (inputFiles, None)
  returnedCommand = self.code.genCommand(self.currentInputFiles, self.executable, flags=self.clargs, fileArgs=self.fargs, preExec=self.preExec)

  ## Given that createNewInput can only return a tuple, I don't think these
  ## checks are necessary (keeping commented out until someone else can verify):
  # if type(returnedCommand).__name__ != 'tuple':
  #   self.raiseAnError(IOError, "the generateCommand method in code interface must return a tuple")
  # if type(returnedCommand[0]).__name__ != 'list':
  #   self.raiseAnError(IOError, "the first entry in tuple returned by generateCommand method needs to be a list of tuples!")
  executeCommand, self.outFileRoot = returnedCommand

  precommand = kwargs['precommand']
  postcommand = kwargs['postcommand']
  bufferSize = kwargs['bufferSize']
  fileExtensionsToDelete = kwargs['deleteOutExtension']
  deleteSuccessfulLogFiles = kwargs['delSucLogFiles']

  codeLogFile = self.outFileRoot
  if codeLogFile is None:
    codeLogFile = os.path.join(metaData['subDirectory'], 'generalOut')

  ## Before we were temporarily changing directories in order to copy the
  ## correct directory to the subprocess. Instead, we can just set the
  ## directory after we copy it over. -- DPM 5/5/2017
  sampleDirectory = os.path.join(os.getcwd(), metaData['subDirectory'])
  localenv = dict(os.environ)
  localenv['PWD'] = str(sampleDirectory)
  outFileObject = open(os.path.join(sampleDirectory, codeLogFile), 'w', bufferSize)

  found = False
  for index, inputFile in enumerate(self.currentInputFiles):
    if inputFile.getExt() in self.code.getInputExtension():
      found = True
      break
  if not found:
    self.raiseAnError(IOError, 'None of the input files has one of the extensions requested by code '
                      + self.subType + ': ' + ' '.join(self.getInputExtension()))

  commands = []
  for runtype, cmd in executeCommand:
    newCommand = ''
    if runtype.lower() == 'parallel':
      newCommand += precommand
      newCommand += cmd + ' '
      newCommand += postcommand
      commands.append(newCommand)
    elif runtype.lower() == 'serial':
      commands.append(cmd)
    else:
      self.raiseAnError(IOError, 'For execution command <' + cmd + '> the run type was neither "serial" nor "parallel"!'
                        '\nInstead received: ', runtype, '\nPlease check the code interface.')

  command = ' && '.join(commands) + ' '
  command = command.replace("%INDEX%", kwargs['INDEX'])
  command = command.replace("%INDEX1%", kwargs['INDEX1'])
  command = command.replace("%CURRENT_ID%", kwargs['CURRENT_ID'])
  command = command.replace("%CURRENT_ID1%", kwargs['CURRENT_ID1'])
  command = command.replace("%SCRIPT_DIR%", kwargs['SCRIPT_DIR'])
  command = command.replace("%FRAMEWORK_DIR%", kwargs['FRAMEWORK_DIR'])
  ## Note this is the working directory that the subprocess will use, it is
  ## not the directory I am currently working in. This bit me as I moved the code
  ## from the old ExternalRunner because in that case this was filled in after
  ## the process was submitted by the process itself. -- DPM 5/4/17
  command = command.replace("%WORKING_DIR%", sampleDirectory)
  command = command.replace("%BASE_WORKING_DIR%", kwargs['BASE_WORKING_DIR'])
  command = command.replace("%METHOD%", kwargs['METHOD'])
  command = command.replace("%NUM_CPUS%", kwargs['NUM_CPUS'])

  self.raiseAMessage('Execution command submitted:', command)
  if platform.system() == 'Windows':
    command = self._expandForWindows(command)
    self.raiseAMessage("modified command to", repr(command))
    for key, value in localenv.items():
      localenv[key] = str(value)
  elif not self.code.getRunOnShell():
    command = self._expandCommand(command)
  print(f'DEBUGG command: |{command}|')

  ## reset python path
  localenv.pop('PYTHONPATH', None)
  ## This code should be evaluated by the job handler, so it is fine to wait
  ## until the execution of the external subprocess completes.
  process = utils.pickleSafeSubprocessPopen(command, shell=self.code.getRunOnShell(),
                                            stdout=outFileObject, stderr=outFileObject,
                                            cwd=localenv['PWD'], env=localenv)
  if self.maxWallTime is not None:
    timeout = time.time() + self.maxWallTime
    while True:
      time.sleep(0.5)
      process.poll()
      if time.time() > timeout and process.returncode is None:
        self.raiseAWarning('walltime exceeded in run in working dir: ' + str(metaData['subDirectory']) + '. Killing the run...')
        process.kill()
        process.returncode = -1
      if process.returncode is not None or time.time() > timeout:
        break
  else:
    process.wait()

  returnCode = process.returncode
  # procOutput = process.communicate()[0]

  ## If the returnCode is already non-zero, we should maintain our current
  ## value as it may have some meaning that can be parsed at some point, so
  ## only set the returnCode to -1 in here if we did not already catch the
  ## failure.
  if returnCode == 0 and 'checkForOutputFailure' in dir(self.code):
    codeFailed = self.code.checkForOutputFailure(codeLogFile, metaData['subDirectory'])
    if codeFailed:
      returnCode = -1
  # close the log file
  outFileObject.close()

  ## We should try and use the output the code interface gives us first, but
  ## in lieu of that we should fall back on the standard output of the code
  ## (which was deleted above in some cases, so I am not sure if this was
  ## an intentional design by the original developer or accidental and should
  ## be revised).
  ## My guess is that every code interface implements this given that the code
  ## below always adds .csv to the filename and the standard output file does
  ## not have an extension. - (DPM 4/6/2017)
  outputFile = codeLogFile
  ## ravenCase is initialized here so it is defined even when the code
  ## interface does not implement finalizeCodeOutput.
  ravenCase = False
  if 'finalizeCodeOutput' in dir(self.code) and returnCode == 0:
    finalCodeOutputFile = self.code.finalizeCodeOutput(command, codeLogFile, metaData['subDirectory'])
    ## Special case for RAVEN interface --ALFOA 09/17/17
    if type(finalCodeOutputFile).__name__ == 'dict':
      ravenCase = True
    if ravenCase and self.code.__class__.__name__ != 'RAVEN':
      self.raiseAnError(RuntimeError, 'The return argument from "finalizeCodeOutput" must be a str containing the new output file root!')
    if finalCodeOutputFile and not ravenCase:
      outputFile = finalCodeOutputFile

  ## If the run was successful
  if returnCode == 0:
    ## This may be a tautology at this point --DPM 4/12/17
    ## Special case for RAVEN interface. Added ravenCase flag --ALFOA 09/17/17
    if outputFile is not None and not ravenCase:
      outFile = Files.CSV()
      ## Should we be adding the file extension here?
      outFile.initialize(outputFile + '.csv', self.messageHandler, path=metaData['subDirectory'])
      csvLoader = CsvLoader.CsvLoader(self.messageHandler)
      # does this CodeInterface have sufficiently intense (or limited) CSV files that
      # it needs to assume floats and use numpy, or can we use pandas?
      loadUtility = self.code.getCsvLoadUtil()
      csvData = csvLoader.loadCsvFile(outFile.getAbsFile(), nullOK=False, utility=loadUtility)
      returnDict = csvLoader.toRealization(csvData)
    if not ravenCase:
      self._replaceVariablesNamesWithAliasSystem(returnDict, 'inout', True)
      returnDict.update(kwargs)
      returnValue = (kwargs['SampledVars'], returnDict)
      exportDict = self.createExportDictionary(returnValue)
    else:
      # we have the DataObjects -> raven-runs-raven case only so far
      # we have two tasks to do: collect the input/output/meta/indexes from the
      # INNER raven run, and ALSO the input from the OUTER raven run.
      # -> in addition, we have to fix the probability weights.
      ## get the number of realizations
      ### we already checked consistency in the CodeInterface, so just get the length of the first data object
      numRlz = len(utils.first(finalCodeOutputFile.values()))
      ## set up the return container
      exportDict = {'RAVEN_isBatch': True, 'realizations': []}
      ## set up each realization
      for n in range(numRlz):
        rlz = {}
        ## collect the results from INNER, both point set and history set
        for dataObj in finalCodeOutputFile.values():
          # TODO FIXME check for overwriting data. For now just replace data if it's duplicate!
          new = dict((var, np.atleast_1d(val)) for var, val in dataObj.realization(index=n, unpackXArray=True).items())
          rlz.update(new)
        ## add OUTER input space
        # TODO FIXME check for overwriting data. For now just replace data if it's duplicate!
        new = dict((var, np.atleast_1d(val)) for var, val in kwargs['SampledVars'].items())
        rlz.update(new)
        ## combine ProbabilityWeights
        # TODO FIXME these are a rough attempt at getting it right!
        rlz['ProbabilityWeight'] = np.atleast_1d(rlz.get('ProbabilityWeight', 1.0) * kwargs.get('ProbabilityWeight', 1.0))
        rlz['PointProbability'] = np.atleast_1d(rlz.get('PointProbability', 1.0) * kwargs.get('PointProbability', 1.0))
        # FIXME: adding "_n" to Optimizer samples scrambles its ability to find evaluations!
        ## temporary fix: only append if there are multiple realizations, and error out if the sampler is an optimizer.
        if numRlz > 1:
          if '_' in kwargs['prefix']:
            self.raiseAnError(RuntimeError, 'OUTER RAVEN is using an OPTIMIZER, but INNER RAVEN is returning multiple realizations!')
          addon = '_{}'.format(n)
        else:
          addon = ''
        rlz['prefix'] = np.atleast_1d(kwargs['prefix'] + addon)
        ## add the rest of the metadata
        # TODO slow
        for var, val in kwargs.items():
          if var not in rlz.keys():
            rlz[var] = np.atleast_1d(val)
        self._replaceVariablesNamesWithAliasSystem(rlz, 'inout', True)
        exportDict['realizations'].append(rlz)

    ## The last thing before returning should be to delete the temporary log
    ## file and any other file the user requests to be cleared
    if deleteSuccessfulLogFiles:
      self.raiseAMessage(' Run "' + kwargs['prefix'] + '" ended smoothly, removing log file!')
      codeLogFileFullPath = os.path.join(metaData['subDirectory'], codeLogFile)
      if os.path.exists(codeLogFileFullPath):
        os.remove(codeLogFileFullPath)
    ## Check if the user specified any file extensions for clean up
    for fileExt in fileExtensionsToDelete:
      fileList = [os.path.join(metaData['subDirectory'], f) for f in os.listdir(metaData['subDirectory']) if f.endswith(fileExt)]
      for f in fileList:
        os.remove(f)

    return exportDict
  else:
    self.raiseAMessage(" Process Failed " + str(command) + " returnCode " + str(returnCode))
    absOutputFile = os.path.join(sampleDirectory, outputFile)
    if os.path.exists(absOutputFile):
      self.raiseAMessage(repr(open(absOutputFile, "r").read()).replace("\\n", "\n"))
    else:
      self.raiseAMessage(" No output " + absOutputFile)

    ## If you made it here, then the run must have failed
    return None
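## ------------------------------------------------------------------------
## Illustrative sketch (not part of the method above): evaluateSample probes the
## code interface with dir() for optional hooks such as checkForOutputFailure,
## finalizeCodeOutput, and getCsvLoadUtil. The class below is a hypothetical,
## minimal interface showing plausible shapes for those hooks, inferred only
## from how they are called above; the real CodeInterfaceBase API may differ,
## so treat the names, signatures, and return values here as assumptions.
import os

class MinimalCodeInterfaceSketch:
  """Hypothetical code interface illustrating the duck-typed hooks probed above."""

  def checkForOutputFailure(self, output, workingDir):
    # Assumed contract: return True if the run should be treated as failed
    # even though the process exited with return code 0.
    logPath = os.path.join(workingDir, output)
    if not os.path.exists(logPath):
      return True
    with open(logPath, 'r') as log:
      return any('ERROR' in line for line in log)

  def finalizeCodeOutput(self, command, output, workingDir):
    # Assumed contract: return the new output file root (without the .csv
    # extension that evaluateSample appends), or a falsy value to keep the
    # code log file as the output. Returning a dict triggers the special
    # RAVEN-runs-RAVEN handling shown above.
    return 'results'  # hypothetical file root produced by post-processing

  def getCsvLoadUtil(self):
    # Assumed contract: tell the CSV loader which backend to use.
    return 'pandas'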
def evaluateSample(self, myInput, samplerType, kwargs):
  """
    This will evaluate an individual sample on this model. Note, parameters
    are needed by createNewInput and thus descriptions are copied from there.
    @ In, myInput, list, the inputs (list) to start from to generate the new one
    @ In, samplerType, string, is the type of sampler that is calling to generate a new input
    @ In, kwargs, dict, is a dictionary that contains the information coming from the sampler,
        a mandatory key is 'SampledVars' that contains a dictionary {'name variable':value}
    @ Out, returnValue, tuple, This will hold two pieces of information, the first item will be
        the input data used to generate this sample, the second item will be the output of this
        model given the specified inputs
  """
  inputFiles = self.createNewInput(myInput, samplerType, **kwargs)
  self.currentInputFiles, metaData = (copy.deepcopy(inputFiles[0]), inputFiles[1]) if type(inputFiles).__name__ == 'tuple' else (inputFiles, None)
  returnedCommand = self.code.genCommand(self.currentInputFiles, self.executable, flags=self.clargs, fileArgs=self.fargs, preExec=self.preExec)

  ## Given that createNewInput can only return a tuple, I don't think these
  ## checks are necessary (keeping commented out until someone else can verify):
  # if type(returnedCommand).__name__ != 'tuple':
  #   self.raiseAnError(IOError, "the generateCommand method in code interface must return a tuple")
  # if type(returnedCommand[0]).__name__ != 'list':
  #   self.raiseAnError(IOError, "the first entry in tuple returned by generateCommand method needs to be a list of tuples!")
  executeCommand, self.outFileRoot = returnedCommand

  precommand = kwargs['precommand']
  postcommand = kwargs['postcommand']
  bufferSize = kwargs['bufferSize']
  fileExtensionsToDelete = kwargs['deleteOutExtension']
  deleteSuccessfulLogFiles = kwargs['delSucLogFiles']

  codeLogFile = self.outFileRoot
  if codeLogFile is None:
    codeLogFile = os.path.join(metaData['subDirectory'], 'generalOut')

  ## Before we were temporarily changing directories in order to copy the
  ## correct directory to the subprocess. Instead, we can just set the
  ## directory after we copy it over. -- DPM 5/5/2017
  sampleDirectory = os.path.join(os.getcwd(), metaData['subDirectory'])
  localenv = dict(os.environ)
  localenv['PWD'] = str(sampleDirectory)
  outFileObject = open(os.path.join(sampleDirectory, codeLogFile), 'w', bufferSize)

  found = False
  for index, inputFile in enumerate(self.currentInputFiles):
    if inputFile.getExt() in self.code.getInputExtension():
      found = True
      break
  if not found:
    self.raiseAnError(IOError, 'None of the input files has one of the extensions requested by code '
                      + self.subType + ': ' + ' '.join(self.getInputExtension()))

  commands = []
  for runtype, cmd in executeCommand:
    newCommand = ''
    if runtype.lower() == 'parallel':
      newCommand += precommand
      newCommand += cmd + ' '
      newCommand += postcommand
      commands.append(newCommand)
    elif runtype.lower() == 'serial':
      commands.append(cmd)
    else:
      self.raiseAnError(IOError, 'For execution command <' + cmd + '> the run type was neither "serial" nor "parallel"!'
                        '\nInstead received: ', runtype, '\nPlease check the code interface.')

  command = ' && '.join(commands) + ' '
  command = command.replace("%INDEX%", kwargs['INDEX'])
  command = command.replace("%INDEX1%", kwargs['INDEX1'])
  command = command.replace("%CURRENT_ID%", kwargs['CURRENT_ID'])
  command = command.replace("%CURRENT_ID1%", kwargs['CURRENT_ID1'])
  command = command.replace("%SCRIPT_DIR%", kwargs['SCRIPT_DIR'])
  command = command.replace("%FRAMEWORK_DIR%", kwargs['FRAMEWORK_DIR'])
  ## Note this is the working directory that the subprocess will use, it is
  ## not the directory I am currently working in. This bit me as I moved the code
  ## from the old ExternalRunner because in that case this was filled in after
  ## the process was submitted by the process itself. -- DPM 5/4/17
  command = command.replace("%WORKING_DIR%", sampleDirectory)
  command = command.replace("%BASE_WORKING_DIR%", kwargs['BASE_WORKING_DIR'])
  command = command.replace("%METHOD%", kwargs['METHOD'])
  command = command.replace("%NUM_CPUS%", kwargs['NUM_CPUS'])

  self.raiseAMessage('Execution command submitted:', command)
  if platform.system() == 'Windows':
    command = self._expandForWindows(command)
    self.raiseAMessage("modified command to " + repr(command))

  ## This code should be evaluated by the job handler, so it is fine to wait
  ## until the execution of the external subprocess completes.
  process = utils.pickleSafeSubprocessPopen(command, shell=True,
                                            stdout=outFileObject, stderr=outFileObject,
                                            cwd=localenv['PWD'], env=localenv)
  process.wait()

  returnCode = process.returncode
  # procOutput = process.communicate()[0]

  ## If the returnCode is already non-zero, we should maintain our current
  ## value as it may have some meaning that can be parsed at some point, so
  ## only set the returnCode to -1 in here if we did not already catch the
  ## failure.
  if returnCode == 0 and 'checkForOutputFailure' in dir(self.code):
    codeFailed = self.code.checkForOutputFailure(codeLogFile, metaData['subDirectory'])
    if codeFailed:
      returnCode = -1
  # close the log file
  outFileObject.close()

  ## We should try and use the output the code interface gives us first, but
  ## in lieu of that we should fall back on the standard output of the code
  ## (which was deleted above in some cases, so I am not sure if this was
  ## an intentional design by the original developer or accidental and should
  ## be revised).
  ## My guess is that every code interface implements this given that the code
  ## below always adds .csv to the filename and the standard output file does
  ## not have an extension. - (DPM 4/6/2017)
  outputFile = codeLogFile
  if 'finalizeCodeOutput' in dir(self.code):
    finalCodeOutputFile = self.code.finalizeCodeOutput(command, codeLogFile, metaData['subDirectory'])
    if finalCodeOutputFile:
      outputFile = finalCodeOutputFile

  ## If the run was successful
  if returnCode == 0:
    returnDict = {}
    ## This may be a tautology at this point --DPM 4/12/17
    if outputFile is not None:
      outFile = Files.CSV()
      ## Should we be adding the file extension here?
      outFile.initialize(outputFile + '.csv', self.messageHandler, path=metaData['subDirectory'])
      csvLoader = CsvLoader.CsvLoader(self.messageHandler)
      csvData = csvLoader.loadCsvFile(outFile)
      headers = csvLoader.getAllFieldNames()

      ## Numpy by default iterates over rows, thus we transpose the data and
      ## zip it with the headers in order to store it very cleanly into a
      ## dictionary.
      for header, data in zip(headers, csvData.T):
        returnDict[header] = data
    self._replaceVariablesNamesWithAliasSystem(returnDict, 'input', True)
    self._replaceVariablesNamesWithAliasSystem(returnDict, 'output', True)

    ## The last thing before returning should be to delete the temporary log
    ## file and any other file the user requests to be cleared
    if deleteSuccessfulLogFiles:
      self.raiseAMessage(' Run "' + kwargs['prefix'] + '" ended smoothly, removing log file!')
      codeLogFileFullPath = os.path.join(metaData['subDirectory'], codeLogFile)
      if os.path.exists(codeLogFileFullPath):
        os.remove(codeLogFileFullPath)
    ## Check if the user specified any file extensions for clean up
    for fileExt in fileExtensionsToDelete:
      if not fileExt.startswith("."):
        fileExt = "." + fileExt
      fileList = [os.path.join(metaData['subDirectory'], f) for f in os.listdir(metaData['subDirectory']) if f.endswith(fileExt)]
      for f in fileList:
        os.remove(f)

    returnValue = (kwargs['SampledVars'], returnDict)
    return returnValue
  else:
    self.raiseAMessage(" Process Failed " + str(command) + " returnCode " + str(returnCode))
    absOutputFile = os.path.join(sampleDirectory, outputFile)
    if os.path.exists(absOutputFile):
      self.raiseAMessage(repr(open(absOutputFile, "r").read()).replace("\\n", "\n"))
    else:
      self.raiseAMessage(" No output " + absOutputFile)

    ## If you made it here, then the run must have failed
    return None
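## ------------------------------------------------------------------------
## Illustrative sketch (standalone example, not the RAVEN CsvLoader API): the
## loop above turns a 2-D CSV array into a {header: column} dictionary by
## transposing the data so iteration yields columns instead of rows. The
## snippet below shows the same transpose-and-zip pattern with plain numpy on
## an in-memory CSV; the CsvLoader used above wraps this with extra handling.
import io
import numpy as np

csvText = io.StringIO("x,y,out\n1.0,2.0,3.0\n4.0,5.0,9.0\n")
headers = csvText.readline().strip().split(',')     # ['x', 'y', 'out']
data = np.loadtxt(csvText, delimiter=',')           # shape (nRows, nColumns)
columns = {header: col for header, col in zip(headers, data.T)}  # transpose -> columns
# columns == {'x': array([1., 4.]), 'y': array([2., 5.]), 'out': array([3., 9.])}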