Code Example #1
File: Code.py  Project: dylanjm/raven
  def evaluateSample(self, myInput, samplerType, kwargs):
    """
        This will evaluate an individual sample on this model. Note, parameters
        are needed by createNewInput and thus descriptions are copied from there.
        @ In, myInput, list, the inputs (list) to start from to generate the new one
        @ In, samplerType, string, is the type of sampler that is calling to generate a new input
        @ In, kwargs, dict, is a dictionary that contains the information coming from the sampler;
           a mandatory key is 'SampledVars', which contains a dictionary {'variable name': value}
        @ Out, returnValue, tuple, This will hold two pieces of information,
          the first item will be the input data used to generate this sample,
          the second item will be the output of this model given the specified
          inputs
    """
    inputFiles = self.createNewInput(myInput, samplerType, **kwargs)
    self.currentInputFiles, metaData = (copy.deepcopy(inputFiles[0]),inputFiles[1]) if type(inputFiles).__name__ == 'tuple' else (inputFiles, None)
    returnedCommand = self.code.genCommand(self.currentInputFiles,self.executable, flags=self.clargs, fileArgs=self.fargs, preExec=self.preExec)

    ## Given that createNewInput can only return a tuple, I don't think these
    ## checks are necessary (keeping commented out until someone else can verify):
    # if type(returnedCommand).__name__ != 'tuple':
    #   self.raiseAnError(IOError, "the generateCommand method in code interface must return a tuple")
    # if type(returnedCommand[0]).__name__ != 'list':
    #   self.raiseAnError(IOError, "the first entry in tuple returned by generateCommand method needs to be a list of tuples!")
    executeCommand, self.outFileRoot = returnedCommand

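    ## Collect the run-time options the sampler/job handler passed along in kwargs.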
    precommand = kwargs['precommand']
    postcommand = kwargs['postcommand']
    bufferSize = kwargs['bufferSize']
    fileExtensionsToDelete = kwargs['deleteOutExtension']
    deleteSuccessfulLogFiles = kwargs['delSucLogFiles']

    codeLogFile = self.outFileRoot
    if codeLogFile is None:
      codeLogFile = os.path.join(metaData['subDirectory'],'generalOut')

    ## Before we were temporarily changing directories in order to copy the
    ## correct directory to the subprocess. Instead, we can just set the
    ## directory after we copy it over. -- DPM 5/5/2017
    sampleDirectory = os.path.join(os.getcwd(),metaData['subDirectory'])
    localenv = dict(os.environ)
    localenv['PWD'] = str(sampleDirectory)
    outFileObject = open(os.path.join(sampleDirectory,codeLogFile), 'w', bufferSize)

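    ## Make sure at least one of the generated input files carries an extension
    ## that the code interface declares it accepts.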
    found = False
    for index, inputFile in enumerate(self.currentInputFiles):
      if inputFile.getExt() in self.code.getInputExtension():
        found = True
        break
    if not found:
      self.raiseAnError(IOError,'None of the input files has one of the extensions requested by code '
                                  + self.subType +': ' + ' '.join(self.code.getInputExtension()))
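    ## Assemble the command line: 'parallel' entries are wrapped with the
    ## pre/post commands, while 'serial' entries are used as-is.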
    commands=[]
    for runtype,cmd in executeCommand:
      newCommand=''
      if runtype.lower() == 'parallel':
        newCommand += precommand
        newCommand += cmd+' '
        newCommand += postcommand
        commands.append(newCommand)
      elif runtype.lower() == 'serial':
        commands.append(cmd)
      else:
        self.raiseAnError(IOError,'For execution command <'+cmd+'> the run type was neither "serial" nor "parallel"!  Instead received: ',runtype,'\nPlease check the code interface.')

    command = ' && '.join(commands)+' '

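    ## Substitute the %...% wildcards with the values supplied by the job handler.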
    command = command.replace("%INDEX%",kwargs['INDEX'])
    command = command.replace("%INDEX1%",kwargs['INDEX1'])
    command = command.replace("%CURRENT_ID%",kwargs['CURRENT_ID'])
    command = command.replace("%CURRENT_ID1%",kwargs['CURRENT_ID1'])
    command = command.replace("%SCRIPT_DIR%",kwargs['SCRIPT_DIR'])
    command = command.replace("%FRAMEWORK_DIR%",kwargs['FRAMEWORK_DIR'])
    ## Note this is the working directory that the subprocess will use, it is
    ## not the directory I am currently working. This bit me as I moved the code
    ## from the old ExternalRunner because in that case this was filled in after
    ## the process was submitted by the process itself. -- DPM 5/4/17
    command = command.replace("%WORKING_DIR%",sampleDirectory)
    command = command.replace("%BASE_WORKING_DIR%",kwargs['BASE_WORKING_DIR'])
    command = command.replace("%METHOD%",kwargs['METHOD'])
    command = command.replace("%NUM_CPUS%",kwargs['NUM_CPUS'])

    self.raiseAMessage('Execution command submitted:',command)
    if platform.system() == 'Windows':
      command = self._expandForWindows(command)
      self.raiseAMessage("modified command to", repr(command))
      for key, value in localenv.items():
        localenv[key]=str(value)
    elif not self.code.getRunOnShell():
      command = self._expandCommand(command)
    ## reset python path
    localenv.pop('PYTHONPATH',None)
    ## This code should be evaluated by the job handler, so it is fine to wait
    ## until the execution of the external subprocess completes.
    process = utils.pickleSafeSubprocessPopen(command, shell=self.code.getRunOnShell(), stdout=outFileObject, stderr=outFileObject, cwd=localenv['PWD'], env=localenv)
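    ## If a maximum wall time is set, poll the process and kill it once the
    ## limit is exceeded; otherwise simply wait for the run to finish.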
    if self.maxWallTime is not None:
      timeout = time.time() + self.maxWallTime
      while True:
        time.sleep(0.5)
        process.poll()
        if time.time() > timeout and process.returncode is None:
          self.raiseAWarning('walltime exceeded in run in working dir: '+str(metaData['subDirectory'])+'. Killing the run...')
          process.kill()
          process.returncode = -1
        if process.returncode is not None or time.time() > timeout:
          break
    else:
      process.wait()

    returnCode = process.returncode
    # procOutput = process.communicate()[0]

    ## If the returnCode is already non-zero, we should maintain our current
    ## value as it may have some meaning that can be parsed at some point, so
    ## only set the returnCode to -1 in here if we did not already catch the
    ## failure.
    if returnCode == 0 and 'checkForOutputFailure' in dir(self.code):
      codeFailed = self.code.checkForOutputFailure(codeLogFile, metaData['subDirectory'])
      if codeFailed:
        returnCode = -1
    # close the log file
    outFileObject.close()
    ## We should try and use the output the code interface gives us first, but
    ## in lieu of that we should fall back on the standard output of the code
    ## (Which was deleted above in some cases, so I am not sure if this was
    ##  an intentional design by the original developer or accidental and should
    ##  be revised).
    ## My guess is that every code interface implements this given that the code
    ## below always adds .csv to the filename and the standard output file does
    ## not have an extension. - (DPM 4/6/2017)
    outputFile = codeLogFile
    ## Special case for RAVEN interface --ALFOA 09/17/17
    ## Initialize the flag here so it is defined even when the code interface
    ## does not implement finalizeCodeOutput (it is checked again below).
    ravenCase = False
    if 'finalizeCodeOutput' in dir(self.code) and returnCode == 0:
      finalCodeOutputFile = self.code.finalizeCodeOutput(command, codeLogFile, metaData['subDirectory'])
      if type(finalCodeOutputFile).__name__ == 'dict':
        ravenCase = True
      if ravenCase and self.code.__class__.__name__ != 'RAVEN':
        self.raiseAnError(RuntimeError, 'The return argument from "finalizeCodeOutput" must be a str containing the new output file root!')
      if finalCodeOutputFile and not ravenCase:
        outputFile = finalCodeOutputFile

    ## If the run was successful
    if returnCode == 0:
      ## This may be a tautology at this point --DPM 4/12/17
      ## Special case for RAVEN interface. Added ravenCase flag --ALFOA 09/17/17
      if outputFile is not None and not ravenCase:
        outFile = Files.CSV()
        ## Should we be adding the file extension here?
        outFile.initialize(outputFile+'.csv',self.messageHandler,path=metaData['subDirectory'])

        csvLoader = CsvLoader.CsvLoader(self.messageHandler)
        # does this CodeInterface have sufficiently intense (or limited) CSV files that
        #   it needs to assume floats and use numpy, or can we use pandas?
        loadUtility = self.code.getCsvLoadUtil()
        csvData = csvLoader.loadCsvFile(outFile.getAbsFile(), nullOK=False, utility=loadUtility)
        returnDict = csvLoader.toRealization(csvData)

      if not ravenCase:
        self._replaceVariablesNamesWithAliasSystem(returnDict, 'inout', True)
        returnDict.update(kwargs)
        returnValue = (kwargs['SampledVars'],returnDict)
        exportDict = self.createExportDictionary(returnValue)
      else:
        # we have the DataObjects -> raven-runs-raven case only so far
        # we have two tasks to do: collect the input/output/meta/indexes from the INNER raven run, and ALSO the input from the OUTER raven run.
        #  -> in addition, we have to fix the probability weights.
        ## get the number of realizations
        ### we already checked consistency in the CodeInterface, so just get the length of the first data object
        numRlz = len(utils.first(finalCodeOutputFile.values()))
        ## set up the return container
        exportDict = {'RAVEN_isBatch':True,'realizations':[]}
        ## set up each realization
        for n in range(numRlz):
          rlz = {}
          ## collect the results from INNER, both point set and history set
          for dataObj in finalCodeOutputFile.values():
            # TODO FIXME check for overwriting data.  For now just replace data if it's duplicate!
            new = dict((var,np.atleast_1d(val)) for var,val in dataObj.realization(index=n,unpackXArray=True).items())
            rlz.update( new )
          ## add OUTER input space
          # TODO FIXME check for overwriting data.  For now just replace data if it's duplicate!
          new = dict((var,np.atleast_1d(val)) for var,val in kwargs['SampledVars'].items())
          rlz.update( new )
          ## combine ProbabilityWeights # TODO FIXME these are a rough attempt at getting it right!
          rlz['ProbabilityWeight'] = np.atleast_1d(rlz.get('ProbabilityWeight',1.0) * kwargs.get('ProbabilityWeight',1.0))
          rlz['PointProbability'] = np.atleast_1d(rlz.get('PointProbability',1.0) * kwargs.get('PointProbability',1.0))
          # FIXME: adding "_n" to Optimizer samples scrambles its ability to find evaluations!
          ## temporary fix: only append if there's multiple realizations, and error out if sampler is an optimizer.
          if numRlz > 1:
            if '_' in kwargs['prefix']:
              self.raiseAnError(RuntimeError,'OUTER RAVEN is using an OPTIMIZER, but INNER RAVEN is returning multiple realizations!')
            addon = '_{}'.format(n)
          else:
            addon = ''
          rlz['prefix'] = np.atleast_1d(kwargs['prefix']+addon)
          ## add the rest of the metadata # TODO slow
          for var,val in kwargs.items():
            if var not in rlz.keys():
              rlz[var] = np.atleast_1d(val)
          self._replaceVariablesNamesWithAliasSystem(rlz,'inout',True)
          exportDict['realizations'].append(rlz)

      ## The last thing before returning should be to delete the temporary log
      ## file and any other file the user requests to be cleared
      if deleteSuccessfulLogFiles:
        self.raiseAMessage(' Run "' +kwargs['prefix']+'" ended smoothly, removing log file!')
        codeLogFileFullPath = os.path.join(metaData['subDirectory'],codeLogFile)
        if os.path.exists(codeLogFileFullPath):
          os.remove(codeLogFileFullPath)

      ## Check if the user specified any file extensions for clean up
      for fileExt in fileExtensionsToDelete:
        fileList = [ os.path.join(metaData['subDirectory'],f) for f in os.listdir(metaData['subDirectory']) if f.endswith(fileExt) ]
        for f in fileList:
          os.remove(f)

      return exportDict

    else:
      self.raiseAMessage(" Process Failed "+str(command)+" returnCode "+str(returnCode))
      absOutputFile = os.path.join(sampleDirectory,outputFile)
      if os.path.exists(absOutputFile):
        with open(absOutputFile, "r") as failedOutFile:
          self.raiseAMessage(repr(failedOutFile.read()).replace("\\n","\n"))
      else:
        self.raiseAMessage(" No output " + absOutputFile)

      ## If you made it here, then the run must have failed
      return None
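
A minimal, hypothetical usage sketch follows, assuming codeModel is a Code model instance already configured by the RAVEN framework and inputFileList holds the template input Files objects; the dictionary lists only the kwargs keys that evaluateSample itself reads, with placeholder values.

# Hypothetical sketch: codeModel and inputFileList are assumed to come from an
# already-configured RAVEN run; the keys below are the ones evaluateSample reads,
# and the values are placeholders.
sampleInfo = {
  'SampledVars'       : {'someVariable': 1.0},  # hypothetical sampled variable
  'prefix'            : '1',
  'precommand'        : '',
  'postcommand'       : '',
  'bufferSize'        : 1,
  'deleteOutExtension': [],
  'delSucLogFiles'    : False,
  'INDEX'             : '0',
  'INDEX1'            : '1',
  'CURRENT_ID'        : '0',
  'CURRENT_ID1'       : '1',
  'SCRIPT_DIR'        : '/path/to/scripts',     # hypothetical paths
  'FRAMEWORK_DIR'     : '/path/to/framework',
  'BASE_WORKING_DIR'  : '/path/to/workdir',
  'METHOD'            : '',
  'NUM_CPUS'          : '1',
}
exportDict = codeModel.evaluateSample(inputFileList, 'MonteCarlo', sampleInfo)
if exportDict is None:
  print('the external code run failed')

On success the export dictionary assembled above is returned; on failure the method returns None, as in the final branch of the code.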
Code Example #2
  def evaluateSample(self, myInput, samplerType, kwargs):
    """
        This will evaluate an individual sample on this model. Note, parameters
        are needed by createNewInput and thus descriptions are copied from there.
        @ In, myInput, list, the inputs (list) to start from to generate the new one
        @ In, samplerType, string, is the type of sampler that is calling to generate a new input
        @ In, kwargs, dict, is a dictionary that contains the information coming from the sampler;
           a mandatory key is 'SampledVars', which contains a dictionary {'variable name': value}
        @ Out, returnValue, tuple, This will hold two pieces of information,
          the first item will be the input data used to generate this sample,
          the second item will be the output of this model given the specified
          inputs
    """
    inputFiles = self.createNewInput(myInput, samplerType, **kwargs)
    self.currentInputFiles, metaData = (copy.deepcopy(inputFiles[0]),inputFiles[1]) if type(inputFiles).__name__ == 'tuple' else (inputFiles, None)
    returnedCommand = self.code.genCommand(self.currentInputFiles,self.executable, flags=self.clargs, fileArgs=self.fargs, preExec=self.preExec)

    ## Given that createNewInput can only return a tuple, I don't think these
    ## checks are necessary (keeping commented out until someone else can verify):
    # if type(returnedCommand).__name__ != 'tuple':
    #   self.raiseAnError(IOError, "the generateCommand method in code interface must return a tuple")
    # if type(returnedCommand[0]).__name__ != 'list':
    #   self.raiseAnError(IOError, "the first entry in tuple returned by generateCommand method needs to be a list of tuples!")
    executeCommand, self.outFileRoot = returnedCommand

    precommand = kwargs['precommand']
    postcommand = kwargs['postcommand']
    bufferSize = kwargs['bufferSize']
    fileExtensionsToDelete = kwargs['deleteOutExtension']
    deleteSuccessfulLogFiles = kwargs['delSucLogFiles']

    codeLogFile = self.outFileRoot
    if codeLogFile is None:
      codeLogFile = os.path.join(metaData['subDirectory'],'generalOut')

    ## Before we were temporarily changing directories in order to copy the
    ## correct directory to the subprocess. Instead, we can just set the
    ## directory after we copy it over. -- DPM 5/5/2017
    sampleDirectory = os.path.join(os.getcwd(),metaData['subDirectory'])
    localenv = dict(os.environ)
    localenv['PWD'] = str(sampleDirectory)

    outFileObject = open(os.path.join(sampleDirectory,codeLogFile), 'w', bufferSize)

    found = False
    for index, inputFile in enumerate(self.currentInputFiles):
      if inputFile.getExt() in self.code.getInputExtension():
        found = True
        break
    if not found:
      self.raiseAnError(IOError,'None of the input files has one of the extensions requested by code '
                                  + self.subType +': ' + ' '.join(self.code.getInputExtension()))
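    ## Build the full command: wrap 'parallel' entries with the pre/post
    ## commands; use 'serial' entries as-is.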
    commands=[]
    for runtype,cmd in executeCommand:
      newCommand=''
      if runtype.lower() == 'parallel':
        newCommand += precommand
        newCommand += cmd+' '
        newCommand += postcommand
        commands.append(newCommand)
      elif runtype.lower() == 'serial':
        commands.append(cmd)
      else:
        self.raiseAnError(IOError,'For execution command <'+cmd+'> the run type was neither "serial" nor "parallel"!  Instead received: ',runtype,'\nPlease check the code interface.')

    command = ' && '.join(commands)+' '

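    ## Replace the %...% wildcards with the values supplied by the job handler.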
    command = command.replace("%INDEX%",kwargs['INDEX'])
    command = command.replace("%INDEX1%",kwargs['INDEX1'])
    command = command.replace("%CURRENT_ID%",kwargs['CURRENT_ID'])
    command = command.replace("%CURRENT_ID1%",kwargs['CURRENT_ID1'])
    command = command.replace("%SCRIPT_DIR%",kwargs['SCRIPT_DIR'])
    command = command.replace("%FRAMEWORK_DIR%",kwargs['FRAMEWORK_DIR'])
    ## Note this is the working directory that the subprocess will use, it is
    ## not the directory I am currently working. This bit me as I moved the code
    ## from the old ExternalRunner because in that case this was filled in after
    ## the process was submitted by the process itself. -- DPM 5/4/17
    command = command.replace("%WORKING_DIR%",sampleDirectory)
    command = command.replace("%BASE_WORKING_DIR%",kwargs['BASE_WORKING_DIR'])
    command = command.replace("%METHOD%",kwargs['METHOD'])
    command = command.replace("%NUM_CPUS%",kwargs['NUM_CPUS'])

    self.raiseAMessage('Execution command submitted:',command)
    if platform.system() == 'Windows':
      command = self._expandForWindows(command)
      self.raiseAMessage("modified command to " + repr(command))

    ## This code should be evaluated by the job handler, so it is fine to wait
    ## until the execution of the external subprocess completes.
    process = utils.pickleSafeSubprocessPopen(command, shell=True, stdout=outFileObject, stderr=outFileObject, cwd=localenv['PWD'], env=localenv)
    process.wait()

    returnCode = process.returncode
    # procOutput = process.communicate()[0]

    ## If the returnCode is already non-zero, we should maintain our current
    ## value as it may have some meaning that can be parsed at some point, so
    ## only set the returnCode to -1 in here if we did not already catch the
    ## failure.
    if returnCode == 0 and 'checkForOutputFailure' in dir(self.code):
      codeFailed = self.code.checkForOutputFailure(codeLogFile, metaData['subDirectory'])
      if codeFailed:
        returnCode = -1
    # close the log file
    outFileObject.close()
    ## We should try and use the output the code interface gives us first, but
    ## in lieu of that we should fall back on the standard output of the code
    ## (Which was deleted above in some cases, so I am not sure if this was
    ##  an intentional design by the original developer or accidental and should
    ##  be revised).
    ## My guess is that every code interface implements this given that the code
    ## below always adds .csv to the filename and the standard output file does
    ## not have an extension. - (DPM 4/6/2017)
    outputFile = codeLogFile
    if 'finalizeCodeOutput' in dir(self.code):
      finalCodeOutputFile = self.code.finalizeCodeOutput(command, codeLogFile, metaData['subDirectory'])
      if finalCodeOutputFile:
        outputFile = finalCodeOutputFile

    ## If the run was successful
    if returnCode == 0:

      returnDict = {}
      ## This may be a tautology at this point --DPM 4/12/17
      if outputFile is not None:
        outFile = Files.CSV()
        ## Should we be adding the file extension here?
        outFile.initialize(outputFile+'.csv',self.messageHandler,path=metaData['subDirectory'])

        csvLoader = CsvLoader.CsvLoader(self.messageHandler)
        csvData = csvLoader.loadCsvFile(outFile)
        headers = csvLoader.getAllFieldNames()

        ## Numpy by default iterates over rows, thus we transpose the data and
        ## zip it with the headers in order to do store it very cleanly into a
        ## dictionary.
        for header,data in zip(headers, csvData.T):
          returnDict[header] = data

      self._replaceVariablesNamesWithAliasSystem(returnDict, 'input', True)
      self._replaceVariablesNamesWithAliasSystem(returnDict, 'output', True)

      ## The last thing before returning should be to delete the temporary log
      ## file and any other file the user requests to be cleared
      if deleteSuccessfulLogFiles:
        self.raiseAMessage(' Run "' +kwargs['prefix']+'" ended smoothly, removing log file!')
        codeLogFileFullPath = os.path.join(metaData['subDirectory'],codeLogFile)
        if os.path.exists(codeLogFileFullPath):
          os.remove(codeLogFileFullPath)

      ## Check if the user specified any file extensions for clean up
      for fileExt in fileExtensionsToDelete:
        if not fileExt.startswith("."):
          fileExt = "." + fileExt

        fileList = [ os.path.join(metaData['subDirectory'],f) for f in os.listdir(metaData['subDirectory']) if f.endswith(fileExt) ]

        for f in fileList:
          os.remove(f)

      returnValue = (kwargs['SampledVars'],returnDict)
      return returnValue
    else:
      self.raiseAMessage(" Process Failed "+str(command)+" returnCode "+str(returnCode))
      absOutputFile = os.path.join(sampleDirectory,outputFile)
      if os.path.exists(absOutputFile):
        with open(absOutputFile, "r") as failedOutFile:
          self.raiseAMessage(repr(failedOutFile.read()).replace("\\n","\n"))
      else:
        self.raiseAMessage(" No output " + absOutputFile)

      ## If you made it here, then the run must have failed
      return None
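
This older variant returns the (SampledVars, returnDict) tuple built on the success path rather than an export dictionary; below is a hedged sketch of unpacking it, reusing the hypothetical codeModel, inputFileList and sampleInfo assumed after Code Example #1.

# Hypothetical sketch, reusing codeModel, inputFileList and sampleInfo from the
# note after Code Example #1. Here a successful run returns a (sampledVars,
# outputs) tuple, with one numpy array per CSV header.
result = codeModel.evaluateSample(inputFileList, 'MonteCarlo', sampleInfo)
if result is not None:
  sampledVars, outputs = result
  for name, column in outputs.items():
    print(name, column)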