def sendJobAccounting(self, dataFromBDSoft, jobId):
    accountingReport = AccountingJob()
    accountingReport.setStartTime()

    result = jobDB.getJobAttributes(jobId)
    getting = result["Value"]

    # If the BigData software reported no CPU time, fall back to the
    # wall-clock interval between submission and end of execution.
    if dataFromBDSoft["CPUTime"] == 0:
        cpuTime = 0
        if getting["EndExecTime"] != "None":
            epoch = datetime(1970, 1, 1)
            # The expression below is the timedelta in seconds
            # (equivalent to td.total_seconds() on Python >= 2.7)
            td = datetime.strptime(getting["EndExecTime"], "%Y-%m-%d %H:%M:%S") - epoch
            EndExecTime = (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10 ** 6) / 1e6
            td = datetime.strptime(getting["SubmissionTime"], "%Y-%m-%d %H:%M:%S") - epoch
            SubmissionTime = (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10 ** 6) / 1e6
            cpuTime = EndExecTime - SubmissionTime
    else:
        # The reported CPU time is in milliseconds; convert to seconds
        cpuTime = dataFromBDSoft["CPUTime"] / 1000

    acData = {
        "User": getting["Owner"],
        "UserGroup": getting["OwnerGroup"],
        "JobGroup": "cesga",
        "JobType": "User",
        "JobClass": "unknown",
        "ProcessingType": "unknown",
        "FinalMajorStatus": getting["Status"],
        "FinalMinorStatus": getting["MinorStatus"],
        "CPUTime": cpuTime,
        "Site": getting["Site"],
        # Based on the factor to convert raw CPU to Normalized units (based on the CPU Model)
        "NormCPUTime": 0,
        "ExecTime": cpuTime,
        "InputDataSize": dataFromBDSoft["InputDataSize"],
        "OutputDataSize": dataFromBDSoft["OutputDataSize"],
        "InputDataFiles": dataFromBDSoft["InputDataFiles"],
        "OutputDataFiles": len(self.fileList),
        "DiskSpace": 0,
        "InputSandBoxSize": 0,
        "OutputSandBoxSize": self.outputSandboxSize,
        "ProcessedEvents": 0,
    }

    accountingReport.setEndTime()
    accountingReport.setValuesFromDict(acData)
    self.log.debug("Info for accounting: ", acData)
    result = accountingReport.commit()
    self.log.debug("Accounting insertion: ", result)
    return result
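# A minimal, self-contained sketch of the epoch-seconds conversion used in the
# wall-clock fallback above: timedelta.total_seconds() (Python >= 2.7) computes
# exactly the manual expression
# (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10 ** 6) / 1e6.
# The helper name and the sample timestamps are illustrative, not part of the
# original module.
from datetime import datetime

def toEpochSeconds(timestamp, fmt="%Y-%m-%d %H:%M:%S"):
    """Convert a JobDB timestamp string into seconds since the Unix epoch."""
    return (datetime.strptime(timestamp, fmt) - datetime(1970, 1, 1)).total_seconds()

# Example: wall-clock seconds between submission and end of execution
# toEpochSeconds("2014-01-01 12:30:00") - toEpochSeconds("2014-01-01 12:00:00") == 1800.0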
def submitNewBigJob( self ):
  result = jobDB.getJobJDL( str( self.__jobID ), True )
  classAdJob = ClassAd( result['Value'] )
  executableFile = ""
  if classAdJob.lookupAttribute( 'Executable' ):
    executableFile = classAdJob.getAttributeString( 'Executable' )

  tempPath = self.__tmpSandBoxDir
  dirac = Dirac()
  if not os.path.exists( tempPath ):
    os.makedirs( tempPath )

  settingJobSandBoxDir = dirac.getInputSandbox( self.__jobID, tempPath )
  self.log.info( 'Writing temporary SandboxDir on the server', settingJobSandBoxDir )

  moveData = self.__tmpSandBoxDir + "/InputSandbox" + str( self.__jobID )
  HiveV1Cli = HiveV1Client( self.__User, self.__publicIP )
  returned = HiveV1Cli.dataCopy( moveData, self.__tmpSandBoxDir )
  self.log.info( 'Copying the job content to the Hadoop Master with HIVE: ', returned )

  jobInfo = jobDB.getJobAttributes( self.__jobID )
  if not jobInfo['OK']:
    return S_ERROR( jobInfo['Message'] )

  proxy = ""
  jobInfo = jobInfo['Value']
  if gProxyManager.userHasProxy( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] ):
    proxy = gProxyManager.downloadProxyToFile( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] )
  else:
    proxy = self.__requestProxyFromProxyManager( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] )

  HiveJob = "InputSandbox" + str( self.__jobID ) + "/" + executableFile
  HiveJobOutput = str( self.__jobID ) + "_" + executableFile + "_out"

  returned = HiveV1Cli.jobSubmit( tempPath, HiveJob, proxy['chain'], HiveJobOutput )
  self.log.info( 'Launching Hadoop-Hive job on the Master: ', returned )

  if not returned['OK']:
    return S_ERROR( returned['Message'] )
  else:
    self.log.info( 'Hadoop-Hive Job ID: ', returned['Value'] )
    return S_OK( returned['Value'] )
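# __requestProxyFromProxyManager is called above but not defined in this
# section. A minimal sketch of what such a helper might do, reusing the same
# gProxyManager.downloadProxyToFile call already used above; the body and the
# error handling are assumptions, not the verified original:
def __requestProxyFromProxyManager( self, ownerDN, ownerGroup ):
  """Retrieve a proxy for the job owner from the ProxyManager."""
  self.log.info( 'Requesting proxy from ProxyManager for', '%s@%s' % ( ownerDN, ownerGroup ) )
  result = gProxyManager.downloadProxyToFile( ownerDN, ownerGroup )
  if not result['OK']:
    return S_ERROR( 'Could not retrieve proxy: %s' % result['Message'] )
  return result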
def submitNewBigPilot(self):
    tempPath = self.__tmpSandBoxDir + str(self.__jobID)
    dirac = Dirac()
    if not os.path.exists(tempPath):
        os.makedirs(tempPath)

    settingJobSandBoxDir = dirac.getInputSandbox(self.__jobID, tempPath)
    self.log.info("Writing temporary SandboxDir on the server", settingJobSandBoxDir)

    jobXMLName = "job:" + str(self.__jobID) + ".xml"
    with open(os.path.join(tempPath, jobXMLName), "wb") as temp_file:
        temp_file.write(self.jobWrapper())
    self.log.info("Writing temporary Hadoop Job.xml")

    HadoopV1cli = HadoopV1Client(self.__User, self.__publicIP, self.__Port)
    # returned = HadoopV1cli.dataCopy( tempPath, self.__tmpSandBoxDir )
    # self.log.info( 'Copy the job content to the Hadoop Master: ', returned )

    jobInfo = jobDB.getJobAttributes(self.__jobID)
    if not jobInfo["OK"]:
        return S_ERROR(jobInfo["Message"])

    proxy = ""
    jobInfo = jobInfo["Value"]
    if gProxyManager.userHasProxy(jobInfo["OwnerDN"], jobInfo["OwnerGroup"]):
        proxy = gProxyManager.downloadProxyToFile(jobInfo["OwnerDN"], jobInfo["OwnerGroup"])
    else:
        proxy = self.__requestProxyFromProxyManager(jobInfo["OwnerDN"], jobInfo["OwnerGroup"])

    returned = HadoopV1cli.submitPilotJob(tempPath, jobXMLName, proxy["chain"])
    self.log.info("Launching Hadoop pilot on the Hadoop Master: ", returned)

    if not returned["OK"]:
        return S_ERROR(returned["Message"])
    else:
        self.log.info("Hadoop Job ID: ", returned["Value"])
        return S_OK(returned["Value"])
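# jobWrapper() above is expected to return the serialized job description that
# is written to job:<jobID>.xml before submitPilotJob ships it to the master.
# A purely illustrative sketch of such a wrapper, assuming a Hadoop-style
# configuration XML; the property names are hypothetical, not the verified
# original:
def jobWrapper(self):
    """Return the job description XML to be shipped to the Hadoop master."""
    template = (
        '<?xml version="1.0"?>\n'
        '<configuration>\n'
        '  <property><name>dirac.job.id</name><value>%s</value></property>\n'
        '  <property><name>dirac.job.user</name><value>%s</value></property>\n'
        '</configuration>\n'
    )
    return template % (self.__jobID, self.__User)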
def submitNewBigJob( self ):
  tempPath = self.__tmpSandBoxDir + str( self.__jobID )
  dirac = Dirac()
  if not os.path.exists( tempPath ):
    os.makedirs( tempPath )

  settingJobSandBoxDir = dirac.getInputSandbox( self.__jobID, tempPath )
  self.log.info( 'Writing temporary SandboxDir on the server', settingJobSandBoxDir )

  jobXMLName = "job:" + str( self.__jobID ) + '.xml'
  with open( os.path.join( tempPath, jobXMLName ), 'wb' ) as temp_file:
    temp_file.write( self.jobWrapper() )
  self.log.info( 'Writing temporary Hadoop Job.xml' )

  HadoopV2cli = HadoopV2Client( self.__User, self.__publicIP )
  returned = HadoopV2cli.dataCopy( tempPath, self.__tmpSandBoxDir )
  self.log.info( 'Copying the job content to the Hadoop Master: ', returned )

  jobInfo = jobDB.getJobAttributes( self.__jobID )
  if not jobInfo['OK']:
    return S_ERROR( jobInfo['Message'] )

  proxy = ""
  jobInfo = jobInfo['Value']
  if gProxyManager.userHasProxy( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] ):
    proxy = gProxyManager.downloadProxyToFile( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] )
  else:
    proxy = self.__requestProxyFromProxyManager( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] )

  returned = HadoopV2cli.jobSubmit( tempPath, jobXMLName, proxy['chain'] )
  self.log.info( 'Launching Hadoop job to the Hadoop Master: ', returned )

  if not returned['OK']:
    return S_ERROR( returned['Message'] )
  else:
    self.log.info( 'Hadoop Job ID: ', returned['Value'] )
    return S_OK( returned['Value'] )
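# The mkdir/getInputSandbox sequence above is repeated by every submit method
# in this section. A minimal sketch of a shared staging helper, reusing the
# same Dirac().getInputSandbox API used above; the helper itself is an
# illustrative refactor, not part of the original class:
def __stageInputSandbox( self, tempPath ):
  """Create tempPath if needed and download the job input sandbox into it."""
  if not os.path.exists( tempPath ):
    os.makedirs( tempPath )
  result = Dirac().getInputSandbox( self.__jobID, tempPath )
  if not result['OK']:
    self.log.error( 'Could not download the input sandbox', result['Message'] )
  return result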
def monitoring( self, loop, parentthread, output ):
  self.initialTiming = os.times()
  accountingReport = AccountingJob()
  accountingReport.setStartTime()

  numberJobsFlag = True
  numberJobs = 0
  numberStartedJobsDict = {}
  numberEndingJobsDict = {}

  job_pattern = re.compile( 'Job =.*?,' )
  job_pattern_2 = re.compile( 'Job =.*?\n' )

  # The job ID is the leading "<jobid>_" component of the last element of the
  # output path
  jobid = int( re.split( "_", re.split( "/", output )[-1] )[0] )

  cmd = '/bin/chmod 555 ' + self.getinfo
  returned = self.commandLaunch( cmd )

  while parentthread.isAlive():
    time.sleep( loop )
    if numberJobsFlag:
      cmd = self.getinfo + ' -c step1'
      returned = self.commandLaunch( cmd )
      self.log.info( 'InteractiveJobMonitorThread:step1:numJobs:', returned )
      if returned != None:
        if returned['Value'][1] != "":
          if re.split( "=", returned['Value'][1] )[1].strip().isdigit():
            numberJobs = int( re.split( "=", returned['Value'][1] )[1] )
          if numberJobs != 0:
            numberJobsFlag = False
            BigDataDB.setJobStatus( jobid, "Running" )
    else:
      cmd = self.getinfo + ' -c step2'
      returned = self.commandLaunch( cmd )
      self.log.info( 'InteractiveJobMonitorThread:step2:startedJobs:', returned )
      if returned != "":
        if returned['Value'][1] != "":
          startedJobs = job_pattern.findall( returned['Value'][1] )
          self.log.info( 'step2:startedJobs:', startedJobs )
      cmd = self.getinfo + ' -c step3'
      returned = self.commandLaunch( cmd )
      self.log.info( 'InteractiveJobMonitorThread:step3:endedJobs:', returned )
      if returned != "":
        if returned['Value'][1] != "":
          finishedJobs = job_pattern_2.findall( returned['Value'][1] )
          self.log.info( 'step3:finishedJobs:', finishedJobs )
          if len( finishedJobs ) == numberJobs:
            BigDataDB.setJobStatus( jobid, "Done" )
            BigDataDB.setHadoopID( jobid, finishedJobs )
            self.__updateSandBox( jobid, output )

            # Update Accounting: the CPU consumed is the difference between
            # two os.times() snapshots taken around the monitored run
            EXECUTION_RESULT = {}
            EXECUTION_RESULT['CPU'] = []
            finalStat = os.times()
            for i in range( len( finalStat ) ):
              EXECUTION_RESULT['CPU'].append( finalStat[i] - self.initialTiming[i] )
            utime, stime, cutime, cstime, elapsed = EXECUTION_RESULT['CPU']
            cpuTime = utime + stime + cutime + cstime
            execTime = elapsed

            result = jobDB.getJobAttributes( jobid )
            getting = result['Value']
            acData = {
                'User': getting['Owner'],
                'UserGroup': getting['OwnerGroup'],
                'JobGroup': 'cesga',
                'JobType': 'User',
                'JobClass': 'unknown',
                'ProcessingType': 'unknown',
                'FinalMajorStatus': getting['Status'],
                'FinalMinorStatus': getting['MinorStatus'],
                'CPUTime': cpuTime,
                'Site': getting['Site'],
                # Based on the factor to convert raw CPU to Normalized units (based on the CPU Model)
                'NormCPUTime': 0,
                'ExecTime': execTime,
                'InputDataSize': 0,
                'OutputDataSize': 0,
                'InputDataFiles': 0,
                'OutputDataFiles': 0,
                'DiskSpace': 0,
                'InputSandBoxSize': 0,
                'OutputSandBoxSize': 0,
                'ProcessedEvents': 0 }
            accountingReport.setEndTime()
            accountingReport.setValuesFromDict( acData )
            result = accountingReport.commit()
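# The accounting block above measures consumed CPU by diffing two os.times()
# snapshots (user, system, child-user, child-system, elapsed). A
# self-contained sketch of the same technique; the busy loop merely stands in
# for the monitored workload:
import os

def cpuAndWallSeconds():
    start = os.times()
    sum(i * i for i in range(10 ** 6))  # stand-in for the real work
    end = os.times()
    utime, stime, cutime, cstime, elapsed = [e - s for e, s in zip(end, start)]
    return utime + stime + cutime + cstime, elapsed  # (CPU seconds, wall-clock seconds)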
def execute( self ):
  """Main Agent code:
     1.- Query TaskQueueDB for existing TQs
     2.- Count Pending Jobs
     3.- Submit Jobs
  """
  self.__checkSubmitPools()

  bigDataJobsToSubmit = {}
  bigDataJobIdsToSubmit = {}

  for directorName, directorDict in self.directors.items():
    self.log.verbose( 'Checking Director:', directorName )
    self.log.verbose( 'RunningEndPoints:', directorDict['director'].runningEndPoints )

    for runningEndPointName in directorDict['director'].runningEndPoints:
      runningEndPointDict = directorDict['director'].runningEndPoints[runningEndPointName]
      NameNode = runningEndPointDict['NameNode']

      # Count the jobs already submitted to or running on this endpoint
      jobsByEndPoint = 0
      result = BigDataDB.getBigDataJobsByStatusAndEndpoint( 'Submitted', NameNode )
      if result['OK']:
        jobsByEndPoint += len( result['Value'] )
      result = BigDataDB.getBigDataJobsByStatusAndEndpoint( 'Running', NameNode )
      if result['OK']:
        jobsByEndPoint += len( result['Value'] )
      self.log.verbose( 'Checking Jobs By EndPoint %s:' % jobsByEndPoint )

      jobLimitsEndPoint = runningEndPointDict['LimitQueueJobsEndPoint']
      bigDataJobs = 0
      if jobsByEndPoint >= jobLimitsEndPoint:
        self.log.info( '%s >= %s Running jobs reach the job limit: %s, skipping' %
                       ( jobsByEndPoint, jobLimitsEndPoint, runningEndPointName ) )
        continue
      else:
        bigDataJobs = jobLimitsEndPoint - jobsByEndPoint

      requirementsDict = runningEndPointDict['Requirements']
      self.log.info( 'Requirements Dict: ', requirementsDict )
      result = taskQueueDB.getMatchingTaskQueues( requirementsDict )
      if not result['OK']:
        self.log.error( 'Could not retrieve TaskQueues from TaskQueueDB', result['Message'] )
        return result

      taskQueueDict = result['Value']
      self.log.info( 'Task Queues Dict: ', taskQueueDict )
      jobs = 0
      priority = 0
      cpu = 0
      jobsID = 0
      self.log.info( 'Pending Jobs from the TaskQueue that did not match before: ', self.pendingTaskQueueJobs )

      for tq in taskQueueDict:
        jobs += taskQueueDict[tq]['Jobs']
        priority += taskQueueDict[tq]['Priority']
        cpu += taskQueueDict[tq]['Jobs'] * taskQueueDict[tq]['CPUTime']

        # Matching of jobs with BigData software.
        # The process follows this sequence:
        #   - retrieve a job from taskQueueDict
        #   - get the job name and try to match it with the resources
        #   - if it does not match, store it in pendingTaskQueueJobs for the
        #     next iteration
        #
        # The matching is done with the following JobName pattern:
        #   NameSoftware_SoftwareVersion_HighLanguageName_HighLanguageVersion_DataSetName

        # extract a job from the TaskQueue
        if tq not in self.pendingTaskQueueJobs.keys():
          self.pendingTaskQueueJobs[tq] = {}
        getJobFromTaskQueue = taskQueueDB.matchAndGetJob( taskQueueDict[tq] )
        if not getJobFromTaskQueue['OK']:
          self.log.error( 'Could not get a Job from the TaskQueue', getJobFromTaskQueue['Message'] )
          return getJobFromTaskQueue

        jobInfo = getJobFromTaskQueue['Value']
        jobID = jobInfo['jobId']
        jobAttrInfo = jobDB.getJobAttributes( jobID )
        if not jobAttrInfo['OK']:
          self.log.error( 'Could not get Job Attributes', jobAttrInfo['Message'] )
          return jobAttrInfo
        jobInfoUniq = jobAttrInfo['Value']
        jobName = jobInfoUniq['JobName']
        self.pendingTaskQueueJobs[tq][jobID] = jobName

        result = jobDB.getJobJDL( jobID, True )
        classAdJob = ClassAd( result['Value'] )
        arguments = 0
        if classAdJob.lookupAttribute( 'Arguments' ):
          arguments = classAdJob.getAttributeString( 'Arguments' )

        jobsToSubmit = self.matchingJobsForBDSubmission( arguments,
                                                         runningEndPointName,
                                                         runningEndPointDict['BigDataSoftware'],
                                                         runningEndPointDict['BigDataSoftwareVersion'],
                                                         runningEndPointDict['HighLevelLanguage']['HLLName'],
                                                         runningEndPointDict['HighLevelLanguage']['HLLVersion'],
                                                         jobID )
        if jobsToSubmit == "OK":
          if directorName not in bigDataJobsToSubmit:
            bigDataJobsToSubmit[directorName] = {}
          if runningEndPointName not in bigDataJobsToSubmit[directorName]:
            bigDataJobsToSubmit[directorName][runningEndPointName] = {}
          bigDataJobsToSubmit[directorName][runningEndPointName] = {
              'JobId': jobID,
              'JobName': jobName,
              'TQPriority': priority,
              'CPUTime': cpu,
              'BigDataEndpoint': runningEndPointName,
              'BigDataEndpointNameNode': runningEndPointDict['NameNode'],
              'BdSoftware': runningEndPointDict['BigDataSoftware'],
              'BdSoftwareVersion': runningEndPointDict['BigDataSoftwareVersion'],
              'HLLName': runningEndPointDict['HighLevelLanguage']['HLLName'],
              'HLLVersion': runningEndPointDict['HighLevelLanguage']['HLLVersion'],
              'NumBigDataJobsAllowedToSubmit': bigDataJobs,
              'SiteName': runningEndPointDict['SiteName'],
              'PublicIP': runningEndPointDict['PublicIP'],
              'User': runningEndPointDict['User'],
              'Port': runningEndPointDict['Port'],
              'UsePilot': runningEndPointDict['UsePilot'],
              'IsInteractive': runningEndPointDict['IsInteractive'],
              'Arguments': arguments }
          del self.pendingTaskQueueJobs[tq][jobID]
        else:
          self.log.error( jobsToSubmit )

      self.log.info( 'Pending Jobs from the TaskQueue that did not match after: ', self.pendingTaskQueueJobs )
      for tq in self.pendingTaskQueueJobs.keys():
        for jobid in self.pendingTaskQueueJobs[tq].keys():
          result = jobDB.getJobJDL( jobid, True )
          classAdJob = ClassAd( result['Value'] )
          arguments = 0
          if classAdJob.lookupAttribute( 'Arguments' ):
            arguments = classAdJob.getAttributeString( 'Arguments' )
          # do the match with the runningEndPoint
          jobsToSubmit = self.matchingJobsForBDSubmission( arguments,
                                                           runningEndPointName,
                                                           runningEndPointDict['BigDataSoftware'],
                                                           runningEndPointDict['BigDataSoftwareVersion'],
                                                           runningEndPointDict['HighLevelLanguage']['HLLName'],
                                                           runningEndPointDict['HighLevelLanguage']['HLLVersion'],
                                                           jobid )
          if jobsToSubmit == "OK":
            if directorName not in bigDataJobsToSubmit:
              bigDataJobsToSubmit[directorName] = {}
            if runningEndPointName not in bigDataJobsToSubmit[directorName]:
              bigDataJobsToSubmit[directorName][runningEndPointName] = {}
            bigDataJobsToSubmit[directorName][runningEndPointName] = {
                'JobId': jobid,
                'JobName': self.pendingTaskQueueJobs[tq][jobid],
                'TQPriority': priority,
                'CPUTime': cpu,
                'BigDataEndpoint': runningEndPointName,
                'BigDataEndpointNameNode': runningEndPointDict['NameNode'],
                'BdSoftware': runningEndPointDict['BigDataSoftware'],
                'BdSoftwareVersion': runningEndPointDict['BigDataSoftwareVersion'],
                'HLLName': runningEndPointDict['HighLevelLanguage']['HLLName'],
                'HLLVersion': runningEndPointDict['HighLevelLanguage']['HLLVersion'],
                'NumBigDataJobsAllowedToSubmit': bigDataJobs,
                'SiteName': runningEndPointDict['SiteName'],
                'PublicIP': runningEndPointDict['PublicIP'],
                'User': runningEndPointDict['User'],
                'Port': runningEndPointDict['Port'],
                'UsePilot': runningEndPointDict['UsePilot'],
                'IsInteractive': runningEndPointDict['IsInteractive'],
                'Arguments': arguments }
            del self.pendingTaskQueueJobs[tq][jobid]
          else:
            self.log.error( jobsToSubmit )

      if not jobs and not self.pendingTaskQueueJobs:
        self.log.info( 'No matching jobs for %s found, skipping' % NameNode )
        continue

      self.log.info( '___BigDataJobsTo Submit:', bigDataJobsToSubmit )

  for directorName, JobsToSubmitDict in bigDataJobsToSubmit.items():
    for runningEndPointName, jobsToSubmitDict in JobsToSubmitDict.items():
      if self.directors[directorName]['isEnabled']:
        self.log.info( 'Requesting submission to %s of %s' % ( runningEndPointName, directorName ) )

        director = self.directors[directorName]['director']
        pool = self.pools[self.directors[directorName]['pool']]

        jobIDs = JobsToSubmitDict[runningEndPointName]['JobId']
        jobName = JobsToSubmitDict[runningEndPointName]['JobName']
        endpoint = JobsToSubmitDict[runningEndPointName]['BigDataEndpoint']
        runningSiteName = JobsToSubmitDict[runningEndPointName]['SiteName']
        NameNode = JobsToSubmitDict[runningEndPointName]['BigDataEndpointNameNode']
        BigDataSoftware = JobsToSubmitDict[runningEndPointName]['BdSoftware']
        BigDataSoftwareVersion = JobsToSubmitDict[runningEndPointName]['BdSoftwareVersion']
        HLLName = JobsToSubmitDict[runningEndPointName]['HLLName']
        HLLVersion = JobsToSubmitDict[runningEndPointName]['HLLVersion']
        PublicIP = JobsToSubmitDict[runningEndPointName]['PublicIP']
        User = JobsToSubmitDict[runningEndPointName]['User']
        Port = JobsToSubmitDict[runningEndPointName]['Port']
        UsePilot = JobsToSubmitDict[runningEndPointName]['UsePilot']
        IsInteractive = JobsToSubmitDict[runningEndPointName]['IsInteractive']
        Arguments = JobsToSubmitDict[runningEndPointName]['Arguments']
        numBigDataJobsAllowed = JobsToSubmitDict[runningEndPointName]['NumBigDataJobsAllowedToSubmit']

        ret = pool.generateJobAndQueueIt( director.submitBigDataJobs,
                                          args = ( endpoint, numBigDataJobsAllowed, runningSiteName,
                                                   NameNode, BigDataSoftware, BigDataSoftwareVersion,
                                                   HLLName, HLLVersion, PublicIP, Port, jobIDs,
                                                   runningEndPointName, jobName, User, self.jobDataset,
                                                   UsePilot, IsInteractive ),
                                          oCallback = self.callBack,
                                          oExceptionCallback = director.exceptionCallBack,
                                          blocking = False )
        if not ret['OK']:
          # Disable submission until the next iteration
          self.directors[directorName]['isEnabled'] = False
        else:
          time.sleep( self.am_getOption( 'ThreadStartDelay' ) )

  if 'Default' in self.pools:
    # only for those in the 'Default' thread pool
    # for pool in self.pools:
    self.pools['Default'].processAllResults()

  return DIRAC.S_OK()
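# matchingJobsForBDSubmission (called twice in execute above) decides whether a
# job fits an endpoint, based on the JobName pattern documented above:
# NameSoftware_SoftwareVersion_HighLanguageName_HighLanguageVersion_DataSetName.
# A minimal sketch of that underscore matching, assuming only the documented
# pattern; the real method may apply additional checks:
def matchJobNameToEndPoint( jobName, bdSoftware, bdVersion, hllName, hllVersion ):
  parts = jobName.split( '_' )
  if len( parts ) < 4:
    return False
  return ( parts[0] == bdSoftware and parts[1] == bdVersion and
           parts[2] == hllName and parts[3] == hllVersion )

# Example:
# matchJobNameToEndPoint( 'Hadoop_1.2.1_Pig_0.12_wordcount', 'Hadoop', '1.2.1', 'Pig', '0.12' ) -> True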
def submitNewBigJob( self ):
  # 1.- Create the temporary folder
  self.log.debug( 'Step1::: mkdir temp folder' )
  tempPath = self.__tmpSandBoxDir + str( self.__jobID ) + "/"
  dirac = Dirac()
  if not os.path.exists( tempPath ):
    os.makedirs( tempPath )

  # 2.- Download the InputSandbox contents into the temporary folder
  self.log.debug( 'Step2::: download input sandbox to temp folder' )
  settingJobSandBoxDir = dirac.getInputSandbox( self.__jobID, tempPath )
  self.log.info( 'Writing temporary SandboxDir on the server', settingJobSandBoxDir )
  moveData = tempPath + "/InputSandbox" + str( self.__jobID )

  # 3.- Move the data to the client
  self.log.debug( 'Step3::: copy the data to the Hadoop Master' )
  HadoopV1InteractiveCli = HadoopV1InteractiveClient( self.__User, self.__publicIP, self.__Port )
  returned = HadoopV1InteractiveCli.dataCopy( tempPath, self.__tmpSandBoxDir )
  self.log.debug( 'Result of copying the job content to the Hadoop Master with HadoopInteractive::: ', returned )

  # 4.- Get the executable file
  result = jobDB.getJobJDL( str( self.__jobID ), True )
  classAdJob = ClassAd( result['Value'] )
  executableFile = ""
  if classAdJob.lookupAttribute( 'Executable' ):
    executableFile = classAdJob.getAttributeString( 'Executable' )
  self.log.debug( 'Step4::: Get executable file: ', executableFile )

  jobInfo = jobDB.getJobAttributes( self.__jobID )
  if not jobInfo['OK']:
    return S_ERROR( jobInfo['Message'] )

  proxy = ""
  jobInfo = jobInfo['Value']
  if gProxyManager.userHasProxy( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] ):
    proxy = gProxyManager.downloadProxyToFile( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] )
  else:
    proxy = self.__requestProxyFromProxyManager( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] )

  HadoopInteractiveJob = "InputSandbox" + str( self.__jobID ) + "/" + executableFile
  HadoopInteractiveJobCommand = "InputSandbox" + str( self.__jobID ) + "/" + executableFile + " " + self.__JobName
  HadoopInteractiveJobOutput = tempPath + str( self.__jobID ) + "_" + executableFile + "_out"

  # 5.- Create the second part of the job name and parse the execution command
  if len( re.split( " ", self.__JobName ) ) > 1:
    # (name for random writer with -D) name_job = re.split( " ", self.__JobName )[0] + " " + re.split( " ", self.__JobName )[1] + " " + re.split( " ", self.__JobName )[2]
    name_job = re.split( " ", self.__JobName )[0] + " " + re.split( " ", self.__JobName )[1]
    # (name for random writer with -D) output_job = moveData + "/" + re.split( " ", self.__JobName )[3]
    # (name for random writer with -D) cfg_job = ""
    # (name for random writer with -D) if len( re.split( " ", self.__JobName ) ) > 4:
    # (name for random writer with -D)   cfg_job = moveData + "/" + re.split( " ", self.__JobName )[4]
    # cmd = "hadoop jar " + tempPath + HadoopInteractiveJob + " " + name_job + " " + output_job + " " + cfg_job
    cmd = "hadoop jar " + tempPath + HadoopInteractiveJob + " " + name_job + " " + \
          tempPath + "/InputSandbox" + str( self.__jobID ) + "/" + "/dataset-USC-a-grep '[and]+'"
  else:
    # Skip the first two components of the dataset path to build the dataset name
    dataset = re.split( "/", self.__Dataset )
    count = 0
    datasetname = ""
    for dir in dataset:
      count = count + 1
      if count > 2:
        datasetname = datasetname + "/" + dir
    cmd = "hadoop jar " + tempPath + HadoopInteractiveJob + " " + self.__JobName + " " + datasetname + " " + \
          tempPath + "/" + self.__JobName.replace( " ", "" ) + "_" + str( self.__jobID )

  self.log.debug( 'Step5::: Making CMD for submission: ', cmd )
  self.log.debug( 'Step6::: Submit file to hadoop: ' )
  returned = HadoopV1InteractiveCli.jobSubmit( tempPath, HadoopInteractiveJob, proxy['chain'],
                                               HadoopInteractiveJobOutput, cmd )
  self.log.info( 'Launching Hadoop-HadoopInteractive job on the Master: ', returned )

  if not returned['OK']:
    return S_ERROR( returned['Message'] )
  else:
    self.log.info( 'Hadoop-HadoopInteractive Job ID: ', returned['Value'] )
    return S_OK( returned['Value'] )
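# The hadoop command above is assembled by raw string concatenation, so paths
# or job names containing spaces or shell metacharacters must be quoted by
# hand (note the literal "'[and]+'" argument). A sketch of the same
# construction with explicit quoting via pipes.quote (shlex.quote on
# Python 3); the function and argument names are illustrative:
import pipes

def buildHadoopCmd( jarPath, jobNameArgs, inputPath, outputPath ):
  """Build a safely quoted 'hadoop jar' command line."""
  parts = [ 'hadoop', 'jar', jarPath ] + jobNameArgs.split() + [ inputPath, outputPath ]
  return ' '.join( pipes.quote( p ) for p in parts )

# Example:
# buildHadoopCmd( '/tmp/sandbox/job.jar', 'grep [and]+', '/data/dataset-USC', '/tmp/out_123' )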