Example #1
  def submitNewBigJob( self ):

    result = jobDB.getJobJDL( str( self.__jobID ), True )
    classAdJob = ClassAd( result['Value'] )
    executableFile = ""
    if classAdJob.lookupAttribute( 'Executable' ):
      executableFile = classAdJob.getAttributeString( 'Executable' )

    tempPath = self.__tmpSandBoxDir
    dirac = Dirac()
    if not os.path.exists( tempPath ):
      os.makedirs( tempPath )

    settingJobSandBoxDir = dirac.getInputSandbox( self.__jobID, tempPath )
    self.log.info( 'Writing temporary SandboxDir on the server', settingJobSandBoxDir )
    moveData = self.__tmpSandBoxDir + "/InputSandbox" + str( self.__jobID )

    HiveV1Cli = HiveV1Client( self.__User, self.__publicIP )
    returned = HiveV1Cli.dataCopy( moveData, self.__tmpSandBoxDir )
    self.log.info( 'Copied the job content to the Hadoop master with Hive: ', returned )

    jobInfo = jobDB.getJobAttributes( self.__jobID )
    if not jobInfo['OK']:
      return S_ERROR( jobInfo['Message'] )
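    # Fetch the owner's proxy: download it if one is already cached,
    # otherwise request it from the ProxyManager.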
    proxy = ""
    jobInfo = jobInfo['Value']
    if gProxyManager.userHasProxy( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] ):
      proxy = gProxyManager.downloadProxyToFile( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] )
    else:
      proxy = self.__requestProxyFromProxyManager( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] )

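    # Executable path inside the uploaded sandbox and the name of the job output file.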
    HiveJob = "InputSandbox" + str( self.__jobID ) + "/" + executableFile
    HiveJobOutput = str( self.__jobID ) + "_" + executableFile + "_out"

    returned = HiveV1Cli.jobSubmit( tempPath, HiveJob, proxy['chain'], HiveJobOutput )
    self.log.info( 'Submitted Hadoop-Hive job to the master: ', returned )

    if not returned['OK']:
      return S_ERROR( returned['Message'] )
    else:
      self.log.info( 'Hadoop-Hive Job ID: ', returned['Value'] )

    return S_OK( returned['Value'] )
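
Every call in these examples follows DIRAC's return-value convention: functions return a dictionary whose 'OK' key flags success, with the payload under 'Value' and, on failure, an error string under 'Message'. A minimal sketch of that convention, for reference only (DIRAC provides the real helpers):

# Minimal sketch of DIRAC's result-dictionary convention, for reference only;
# the real S_OK / S_ERROR helpers are provided by DIRAC itself.
def S_OK( value = None ):
  # Success: the payload travels under 'Value'
  return { 'OK': True, 'Value': value }

def S_ERROR( message = '' ):
  # Failure: the reason travels under 'Message'
  return { 'OK': False, 'Message': message }

result = S_OK( 'job output' )
if result['OK']:
  print result['Value']     # -> job output
else:
  print result['Message']
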
Example #2

  def execute( self ):
    """Main Agent code:
      1.- Query TaskQueueDB for existing TQs
      2.- Count Pending Jobs
      3.- Submit Jobs
    """
    self.__checkSubmitPools()

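    # Jobs selected for submission, grouped per director and per endpoint;
    # filled by the matching loops below.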
    bigDataJobsToSubmit = {}
    bigDataJobIdsToSubmit = {}

    for directorName, directorDict in self.directors.items():
      self.log.verbose( 'Checking Director:', directorName )
      self.log.verbose( 'RunningEndPoints:', directorDict['director'].runningEndPoints )
      for runningEndPointName in directorDict['director'].runningEndPoints:
        runningEndPointDict = directorDict['director'].runningEndPoints[runningEndPointName]
        NameNode = runningEndPointDict['NameNode']
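        # Count the jobs already Submitted or Running at this endpoint, to
        # enforce the per-endpoint queue limit below.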
        jobsByEndPoint = 0
        result = BigDataDB.getBigDataJobsByStatusAndEndpoint( 'Submitted', NameNode )
        if result['OK']:
          jobsByEndPoint += len( result['Value'] )
        result = BigDataDB.getBigDataJobsByStatusAndEndpoint( 'Running', NameNode )
        if result['OK']:
          jobsByEndPoint += len( result['Value'] )
        self.log.verbose( 'Jobs currently at this EndPoint: %s' % jobsByEndPoint )
        jobLimitsEndPoint = runningEndPointDict['LimitQueueJobsEndPoint']

        bigDataJobs = 0
        if jobsByEndPoint >= jobLimitsEndPoint:
          self.log.info( '%s >= %s: job limit reached for %s, skipping' % ( jobsByEndPoint, jobLimitsEndPoint, runningEndPointName ) )
          continue
        else:
          bigDataJobs = jobLimitsEndPoint - jobsByEndPoint
        requirementsDict = runningEndPointDict['Requirements']

        self.log.info( 'Requirements Dict: ', requirementsDict )
        result = taskQueueDB.getMatchingTaskQueues( requirementsDict )
        if not result['OK']:
          self.log.error( 'Could not retrieve TaskQueues from TaskQueueDB', result['Message'] )
          return result

        taskQueueDict = result['Value']
        self.log.info( 'Task Queues Dict: ', taskQueueDict )
        jobs = 0
        priority = 0
        cpu = 0
        jobsID = 0
        self.log.info( 'Pending TaskQueue jobs not matched in previous iterations: ', self.pendingTaskQueueJobs )
        for tq in taskQueueDict:
          jobs += taskQueueDict[tq]['Jobs']
          priority += taskQueueDict[tq]['Priority']
          cpu += taskQueueDict[tq]['Jobs'] * taskQueueDict[tq]['CPUTime']

          # Matching of jobs with BigData software.
          # The process follows this sequence:
          #   - retrieve a job from taskQueueDict
          #   - get the job name and try to match it against the resources
          #   - if it does not match, store it in pendingTaskQueueJobs for
          #     the next iteration
          #
          # The matching uses the following JobName pattern:
          #   NameSoftware_SoftwareVersion_HighLanguageName_HighLanguageVersion_DataSetName
          # Extract a job from the TaskQueue
          if tq not in self.pendingTaskQueueJobs:
            self.pendingTaskQueueJobs[tq] = {}
          getJobFromTaskQueue = taskQueueDB.matchAndGetJob( taskQueueDict[tq] )
          if not getJobFromTaskQueue['OK']:
            self.log.error( 'Could not get a job from the TaskQueue', getJobFromTaskQueue['Message'] )
            return getJobFromTaskQueue

          jobInfo = getJobFromTaskQueue['Value']
          jobID = jobInfo['jobId']
          jobAttrInfo = jobDB.getJobAttributes( jobID )

          if not jobAttrInfo['OK']:
            self.log.error( 'Could not get Job Attributes', jobAttrInfo['Message'] )
            return jobAttrInfo
          jobInfoUniq = jobAttrInfo['Value']
          jobName = jobInfoUniq['JobName']
          self.pendingTaskQueueJobs[tq][jobID] = jobName


          result = jobDB.getJobJDL( jobID, True )
          classAdJob = ClassAd( result['Value'] )
          arguments = 0
          if classAdJob.lookupAttribute( 'Arguments' ):
            arguments = classAdJob.getAttributeString( 'Arguments' )
          #if not classAdJob.lookupAttribute( 'Arguments' ):
          #  continue

          jobsToSubmit = self.matchingJobsForBDSubmission( arguments,
                                                           runningEndPointName,
                                                           runningEndPointDict['BigDataSoftware'],
                                                           runningEndPointDict['BigDataSoftwareVersion'],
                                                           runningEndPointDict['HighLevelLanguage']['HLLName'],
                                                           runningEndPointDict['HighLevelLanguage']['HLLVersion'],
                                                           jobID )
          if jobsToSubmit == "OK":
            if directorName not in bigDataJobsToSubmit:
              bigDataJobsToSubmit[directorName] = {}
            if runningEndPointName not in bigDataJobsToSubmit[directorName]:
              bigDataJobsToSubmit[directorName][runningEndPointName] = {}
            bigDataJobsToSubmit[directorName][runningEndPointName] = { 'JobId': jobID,
                                                        'JobName': jobName,
                                                        'TQPriority': priority,
                                                        'CPUTime': cpu,
                                                        'BigDataEndpoint': runningEndPointName,
                                                        'BigDataEndpointNameNode': runningEndPointDict['NameNode'],
                                                        'BdSoftware': runningEndPointDict['BigDataSoftware'],
                                                        'BdSoftwareVersion': runningEndPointDict['BigDataSoftwareVersion'],
                                                        'HLLName' : runningEndPointDict['HighLevelLanguage']['HLLName'],
                                                        'HLLVersion' : runningEndPointDict['HighLevelLanguage']['HLLVersion'],
                                                        'NumBigDataJobsAllowedToSubmit': bigDataJobs,
                                                        'SiteName': runningEndPointDict['SiteName'],
                                                        'PublicIP': runningEndPointDict['PublicIP'],
                                                        'User': runningEndPointDict['User'],
                                                        'Port': runningEndPointDict['Port'],
                                                        'UsePilot': runningEndPointDict['UsePilot'],
                                                        'IsInteractive': runningEndPointDict['IsInteractive'],
                                                        'Arguments': arguments }
            del self.pendingTaskQueueJobs[tq][jobID]
          else:
            self.log.error( jobsToSubmit )
        self.log.info( 'Pending TaskQueue jobs still unmatched after this iteration: ', self.pendingTaskQueueJobs )
        for tq in self.pendingTaskQueueJobs.keys():
          for jobid in self.pendingTaskQueueJobs[tq].keys():
            result = jobDB.getJobJDL( jobid, True )
            classAdJob = ClassAd( result['Value'] )
            arguments = 0
            if classAdJob.lookupAttribute( 'Arguments' ):
              arguments = classAdJob.getAttributeString( 'Arguments' )
            #if not classAdJob.lookupAttribute( 'Arguments' ):
            #  continue
            #do the match with the runningEndPoint
            jobsToSubmit = self.matchingJobsForBDSubmission( arguments,
                                                             runningEndPointName,
                                                             runningEndPointDict['BigDataSoftware'],
                                                             runningEndPointDict['BigDataSoftwareVersion'],
                                                             runningEndPointDict['HighLevelLanguage']['HLLName'],
                                                             runningEndPointDict['HighLevelLanguage']['HLLVersion'],
                                                             jobid )
            if jobsToSubmit == "OK":
              if directorName not in bigDataJobsToSubmit:
                bigDataJobsToSubmit[directorName] = {}
              if runningEndPointName not in bigDataJobsToSubmit[directorName]:
                bigDataJobsToSubmit[directorName][runningEndPointName] = {}
              bigDataJobsToSubmit[directorName][runningEndPointName] = { 'JobId': jobid,
                                                          'JobName': self.pendingTaskQueueJobs[tq][jobid],
                                                          'TQPriority': priority,
                                                          'CPUTime': cpu,
                                                          'BigDataEndpoint': runningEndPointName,
                                                          'BigDataEndpointNameNode': runningEndPointDict['NameNode'],
                                                          'BdSoftware': runningEndPointDict['BigDataSoftware'],
                                                          'BdSoftwareVersion': runningEndPointDict['BigDataSoftwareVersion'],
                                                          'HLLName' : runningEndPointDict['HighLevelLanguage']['HLLName'],
                                                          'HLLVersion' : runningEndPointDict['HighLevelLanguage']['HLLVersion'],
                                                          'NumBigDataJobsAllowedToSubmit': bigDataJobs,
                                                          'SiteName': runningEndPointDict['SiteName'],
                                                          'PublicIP': runningEndPointDict['PublicIP'],
                                                          'User': runningEndPointDict['User'],
                                                          'Port': runningEndPointDict['Port'],
                                                          'UsePilot': runningEndPointDict['UsePilot'],
                                                          'IsInteractive': runningEndPointDict['IsInteractive'],
                                                          'Arguments': arguments  }
              del self.pendingTaskQueueJobs[tq][jobid]
            else:
              self.log.error( jobsToSubmit )
        if not jobs and not self.pendingTaskQueueJobs:
          self.log.info( 'No matching jobs for %s found, skipping' % NameNode )
          continue

        self.log.info( 'BigData jobs to submit: ', bigDataJobsToSubmit )

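    # Second phase: hand the selected jobs over to the enabled directors,
    # queueing one threaded submission per endpoint.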
    for directorName, JobsToSubmitDict in bigDataJobsToSubmit.items():
      for runningEndPointName, jobsToSubmitDict in JobsToSubmitDict.items():
        if self.directors[directorName]['isEnabled']:
          self.log.info( 'Requesting submission to %s from director %s' % ( runningEndPointName, directorName ) )

          director = self.directors[directorName]['director']
          pool = self.pools[self.directors[directorName]['pool']]

          jobIDs = JobsToSubmitDict[runningEndPointName]['JobId']
          jobName = JobsToSubmitDict[runningEndPointName]['JobName']
          endpoint = JobsToSubmitDict[runningEndPointName]['BigDataEndpoint']
          runningSiteName = JobsToSubmitDict[runningEndPointName]['SiteName']
          NameNode = JobsToSubmitDict[runningEndPointName]['BigDataEndpointNameNode']
          BigDataSoftware = JobsToSubmitDict[runningEndPointName]['BdSoftware']
          BigDataSoftwareVersion = JobsToSubmitDict[runningEndPointName]['BdSoftwareVersion']
          HLLName = JobsToSubmitDict[runningEndPointName]['HLLName']
          HLLVersion = JobsToSubmitDict[runningEndPointName]['HLLVersion']
          PublicIP = JobsToSubmitDict[runningEndPointName]['PublicIP']
          User = JobsToSubmitDict[runningEndPointName]['User']
          Port = JobsToSubmitDict[runningEndPointName]['Port']
          UsePilot = JobsToSubmitDict[runningEndPointName]['UsePilot']
          IsInteractive = JobsToSubmitDict[runningEndPointName]['IsInteractive']
          Arguments = JobsToSubmitDict[runningEndPointName]['Arguments']
          numBigDataJobsAllowed = JobsToSubmitDict[runningEndPointName]['NumBigDataJobsAllowedToSubmit']

          ret = pool.generateJobAndQueueIt( director.submitBigDataJobs,
                                            args = ( endpoint, numBigDataJobsAllowed, runningSiteName, NameNode,
                                                     BigDataSoftware, BigDataSoftwareVersion, HLLName, HLLVersion,
                                                     PublicIP, Port, jobIDs, runningEndPointName, jobName, User, self.jobDataset, UsePilot, IsInteractive ),
                                            oCallback = self.callBack,
                                            oExceptionCallback = director.exceptionCallBack,
                                            blocking = False )
          if not ret['OK']:
            # Disable submission until next iteration
            self.directors[directorName]['isEnabled'] = False
          else:
            time.sleep( self.am_getOption( 'ThreadStartDelay' ) )

    if 'Default' in self.pools:
      # only for those in the 'Default' thread pool
      # for pool in self.pools:
      self.pools['Default'].processAllResults()

    return DIRAC.S_OK()
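
The matching above depends on the JobName pattern documented in the comments: NameSoftware_SoftwareVersion_HighLanguageName_HighLanguageVersion_DataSetName. Since matchingJobsForBDSubmission itself is not shown in these examples, the hypothetical sketch below only illustrates how a job name in that pattern could be checked against an endpoint's configuration, returning the same "OK" string the caller tests for:

# Hypothetical sketch: matchingJobsForBDSubmission is not shown here, so this
# illustrates the documented JobName pattern, not the real implementation.
def matchJobNameWithEndPoint( jobName, bdSoftware, bdVersion, hllName, hllVersion ):
  # Expected pattern: NameSoftware_SoftwareVersion_HLLName_HLLVersion_DataSetName
  parts = jobName.split( '_' )
  if len( parts ) < 5:
    return 'Non-matching: unexpected JobName format'
  if parts[:4] == [ bdSoftware, bdVersion, hllName, hllVersion ]:
    return 'OK'
  return 'Non-matching: job requires %s %s with %s %s' % tuple( parts[:4] )

# e.g. matchJobNameWithEndPoint( 'Hadoop_1.2.1_Hive_0.12_dataset-USC',
#                                'Hadoop', '1.2.1', 'Hive', '0.12' )  returns 'OK'
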
Example #3

  def submitNewBigJob( self ):

    #1.- Create the temporary folder
    self.log.debug( 'Step1::: mkdir temp folder' )
    tempPath = self.__tmpSandBoxDir + str( self.__jobID ) + "/"
    dirac = Dirac()
    if not os.path.exists( tempPath ):
      os.makedirs( tempPath )

    #2.- Download the input sandbox contents into the temporary folder
    self.log.debug( 'Step2::: download inputsand to temp folder' )
    settingJobSandBoxDir = dirac.getInputSandbox( self.__jobID, tempPath )
    self.log.info( 'Writing temporary SandboxDir on the server', settingJobSandBoxDir )
    moveData = tempPath + "/InputSandbox" + str( self.__jobID )

    #3.- Move the data to the Hadoop master
    self.log.debug( 'Step3::: move data to the Hadoop master' )
    HadoopV1InteractiveCli = HadoopV1InteractiveClient( self.__User, self.__publicIP, self.__Port )
    returned = HadoopV1InteractiveCli.dataCopy( tempPath, self.__tmpSandBoxDir )
    self.log.debug( 'Result of copying the job content to the Hadoop master with HadoopInteractive::: ', returned )

    #4.- Get executable file
    result = jobDB.getJobJDL( str( self.__jobID ), True )
    classAdJob = ClassAd( result['Value'] )
    executableFile = ""
    if classAdJob.lookupAttribute( 'Executable' ):
      executableFile = classAdJob.getAttributeString( 'Executable' )
    self.log.debug( 'Step4::: Get executable file: ', executableFile )

    jobInfo = jobDB.getJobAttributes( self.__jobID )
    if not jobInfo['OK']:
      return S_ERROR( jobInfo['Message'] )
    proxy = ""
    jobInfo = jobInfo['Value']
    if gProxyManager.userHasProxy( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] ):
      proxy = gProxyManager.downloadProxyToFile( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] )
    else:
      proxy = self.__requestProxyFromProxyManager( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] )

    HadoopInteractiveJob = "InputSandbox" + str( self.__jobID ) + "/" + executableFile
    HadoopInteractiveJobCommand = "InputSandbox" + str( self.__jobID ) + "/" + executableFile + " " + self.__JobName
    HadoopInteractiveJobOutput = tempPath + str( self.__jobID ) + "_" + executableFile + "_out"

    #5.- Creating the second part of the job name
    if len( re.split( " ", self.__JobName ) ) > 1:
      # (name for random writer with -D) name_job = re.split( " ", self.__JobName )[0] + " " + re.split( " ", self.__JobName )[1] + " " + re.split( " ", self.__JobName )[2]
      name_job = re.split( " ", self.__JobName )[0] + " " + re.split( " ", self.__JobName )[1]
      # (name for random writer with -D) output_job = moveData + "/" + re.split( " ", self.__JobName )[3]
      # (name for random writer with -D) cfg_job = ""
      # (name for random writer with -D) if len( re.split( " ", self.__JobName ) ) > 4:
      # (name for random writer with -D)   cfg_job = moveData + "/" + re.split( " ", self.__JobName )[4]

      #6.- Parsing the execution command
      # cmd = "hadoop jar " + tempPath + HadoopInteractiveJob + " " + name_job + " " + output_job + " " + cfg_job
      cmd = "hadoop jar " + tempPath + HadoopInteractiveJob + " " + name_job + " " + tempPath + "/InputSandbox" + str( self.__jobID ) + "/" + "/dataset-USC-a-grep '[and]+'"
    else:
      dataset = re.split( "/", self.__Dataset )
      count = 0
      datasetname = ""
      for component in dataset:
        count = count + 1
        if count > 2:
          datasetname = datasetname + "/" + component
      cmd = "hadoop jar " + tempPath + HadoopInteractiveJob + " " + self.__JobName + " " + datasetname + " " + tempPath + "/" + self.__JobName.replace( " ", "" ) + "_" + str( self.__jobID )
    self.log.debug( 'Step6::: Making CMD for submission: ', cmd )

    self.log.debug( 'Step7::: Submit the job to Hadoop' )
    returned = HadoopV1InteractiveCli.jobSubmit( tempPath, HadoopInteractiveJob, proxy['chain'],
                                                 HadoopInteractiveJobOutput, cmd )
    self.log.info( 'Submitted HadoopInteractive job to the master: ', returned )

    if not returned['OK']:
      return S_ERROR( returned['Message'] )
    else:
      self.log.info( 'Hadoop-HadoopInteractive Job ID: ', returned['Value'] )

    return S_OK( returned['Value'] )
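
For reference, the dataset-name loop in the else branch above splits the dataset path on "/" and discards the first two pieces (the empty string before the leading slash and the first path component), keeping the remainder as the dataset name. A standalone restatement, using a hypothetical sample path purely for illustration:

import re

# Standalone restatement of the dataset-name loop above: drop the first
# component of an absolute dataset path and keep the remainder.
def datasetName( datasetPath ):
  datasetname = ""
  count = 0
  for component in re.split( "/", datasetPath ):
    count = count + 1
    if count > 2:
      datasetname = datasetname + "/" + component
  return datasetname

# Hypothetical sample path, purely for illustration:
print datasetName( "/grid/dataset-USC/part-00000" )   # prints /dataset-USC/part-00000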