class InteractiveJobLaunchThread ( threading.Thread ):
    def __init__( self, user , publicIP , jobname ):
        threading.Thread.__init__( self )
        self.jobname = jobname
        self.sshConnect = ConnectionUtils( user , publicIP )

    def run( self ):
        self.log = gLogger.getSubLogger( "InteractiveJobLaunchThread" )
        self.jobLaunch( self.jobname )

    def jobLaunch( self, jobname ):
        self.sshConnect.sshCall( 86400, jobname )  # one-day timeout for the remote job
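# Hedged usage sketch (an assumption, not part of the original source): the user,
# host and job name are placeholders, and ConnectionUtils/gLogger are assumed to
# be importable from the surrounding BigDataDIRAC package.
launcher = InteractiveJobLaunchThread( 'hadoopuser', '192.0.2.10', '/opt/jobs/wordcount.sh' )
launcher.start()   # run() invokes jobLaunch(), which blocks inside sshCall() for up to a day
launcher.join()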
Example #2
  def __init__( self, User, PublicIP ):

    # Monitoring loop interval, in seconds
    self.monitoringloop = 5

    self.log = gLogger.getSubLogger( "HIVEV1Client" )
    self.user = User
    self.publicIP = PublicIP
    self.sshConnect = ConnectionUtils( self.user , self.publicIP )
  def __init__( self, User, PublicIP, Port ):

    # Monitoring loop interval, in seconds
    self.monitoringloop = 5

    self.log = gLogger.getSubLogger( "HadoopV1InteractiveClient" )
    self.user = User
    self.publicIP = PublicIP
    self.port = Port
    self.sshConnect = ConnectionUtils( self.user , self.publicIP, self.port )
Example #4
  def __init__( self, User, PublicIP ):
    self.queueDict = {}
    self.pilot = DIRAC_PILOT
    self.install = DIRAC_INSTALL
    self.genericPilotDN = '/DC=es/DC=irisgrid/O=cesga/CN=victor-fernandez'
    self.genericPilotGroup = 'lhcb_pilot'
    self.pilotLogLevel = 'DEBUG'

    self.log = gLogger.getSubLogger( "HadoopV2Client" )
    self.user = User
    self.publicIP = PublicIP
    self.sshConnect = ConnectionUtils( self.user , self.publicIP )
class InteractiveJobLaunchPortThread(threading.Thread):
    def __init__(self, user, publicIP, jobname, Port):
        threading.Thread.__init__(self)
        self.jobname = jobname
        self.sshConnect = ConnectionUtils(user, publicIP, Port)

    def run(self):
        self.log = gLogger.getSubLogger("InteractiveJobLaunchThreadPort")
        self.log.warn(":::::::::::::InteractiveJobLaunchPortThread2insiderun")
        self.jobLaunch(self.jobname)

    def jobLaunch(self, jobname):
        result = self.sshConnect.sshCallByPort(86400, jobname)  # one-day timeout for the remote job
        self.log.warn("InteractiveJobLaunchPortThread: sshCallByPort returned", result)
class InteractiveJobMonitorThread ( threading.Thread ):

    def __init__( self, user , publicIP, looptime , parentthread, output, getinfo ):
      threading.Thread.__init__( self )
      self.sshConnect = ConnectionUtils( user , publicIP )
      self.looptime = looptime
      self.parentthread = parentthread
      self.output = output
      self.getinfo = getinfo

      """
      #SandBox Settings
      """
      self.sandboxClient = SandboxStoreClient()
      self.failedFlag = True
      self.sandboxSizeLimit = 1024 * 1024 * 10

    def run( self ):
      self.log = gLogger.getSubLogger( "InteractiveJobMonitorThread" )
      self.monitoring( self.looptime, self.parentthread, self.output )

    def monitoring( self, loop, parentthread, output ):

      self.initialTiming = os.times()
      accountingReport = AccountingJob()
      accountingReport.setStartTime()

      numberJobsFlag = True
      numberJobs = 0
      numberStartedJobsDict = {}
      numberEndingJobsDict = {}

      job_pattern = re.compile( 'Job =.*?,' )
      job_pattern_2 = re.compile( 'Job =.*?\n' )
      # The job id is the numeric prefix (before the first "_") of the output file name
      jobid = int( os.path.basename( output ).split( "_" )[0] )

      cmd = '/bin/chmod 555 ' + self.getinfo
      returned = self.commandLaunch( cmd )

      while parentthread.isAlive():
        time.sleep( loop )
        if numberJobsFlag:
          cmd = self.getinfo + ' -c step1'
          returned = self.commandLaunch( cmd )
          self.log.info( 'InteractiveJobMonitorThread:step1:numJobs:', returned )
          if returned != None:
            if ( returned['Value'][1] != "" ):
              if re.split( "=", returned['Value'][1] )[1].strip().isdigit():
                numberJobs = int( re.split( "=", returned['Value'][1] )[1] )
            if ( numberJobs != 0 ):
              numberJobsFlag = False
              BigDataDB.setJobStatus( jobid, "Running" )
        else:
          cmd = self.getinfo + ' -c step2'
          returned = self.commandLaunch( cmd )
          self.log.info( 'InteractiveJobMonitorThread:step2:startedJobs:', returned )
          if returned != "":
            if ( returned['Value'][1] != "" ):
              startedJobs = job_pattern.findall( returned['Value'][1] )
              self.log.info( 'step2:startedJobs:', startedJobs )
          cmd = self.getinfo + ' -c step3'
          returned = self.commandLaunch( cmd )
          self.log.info( 'InteractiveJobMonitorThread:step3:endedJobs:', returned )
          if returned != "":
            if ( returned['Value'][1] != "" ):
              finishedJobs = job_pattern_2.findall( returned['Value'][1] )
              self.log.info( 'step3:finishedJobs:', finishedJobs )
              if ( len( finishedJobs ) == numberJobs ):
                BigDataDB.setJobStatus( jobid, "Done" )
                BigDataDB.setHadoopID( jobid, finishedJobs )
                self.__updateSandBox( jobid, output )

                #Update Accounting                
                EXECUTION_RESULT = {}
                EXECUTION_RESULT['CPU'] = []
                finalStat = os.times()
                for i in range( len( finalStat ) ):
                  EXECUTION_RESULT['CPU'].append( finalStat[i] - self.initialTiming[i] )
                utime, stime, cutime, cstime, elapsed = EXECUTION_RESULT['CPU']
                cpuTime = utime + stime + cutime + cstime
                execTime = elapsed
                result = jobDB.getJobAttributes( jobid )
                getting = result['Value']
                acData = {
                        'User' : getting['Owner'],
                        'UserGroup' : getting['OwnerGroup'],
                        'JobGroup' : 'cesga',
                        'JobType' : 'User',
                        'JobClass' : 'unknown',
                        'ProcessingType' : 'unknown',
                        'FinalMajorStatus' : getting['Status'],
                        'FinalMinorStatus' : getting['MinorStatus'],
                        'CPUTime' : cpuTime,
                        'Site' : getting['Site'],
                        # Normalised CPU would need the CPU-model conversion factor, which is not available here
                        'NormCPUTime' : 0,
                        'ExecTime' : execTime,
                        'InputDataSize' : 0,
                        'OutputDataSize' : 0,
                        'InputDataFiles' : 0,
                        'OutputDataFiles' : 0,
                        'DiskSpace' : 0,
                        'InputSandBoxSize' : 0,
                        'OutputSandBoxSize' : 0,
                        'ProcessedEvents' : 0
                        }
                accountingReport.setEndTime()
                accountingReport.setValuesFromDict( acData )
                result = accountingReport.commit()



    def commandLaunch( self, cmd ):
      return self.sshConnect.sshCall( 100, cmd )

    def __updateSandBox( self, jobid, output ):

      jobInfo = BigDataDB.getJobIDInfo( jobid )
      result = self.sshConnect.scpCall( 100, output, output, False )

      if not result['OK']:
        self.log.error( 'Failed to retrieve data from the BigData software DFS:', result )

      file_paths = []
      file_paths.append( output )
      outputSandbox = file_paths

      resolvedSandbox = self.__resolveOutputSandboxFiles( outputSandbox )
      if not resolvedSandbox['OK']:
        self.log.warn( 'Output sandbox file resolution failed:' )
        self.log.warn( resolvedSandbox['Message'] )
        self.__report( 'Failed', 'Resolving Output Sandbox' )
      fileList = resolvedSandbox['Value']['Files']
      missingFiles = resolvedSandbox['Value']['Missing']
      if missingFiles:
        self.jobReport.setJobParameter( 'OutputSandboxMissingFiles', ', '.join( missingFiles ), sendFlag = False )

      if fileList and jobid:
        self.outputSandboxSize = getGlobbedTotalSize( fileList )
        self.log.info( 'Attempting to upload Sandbox with limit:', self.sandboxSizeLimit )

        result = self.sandboxClient.uploadFilesAsSandboxForJob( fileList, jobid,
                                                           'Output', self.sandboxSizeLimit ) # 1024*1024*10
        if not result['OK']:
          self.log.error( 'Output sandbox upload failed with message', result['Message'] )
          if 'SandboxFileName' in result:
            outputSandboxData = result['SandboxFileName']
            self.log.info( 'Attempting to upload %s as output data' % ( outputSandboxData ) )
            outputData = [ outputSandboxData ]  # collected for failover upload; not used elsewhere in this class
            self.jobReport.setJobParameter( 'OutputSandbox', 'Sandbox uploaded to grid storage', sendFlag = False )
            self.jobReport.setJobParameter( 'OutputSandboxLFN',
                                            self.__getLFNfromOutputFile( outputSandboxData )[0], sendFlag = False )
          else:
            self.log.info( 'Could not get SandboxFileName to attempt upload to Grid storage' )
            return S_ERROR( 'Output sandbox upload failed and no file name supplied for failover to Grid storage' )
        else:
          # Do not overwrite in case of Error
          if not self.failedFlag:
            self.__report( 'Completed', 'Output Sandbox Uploaded' )
          self.log.info( 'Sandbox uploaded successfully' )

      return "OK"

    def __resolveOutputSandboxFiles( self, outputSandbox ):
      """Checks the output sandbox file list and resolves any specified wildcards.
         Also tars any specified directories.
      """
      missing = []
      okFiles = []
      for i in outputSandbox:
        self.log.verbose( 'Looking at OutputSandbox file/directory/wildcard: %s' % i )
        globList = glob.glob( i )
        for check in globList:
          if os.path.isfile( check ):
            self.log.verbose( 'Found locally existing OutputSandbox file: %s' % check )
            okFiles.append( check )
          if os.path.isdir( check ):
            self.log.verbose( 'Found locally existing OutputSandbox directory: %s' % check )
            cmd = ['tar', 'cf', '%s.tar' % check, check]
            result = systemCall( 60, cmd )
            if not result['OK']:
              self.log.error( 'Failed to create OutputSandbox tar', result['Message'] )
            elif result['Value'][0]:
              self.log.error( 'Failed to create OutputSandbox tar', result['Value'][2] )
            if os.path.isfile( '%s.tar' % ( check ) ):
              self.log.verbose( 'Appending %s.tar to OutputSandbox' % check )
              okFiles.append( '%s.tar' % ( check ) )
            else:
              self.log.warn( 'Could not tar OutputSandbox directory: %s' % check )
              missing.append( check )

      for i in outputSandbox:
        if not i in okFiles:
          if not '%s.tar' % i in okFiles:
            if not re.search( '\*', i ):
              if not i in missing:
                missing.append( i )

      result = {'Missing':missing, 'Files':okFiles}
      return S_OK( result )
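# Hedged standalone sketch of the same resolution idea (an assumption, not the
# original helper): wildcards are globbed, directories are tarred, and entries
# that match nothing are reported as missing, mirroring __resolveOutputSandboxFiles().
import glob, os, subprocess

def resolve_sandbox( entries ):
    okFiles, missing = [], []
    for entry in entries:
        matches = glob.glob( entry )
        for path in matches:
            if os.path.isfile( path ):
                okFiles.append( path )
            elif os.path.isdir( path ):
                tarball = path + '.tar'
                if subprocess.call( [ 'tar', 'cf', tarball, path ] ) == 0:
                    okFiles.append( tarball )
                else:
                    missing.append( path )
        if not matches and '*' not in entry:
            missing.append( entry )
    return { 'Files': okFiles, 'Missing': missing }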
Example #8
class HadoopV1Client:

  def __init__( self, User, PublicIP, Port ):

    self.queueDict = {}
    self.pilot = DIRAC_PILOT
    self.install = DIRAC_INSTALL
    self.genericPilotDN = '/DC=es/DC=irisgrid/O=cesga/CN=victor-fernandez'
    self.genericPilotGroup = 'lhcb_pilot'
    self.pilotLogLevel = 'DEBUG'


    self.log = gLogger.getSubLogger( "HadoopV1Client" )
    self.user = User
    self.publicIP = PublicIP
    self.port = Port
    self.sshConnect = ConnectionUtils( self.user , self.publicIP, self.port )

  def getData( self, temSRC, tempDest ):
    cmdSeq = "hadoop dfs -get " + temSRC + " " + tempDest
    return self.sshConnect.sshCallByPort( 100, cmdSeq )

  def jobSubmit( self, tempPath, jobXMLName, proxy ):
   """ Method to submit job
   """
   executableFile = tempPath + "/" + jobXMLName
    # if no proxy is supplied, the executable can be submitted directly
    # otherwise a wrapper script is needed to get the proxy to the execution node
    # The wrapper script makes debugging more complicated and thus it is
    # recommended to transfer a proxy inside the executable if possible.
   if proxy:
    self.log.verbose( 'Setting up proxy for payload' )
    compressedAndEncodedProxy = base64.encodestring( bz2.compress( proxy.dumpAllToString()['Value'] ) ).replace( '\n', '' )
    compressedAndEncodedExecutable = base64.encodestring( bz2.compress( open( executableFile, "rb" ).read(), 9 ) ).replace( '\n', '' )

    wrapperContent = """#!/usr/bin/env python
# Wrapper script for executable and proxy
import os, tempfile, sys, base64, bz2, shutil
try:
  workingDirectory = tempfile.mkdtemp( suffix = '_wrapper', prefix= 'BigDat_' )
  os.chdir( workingDirectory )
  open( 'proxy', "w" ).write(bz2.decompress( base64.decodestring( "%(compressedAndEncodedProxy)s" ) ) )
  open( '%(executable)s', "w" ).write(bz2.decompress( base64.decodestring( "%(compressedAndEncodedExecutable)s" ) ) )
  os.chmod('proxy',0600)
  os.chmod('%(executable)s',0700)
  os.environ["X509_USER_PROXY"]=os.path.join(workingDirectory, 'proxy')
except Exception, x:
  print >> sys.stderr, x
  sys.exit(-1)
cmd = "hadoop job -submit %(executable)s"
print 'Executing: ', cmd
sys.stdout.flush()
os.system( cmd )              
shutil.rmtree( workingDirectory )
            """ % { 'compressedAndEncodedProxy': compressedAndEncodedProxy, \
                    'compressedAndEncodedExecutable': compressedAndEncodedExecutable, \
                    'executable': executableFile }

    fd, name = tempfile.mkstemp( suffix = '_wrapper.py', prefix = 'BigDat_', dir = tempPath )
    wrapper = os.fdopen( fd, 'w' )
    wrapper.write( wrapperContent )
    wrapper.close()

    submitFile = name

   else: # no proxy
     submitFile = executableFile

   # Copy the executable
   os.chmod( submitFile, stat.S_IRUSR | stat.S_IXUSR )
   sFile = os.path.basename( submitFile )
   result = self.sshConnect.scpCallByPort( 10, submitFile, '%s/%s' % ( tempPath, os.path.basename( submitFile ) ) )

   # submit submitFile to the batch system
   cmd = submitFile

   self.log.verbose( 'BigData submission command: %s' % ( cmd ) )

   result = self.sshConnect.sshCallByPort( 10, cmd )

   self.log.debug( 'BigData Hadoop V.1 result OK', result )
   if not result['OK']:
     for getit in result['Value']:
       resulting = re.search( "job_(\d+)_(\d+)", str( getit ) )
       if ( resulting != None ):
         self.log.debug( 'BigData Hadoop V.1 result OK' )
         return S_OK( resulting.group( 0 ).rstrip() )
     self.log.warn( '===========> SSH BigData Hadoop V.1 result NOT OK' )
     self.log.debug( result )
     return S_ERROR( result )
   else:
     self.log.debug( 'BigData Hadoop V.1 result OK' )

   for getit in result['Value']:
     resulting = re.search( "job_(\d+)_(\d+)", str( getit ) )
     if ( resulting != None ):
       return S_OK( resulting.group( 0 ).rstrip() )
   return S_ERROR( result['Value'] )

  def delHadoopData( self, tempPath ):
    cmdSeq = "hadoop dfs -rmr " + tempPath
    #cmdSeq = "hadoop dfs -ls " + tempPath
    return self.sshConnect.sshCallByPort( 100, cmdSeq )

  def delData( self, tempPath ):
    cmdSeq = "rm -Rf " + tempPath
    return self.sshConnect.sshCallByPort( 100, cmdSeq )

  def dataCopy( self, tempPath, tmpSandBoxDir ):
    return self.sshConnect.scpCallByPort( 100, tempPath, tmpSandBoxDir )

  def getdata( self, tempPath, tmpSandBoxDir ):
    return self.sshConnect.scpCallByPort( 100, tempPath, tmpSandBoxDir, False )

  def jobStatus( self, jobId, user, host ):

    cmdSeq = "ssh -p " + str( self.port ) + " -l " + user + " " + host + " 'hadoop job -list all | awk -v job_id=" + jobId.strip() + " "\
            " '\"'\"'BEGIN{OFS=\"\\t\"; FS=\"\\t\"; final_state=\"Unknown\"}" \
            "$0 == \"States are:\" {getline; for(i=1;i<=NF;i++) { split($i,s,\" \"); states[s[3]] = s[1] }} $1==job_id { final_state=states[$2]; exit} END{print final_state}'\"'\""

    gLogger.info( 'Command Submitted: ', cmdSeq )
    return self.sshConnect.sshOnlyCall( 10, cmdSeq )

  def newJob( self, path, jobDiracId, bdJobId ):

    cmdSeq = "ssh -p " + str( self.port ) + " -l " + self.user + " " + self.publicIP + " /" + path + "/" + str( jobDiracId ) + "/BigDat_*_getInfo.py -c step1 | wc -l"
    gLogger.info( 'Command Submitted: ', cmdSeq )
    returned = self.sshConnect.sshOnlyCall( 10, cmdSeq )
    gLogger.info( 'Command Submitted: ', returned )
    if returned != None:
      if ( returned['Value'][1] != "" ):
        if ( int( returned['Value'][1] ) > 1 ):
          cmdSeq = "ssh -p " + str( self.port ) + " -l " + self.user + " " + self.publicIP + " /" + path + "/" + str( jobDiracId ) + "/BigDat_*_getInfo.py -c step1 | tail -n1"
          gLogger.info( 'Command Submitted: ', cmdSeq )
          returned = self.sshConnect.sshOnlyCall( 10, cmdSeq )
          gLogger.info( 'Command Submitted: ', returned )
          if returned != None:
            if ( returned['Value'][1] != "" ):
              resulting = re.search( "job_+([^:]+)", returned['Value'][1] ).group( 0 ).rstrip()
              if ( bdJobId == resulting ):
                gLogger.info( 'The BD job id is the same' )
                return S_ERROR( "Same JobID" )
              else:
                gLogger.info( 'The new BD job id is: ', resulting )
                return S_OK( resulting )
    return S_ERROR( returned )

  def jobCompleteStatus( self, jobId ):

    cmdSeq = "hadoop job -status " + jobId

    gLogger.info( 'Command Submitted: ', cmdSeq )
    return self.sshConnect.sshCallByPort( 100, cmdSeq )

#################################################################################################################
  def submitPilotJob( self, tempPath, jobXMLName, proxy ):
    executableFile = self.__getExecutable( proxy, tempPath )
    self.log.verbose( "Executable file path: %s" % executableFile )

    if executableFile['OK']:
      executableFile = executableFile['Value']
    else:
      return S_ERROR( executableFile )

    if not os.access( executableFile, 5 ):
      os.chmod( executableFile, 0755 )

    self.log.verbose( "Copy file " )
    self.dataCopy( tempPath, "/tmp" )

    executableFile = "sh -c " + executableFile + " $*  > " + tempPath + "/3J5jVr.out 2> " + tempPath + "/3J5jVr.err"
    self.log.verbose( "Executable file command: ", executableFile )
    result = self.sshConnect.sshCallByPort( 100, executableFile )

    if not result['OK']:
      self.log.warn( '===========> SSH BigData Hadoop V.1 result NOT OK' )
      self.log.debug( result )
      return S_ERROR( result )
    else:
      self.log.debug( 'BigData Hadoop V.1 result OK' )
    return S_OK( result )

  def __getExecutable( self, proxy, tempPath ):
    ####################################### TODO: replace this hard-coded queue definition

    self.queueDict[114] = {}
    self.queueDict[114]['ParametersDict'] = {}
    self.queueDict[114]['ParametersDict']['CPUTime'] = '1600'
    self.queueDict[114]['CEName'] = 'CesgaHadoop'
    self.queueDict[114]['ParametersDict']['Site'] = 'CESGA'
    #######################################
    pilotOptions = self.__getPilotOptions( 114, 1 )
    if pilotOptions is None:
      return S_ERROR( 'Errors in compiling pilot options' )
    executable = self.__writePilotScript( tempPath, pilotOptions, proxy, "", tempPath )
    result = S_OK( executable )
    return result

  def __writePilotScript( self, workingDirectory, pilotOptions, proxy = '', httpProxy = '', pilotExecDir = '' ):
    """ Bundle together and write out the pilot executable script, admixt the proxy if given
    """

    try:
      compressedAndEncodedProxy = ''
      proxyFlag = 'False'
      if proxy:
        compressedAndEncodedProxy = base64.encodestring( bz2.compress( proxy.dumpAllToString()['Value'] ) )
        proxyFlag = 'True'
      compressedAndEncodedPilot = base64.encodestring( bz2.compress( open( self.pilot, "rb" ).read(), 9 ) )
      compressedAndEncodedInstall = base64.encodestring( bz2.compress( open( self.install, "rb" ).read(), 9 ) )
    except:
      self.log.exception( 'Exception during file compression of proxy, dirac-pilot or dirac-install' )
      return S_ERROR( 'Exception during file compression of proxy, dirac-pilot or dirac-install' )

    localPilot = """#!/bin/bash
/usr/bin/env python << EOF
#
import os, tempfile, sys, shutil, base64, bz2
try:
  pilotExecDir = '%(pilotExecDir)s'
  if not pilotExecDir:
    pilotExecDir = None
  pilotWorkingDirectory = tempfile.mkdtemp( suffix = 'pilot', prefix = 'DIRAC_', dir = pilotExecDir )
  pilotWorkingDirectory = os.path.realpath( pilotWorkingDirectory )
  os.chdir( pilotWorkingDirectory )
  if %(proxyFlag)s:
    open( 'proxy', "w" ).write(bz2.decompress( base64.decodestring( \"\"\"%(compressedAndEncodedProxy)s\"\"\" ) ) )
    os.chmod("proxy",0600)
    os.environ["X509_USER_PROXY"]=os.path.join(pilotWorkingDirectory, 'proxy')
  open( '%(pilotScript)s', "w" ).write(bz2.decompress( base64.decodestring( \"\"\"%(compressedAndEncodedPilot)s\"\"\" ) ) )
  open( '%(installScript)s', "w" ).write(bz2.decompress( base64.decodestring( \"\"\"%(compressedAndEncodedInstall)s\"\"\" ) ) )
  os.chmod("%(pilotScript)s",0700)
  os.chmod("%(installScript)s",0700)
  if "LD_LIBRARY_PATH" not in os.environ:
    os.environ["LD_LIBRARY_PATH"]=""
  if "%(httpProxy)s":
    os.environ["HTTP_PROXY"]="%(httpProxy)s"
  os.environ["X509_CERT_DIR"]=os.path.join(pilotWorkingDirectory, 'etc/grid-security/certificates')
  # TODO: structure the output
  print '==========================================================='
  print 'Environment of execution host'
  for key in os.environ.keys():
    print key + '=' + os.environ[key]
  print '==========================================================='
except Exception, x:
  print >> sys.stderr, x
  sys.exit(-1)
cmd = "python %(pilotScript)s %(pilotOptions)s"
print 'Executing: ', cmd
sys.stdout.flush()
os.system( cmd )

shutil.rmtree( pilotWorkingDirectory )

EOF
""" % { 'compressedAndEncodedProxy': compressedAndEncodedProxy,
        'compressedAndEncodedPilot': compressedAndEncodedPilot,
        'compressedAndEncodedInstall': compressedAndEncodedInstall,
        'httpProxy': httpProxy,
        'pilotExecDir': pilotExecDir,
        'pilotScript': os.path.basename( self.pilot ),
        'installScript': os.path.basename( self.install ),
        'pilotOptions': ' '.join( pilotOptions ),
        'proxyFlag': proxyFlag }

    fd, name = tempfile.mkstemp( suffix = '_pilotwrapper.py', prefix = 'DIRAC_', dir = workingDirectory )
    pilotWrapper = os.fdopen( fd, 'w' )
    pilotWrapper.write( localPilot )
    pilotWrapper.close()
    return name

  def updatePilotStatus( self ):
    """ Update status of pilots in transient states
    """
    for queue in self.queueDict:
      ce = self.queueDict[queue]['CE']
      ceName = self.queueDict[queue]['CEName']
      queueName = self.queueDict[queue]['QueueName']
      ceType = self.queueDict[queue]['CEType']
      siteName = self.queueDict[queue]['Site']

      result = pilotAgentsDB.selectPilots( {'DestinationSite':ceName,
                                           'Queue':queueName,
                                           'GridType':ceType,
                                           'GridSite':siteName,
                                           'Status':TRANSIENT_PILOT_STATUS} )
      if not result['OK']:
        self.log.error( 'Failed to select pilots: %s' % result['Message'] )
        continue
      pilotRefs = result['Value']
      if not pilotRefs:
        continue

      #print "AT >>> pilotRefs", pilotRefs

      result = pilotAgentsDB.getPilotInfo( pilotRefs )
      if not result['OK']:
        self.log.error( 'Failed to get pilots info: %s' % result['Message'] )
        continue
      pilotDict = result['Value']

      #print "AT >>> pilotDict", pilotDict

      stampedPilotRefs = []
      for pRef in pilotDict:
        if pilotDict[pRef]['PilotStamp']:
          stampedPilotRefs.append( pRef + ":::" + pilotDict[pRef]['PilotStamp'] )
        else:
          stampedPilotRefs = list( pilotRefs )
          break

      result = ce.getJobStatus( stampedPilotRefs )
      if not result['OK']:
        self.log.error( 'Failed to get pilots status from CE: %s' % result['Message'] )
        continue
      pilotCEDict = result['Value']

      #print "AT >>> pilotCEDict", pilotCEDict

      for pRef in pilotRefs:
        newStatus = ''
        oldStatus = pilotDict[pRef]['Status']
        ceStatus = pilotCEDict[pRef]
        if oldStatus == ceStatus:
          # Status did not change, continue
          continue
        elif ceStatus == "Unknown" and not oldStatus in FINAL_PILOT_STATUS:
          # Pilot finished without reporting, consider it Aborted
          newStatus = 'Aborted'
        elif ceStatus != 'Unknown' :
          # Update the pilot status to the new value
          newStatus = ceStatus

        if newStatus:
          self.log.info( 'Updating status to %s for pilot %s' % ( newStatus, pRef ) )
          result = pilotAgentsDB.setPilotStatus( pRef, newStatus, '', 'Updated by SiteDirector' )
        # Retrieve the pilot output now
        if newStatus in FINAL_PILOT_STATUS:
          if pilotDict[pRef]['OutputReady'].lower() == 'false' and self.getOutput:
            self.log.info( 'Retrieving output for pilot %s' % pRef )
            pilotStamp = pilotDict[pRef]['PilotStamp']
            pRefStamp = pRef
            if pilotStamp:
              pRefStamp = pRef + ':::' + pilotStamp
            result = ce.getJobOutput( pRefStamp )
            if not result['OK']:
              self.log.error( 'Failed to get pilot output: %s' % result['Message'] )
            else:
              output, error = result['Value']
              result = pilotAgentsDB.storePilotOutput( pRef, output, error )
              if not result['OK']:
                self.log.error( 'Failed to store pilot output: %s' % result['Message'] )

    # The pilot can be in the Done state set by the JobAgent; check whether the output has been retrieved
    for queue in self.queueDict:
      ce = self.queueDict[queue]['CE']

      if not ce.isProxyValid( 120 ):
        result = gProxyManager.getPilotProxyFromDIRACGroup( self.genericPilotDN, self.genericPilotGroup, 1000 )
        if not result['OK']:
          return result
        self.proxy = result['Value']
        ce.setProxy( self.proxy, 940 )

      ceName = self.queueDict[queue]['CEName']
      queueName = self.queueDict[queue]['QueueName']
      ceType = self.queueDict[queue]['CEType']
      siteName = self.queueDict[queue]['Site']
      result = pilotAgentsDB.selectPilots( {'DestinationSite':ceName,
                                           'Queue':queueName,
                                           'GridType':ceType,
                                           'GridSite':siteName,
                                           'OutputReady':'False',
                                           'Status':FINAL_PILOT_STATUS} )

      if not result['OK']:
        self.log.error( 'Failed to select pilots: %s' % result['Message'] )
        continue
      pilotRefs = result['Value']
      if not pilotRefs:
        continue
      result = pilotAgentsDB.getPilotInfo( pilotRefs )
      if not result['OK']:
        self.log.error( 'Failed to get pilots info: %s' % result['Message'] )
        continue
      pilotDict = result['Value']
      if self.getOutput:
        for pRef in pilotRefs:
          self.log.info( 'Retrieving output for pilot %s' % pRef )
          pilotStamp = pilotDict[pRef]['PilotStamp']
          pRefStamp = pRef
          if pilotStamp:
            pRefStamp = pRef + ':::' + pilotStamp
          result = ce.getJobOutput( pRefStamp )
          if not result['OK']:
            self.log.error( 'Failed to get pilot output: %s' % result['Message'] )
          else:
            output, error = result['Value']
            result = pilotAgentsDB.storePilotOutput( pRef, output, error )
            if not result['OK']:
              self.log.error( 'Failed to store pilot output: %s' % result['Message'] )

      # Check if the accounting is to be sent
      if self.sendAccounting:
        result = pilotAgentsDB.selectPilots( {'DestinationSite':ceName,
                                             'Queue':queueName,
                                             'GridType':ceType,
                                             'GridSite':siteName,
                                             'AccountingSent':'False',
                                             'Status':FINAL_PILOT_STATUS} )

        if not result['OK']:
          self.log.error( 'Failed to select pilots: %s' % result['Message'] )
          continue
        pilotRefs = result['Value']
        if not pilotRefs:
          continue
        result = pilotAgentsDB.getPilotInfo( pilotRefs )
        if not result['OK']:
          self.log.error( 'Failed to get pilots info: %s' % result['Message'] )
          continue
        pilotDict = result['Value']
        result = self.sendPilotAccounting( pilotDict )
        if not result['OK']:
          self.log.error( 'Failed to send pilot agent accounting' )

    return S_OK()

  def __getPilotOptions( self, queue, pilotsToSubmit ):
    """ Prepare pilot options
    """

    queueDict = self.queueDict[queue]['ParametersDict']
    pilotOptions = []

    setup = gConfig.getValue( "/DIRAC/Setup", "unknown" )
    if setup == 'unknown':
      self.log.error( 'Setup is not defined in the configuration' )
      return None
    pilotOptions.append( '-S %s' % setup )
    opsHelper = Operations.Operations( group = self.genericPilotGroup, setup = setup )

    #Installation defined?
    installationName = opsHelper.getValue( "Pilot/Installation", "" )
    if installationName:
      pilotOptions.append( '-V %s' % installationName )

    #Project defined?
    projectName = opsHelper.getValue( "Pilot/Project", "" )
    if projectName:
      pilotOptions.append( '-l %s' % projectName )
    else:
      self.log.info( 'DIRAC project will be installed by pilots' )

    #Request a release
    diracVersion = opsHelper.getValue( "Pilot/Version", [] )
    ##### TODO: remove this hard-coded version override
    diracVersion = "v6r4"
    if not diracVersion:
      self.log.error( 'Pilot/Version is not defined in the configuration' )
      return None
    #diracVersion is a list of accepted releases. Just take the first one
    pilotOptions.append( '-r %s' % diracVersion )

    ownerDN = self.genericPilotDN
    ownerGroup = self.genericPilotGroup
    result = gProxyManager.requestToken( ownerDN, ownerGroup, pilotsToSubmit * 5 )
    if not result[ 'OK' ]:
      self.log.error( ERROR_TOKEN, result['Message'] )
      return S_ERROR( ERROR_TOKEN )
    ( token, numberOfUses ) = result[ 'Value' ]
    pilotOptions.append( '-o /Security/ProxyToken=%s' % token )
    # Use Filling mode
    pilotOptions.append( '-M %s' % 5 )

    # Debug
    if self.pilotLogLevel.lower() == 'debug':
      pilotOptions.append( '-d' )
    # CS Servers
    csServers = gConfig.getValue( "/DIRAC/Configuration/Servers", [] )
    pilotOptions.append( '-C %s' % ",".join( csServers ) )
    # DIRAC Extensions
   # extensionsList = CSGlobals.getCSExtensions()
   # if extensionsList:
   #   pilotOptions.append( '-e %s' % ",".join( extensionsList ) )
    # Requested CPU time
    pilotOptions.append( '-T %s' % queueDict['CPUTime'] )
    # CEName
    pilotOptions.append( '-N %s' % self.queueDict[queue]['CEName'] )
    # SiteName
    pilotOptions.append( '-n %s' % queueDict['Site'] )
    if 'ClientPlatform' in queueDict:
      pilotOptions.append( "-p '%s'" % queueDict['ClientPlatform'] )

    if 'SharedArea' in queueDict:
      pilotOptions.append( "-o '/LocalSite/SharedArea=%s'" % queueDict['SharedArea'] )

    group = "lhcb_pilot"
    if group:
      pilotOptions.append( '-G %s' % group )

    self.log.verbose( "pilotOptions: ", ' '.join( pilotOptions ) )

    return pilotOptions

  def sendPilotAccounting( self, pilotDict ):
    """ Send pilot accounting record
    """
    for pRef in pilotDict:
      self.log.verbose( 'Preparing accounting record for pilot %s' % pRef )
      pA = PilotAccounting()
      pA.setEndTime( pilotDict[pRef][ 'LastUpdateTime' ] )
      pA.setStartTime( pilotDict[pRef][ 'SubmissionTime' ] )
      retVal = CS.getUsernameForDN( pilotDict[pRef][ 'OwnerDN' ] )
      if not retVal[ 'OK' ]:
        userName = 'unknown'
        self.log.error( "Can't determine username for dn:", pilotDict[pRef][ 'OwnerDN' ] )
      else:
        userName = retVal[ 'Value' ]
      pA.setValueByKey( 'User', userName )
      pA.setValueByKey( 'UserGroup', pilotDict[pRef][ 'OwnerGroup' ] )
      result = getSiteForCE( pilotDict[pRef][ 'DestinationSite' ] )
      if result['OK'] and result[ 'Value' ].strip():
        pA.setValueByKey( 'Site', result['Value'].strip() )
      else:
        pA.setValueByKey( 'Site', 'Unknown' )
      pA.setValueByKey( 'GridCE', pilotDict[pRef][ 'DestinationSite' ] )
      pA.setValueByKey( 'GridMiddleware', pilotDict[pRef][ 'GridType' ] )
      pA.setValueByKey( 'GridResourceBroker', pilotDict[pRef][ 'Broker' ] )
      pA.setValueByKey( 'GridStatus', pilotDict[pRef][ 'Status' ] )
      if not 'Jobs' in pilotDict[pRef]:
        pA.setValueByKey( 'Jobs', 0 )
      else:
        pA.setValueByKey( 'Jobs', len( pilotDict[pRef]['Jobs'] ) )
      self.log.info( "Adding accounting record for pilot %s" % pilotDict[pRef][ 'PilotID' ] )
      retVal = gDataStoreClient.addRegister( pA )
      if not retVal[ 'OK' ]:
        self.log.error( 'Failed to send accounting info for pilot %s' % pRef )
      else:
        # Set up AccountingSent flag
        result = pilotAgentsDB.setAccountingFlag( pRef )
        if not result['OK']:
          self.log.error( 'Failed to set accounting flag for pilot %s' % pRef )

    self.log.info( 'Committing accounting records for %d pilots' % len( pilotDict ) )
    result = gDataStoreClient.commit()
    if result['OK']:
      for pRef in pilotDict:
        self.log.verbose( 'Setting AccountingSent flag for pilot %s' % pRef )
        result = pilotAgentsDB.setAccountingFlag( pRef )
        if not result['OK']:
          self.log.error( 'Failed to set accounting flag for pilot %s' % pRef )
    else:
      return result

    return S_OK()
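# Hedged standalone sketch (an assumption, not the original helper): this is the
# same embed-and-restore trick used by jobSubmit() and __writePilotScript() above:
# a payload is bz2-compressed and base64-encoded into the wrapper text, then
# restored on the worker node with the inverse calls.
import base64, bz2

payload = "#!/bin/sh\necho hello from the pilot\n"
encoded = base64.encodestring( bz2.compress( payload ) )       # what gets pasted into the wrapper
restored = bz2.decompress( base64.decodestring( encoded ) )    # what the wrapper does at run time
assert restored == payload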
class HadoopV1InteractiveClient( object ):
  jobid = 0
  user = ""
  ip = ""

  def __init__( self, User, PublicIP, Port ):

    # Monitoring loop interval, in seconds
    self.monitoringloop = 5

    self.log = gLogger.getSubLogger( "HadoopV1InteractiveClient" )
    self.user = User
    self.publicIP = PublicIP
    self.port = Port
    self.sshConnect = ConnectionUtils( self.user , self.publicIP, self.port )

  def jobSubmit( self, tempPath, HadoopInteractiveJob, proxy,
                 HadoopInteractiveJobOutput, HadoopInteractiveJobCommand ):
   """ Method to submit job
   """
    # if no proxy is supplied, the executable can be submitted directly
    # otherwise a wrapper script is needed to get the proxy to the execution node
    # The wrapper script makes debugging more complicated and thus it is
    # recommended to transfer a proxy inside the executable if possible.
   HadoopInteractiveJobPath = tempPath + "/" + HadoopInteractiveJob
   self.log.debug( 'Step6::: Creating jar Path: ', HadoopInteractiveJobPath )
   if proxy:
    self.log.verbose( 'Setting up proxy for payload' )
    compressedAndEncodedProxy = base64.encodestring( bz2.compress( proxy.dumpAllToString()['Value'] ) ).replace( '\n', '' )
    compressedAndEncodedExecutable = base64.encodestring( bz2.compress( open( HadoopInteractiveJobPath, "rb" ).read(), 9 ) ).replace( '\n', '' )

    wrapperContent = """#!/usr/bin/env python
# Wrapper script for executable and proxy
import os, tempfile, sys, base64, bz2, shutil
try:
  workingDirectory = tempfile.mkdtemp( suffix = '_wrapper', prefix= 'BigDat_' )
  os.chdir( workingDirectory )
  open( 'proxy', "w" ).write(bz2.decompress( base64.decodestring( "%(compressedAndEncodedProxy)s" ) ) )
  open( '%(executablepath)s', "w" ).write(bz2.decompress( base64.decodestring( "%(compressedAndEncodedExecutable)s" ) ) )
  os.chmod('proxy',0600)
  os.chmod('%(executablepath)s',0700)
  os.environ["X509_USER_PROXY"]=os.path.join(workingDirectory, 'proxy')
except Exception, x:
  print >> sys.stderr, x
  sys.exit(-1)
cmd = "%(executable)s > %(HadoopInteractiveJobOutput)s 2>&1"
print 'Executing: ', cmd
sys.stdout.flush()
os.system( cmd )              
shutil.rmtree( workingDirectory )
              """ % { 'compressedAndEncodedProxy': compressedAndEncodedProxy, \
                    'compressedAndEncodedExecutable': compressedAndEncodedExecutable, \
                    'executablepath': HadoopInteractiveJobPath, \
                    'executable': HadoopInteractiveJobCommand, \
                    'HadoopInteractiveJobOutput': HadoopInteractiveJobOutput }

    fd, name = tempfile.mkstemp( suffix = '_wrapper.py', prefix = 'BigDat_', dir = tempPath )
    wrapper = os.fdopen( fd, 'w' )
    wrapper.write( wrapperContent )
    wrapper.close()
    self.log.debug( 'Step7::: Creating payload: ' )
    submitFile = name

    wrapperContent = """#!/usr/bin/env python
# Wrapper script for executable and proxy
import os, tempfile, sys, base64, bz2, shutil, getopt,re

def main(argv):
   inputfile = ''
   command = ''
   try:
      opts, args = getopt.getopt(argv,'h:c:',[''])
   except getopt.GetoptError:
      print 'name.py -c <command>'
      sys.exit(2)
   for opt, arg in opts:
      if opt == '-h':
         print 'name.py -c <command>'
         sys.exit()
      elif opt in ('-c', '--command'):
         command = arg
   if (command == 'step1'):
        cmd = 'grep "Running job:" %(HadoopInteractiveJobOutput)s'
        returned = os.system(cmd)

if __name__ == '__main__':
   main(sys.argv[1:])     
              """ % { 'HadoopInteractiveJobOutput': HadoopInteractiveJobOutput }

    fd, name = tempfile.mkstemp( suffix = '_getInfo.py', prefix = 'BigDat_', dir = tempPath )
    wrapper = os.fdopen( fd, 'w' )
    wrapper.write( wrapperContent )
    wrapper.close()

    submitFile2 = name
    self.log.debug( 'Step8::: Creating wrapper: ' )
   else: # no proxy
     submitFile = HadoopInteractiveJob
     # Note: submitFile2 (the getInfo helper) is only created in the proxy branch above

   # Copy the executable
   self.log.debug( 'Step9::: Moving Wrapper and Payload: ' )
   os.chmod( submitFile, stat.S_IRUSR | stat.S_IXUSR )
   sFile = os.path.basename( submitFile )
   returned = self.sshConnect.scpCallByPort( 100, submitFile, '%s/%s' % ( tempPath, os.path.basename( submitFile ) ) )
   returned2 = self.sshConnect.scpCallByPort( 100, submitFile2, '%s/%s' % ( tempPath, os.path.basename( submitFile2 ) ) )

   if not returned['OK']:
      return S_ERROR( returned['Message'] )
   if not returned2['OK']:
      return S_ERROR( returned2['Message'] )

   # submit submitFile to the batch system
   cmd = submitFile

   self.log.verbose( 'BigData submission command: %s' % ( cmd ) )

   thread1 = InteractiveJobLaunchPortThread( self.user, self.publicIP , cmd, self.port )
   #thread2 = InteractiveJobMonitorThread( self.user, self.publicIP, self.monitoringloop,
   #                                       thread1, tempPath + HadoopInteractiveJobOutput, submitFile2 )

   thread1.start()

   time.sleep( 5 )

   if not thread1.isAlive():
     self.log.warn( '===========> SSH BigData Hadoop-HadoopInteractive thread V.1 result NOT OK' )
     return S_ERROR( "Error launching Hadoop-HadoopInteractive Thread" )

   self.log.debug( 'BigData Hadoop-HadoopInteractive V.1 result OK', thread1.getName() )

   self.log.debug( 'Step10::: Stop process for get the JobID: ' )
   cmd = '/bin/chmod 555 ' + submitFile2
   self.sshConnect.sshCallByPort( 100, cmd )

   cmd = submitFile2 + ' -c step1'
   returned = self.sshConnect.sshCallByPort( 100, cmd )
   self.log.debug( 'Step11:::InteractiveJobMonitorThread:step1:getJobID:', returned )
   if returned != None:
     if ( returned['Value'][1] != "" ):
       resulting = re.search( "job_+([^:]+)", returned['Value'][1] )
       if ( resulting != None ):
         self.log.debug( 'Step12:::InteractiveJobMonitorThread:step1:JobID:', resulting.group( 0 ).rstrip() )
         return S_OK( resulting.group( 0 ).rstrip() )

  def getData( self, temSRC, tempDest ):
    cmdSeq = "hadoop dfs - get " + temSRC + " " + tempDest
    return self.sshConnect.sshCallByPort( 86400, cmdSeq )

  def delData( self, tempPath ):
    cmdSeq = "rm - Rf " + tempPath
    return self.sshConnect.sshCallByPort( 100, cmdSeq )

  def dataCopy( self, tempPath, tmpSandBoxDir ):
    return self.sshConnect.scpCallByPort( 100, tempPath, tmpSandBoxDir )

  def getdata( self, tempPath, tmpSandBoxDir ):
    return self.sshConnect.scpCallByPort( 100, tempPath, tmpSandBoxDir, False )

  def jobStatus( self, jobId, user, host ):
    cmdSeq = "ssh - l " + user + " " + host + " 'hadoop job -list all | awk -v job_id=" + jobId.strip() + " "\
        " '\"'\"'BEGIN{OFS=\"\t\"; FS=\"\t\"; final_state=\"Unknown\"} "\
        "$0 == \"States are:\" {getline; for(i=1;i<=NF;i++) { split($i,s,\" \"); states[s[3]] = s[1] }} $1==job_id { final_state=states[$2]; exit} END{print final_state}'\"'\""

    gLogger.info( 'Command Submitted: ', cmdSeq )
    return self.sshConnect.sshOnlyCall( 100, cmdSeq )
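# Hedged illustration (the sample line is made up): Hadoop 1.x job identifiers
# have the form job_<jobTrackerStartTime>_<sequence>, which is what the
# "job_..." searches in jobSubmit() above pull out of the captured output, and
# what jobStatus() expects as its jobId argument.
import re
sample_line = "INFO mapred.JobClient: Running job: job_201305171234_0007"
match = re.search( r"job_(\d+)_(\d+)", sample_line )
if match is not None:
    print match.group( 0 )   # -> job_201305171234_0007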