class InteractiveJobLaunchPortThread( threading.Thread ):
  """ Thread that launches an interactive BigData job over SSH and blocks
      until the remote command returns.
  """

  def __init__( self, user, publicIP, jobname, Port ):
    threading.Thread.__init__( self )
    self.jobname = jobname
    self.sshConnect = ConnectionUtils( user, publicIP, Port )

  def run( self ):
    self.log = gLogger.getSubLogger( "InteractiveJobLaunchPortThread" )
    self.log.debug( 'InteractiveJobLaunchPortThread: launching job', self.jobname )
    self.jobLaunch( self.jobname )

  def jobLaunch( self, jobname ):
    # 86400 s (24 h) timeout, as used by the interactive getData() call below
    result = self.sshConnect.sshCallByPort( 86400, jobname )
    self.log.debug( 'InteractiveJobLaunchPortThread: job finished', result )
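# A minimal usage sketch for InteractiveJobLaunchPortThread (illustrative only,
# assuming a reachable SSH gateway; the user, host and command below are
# placeholder values, not tested ones):
#
#   launcher = InteractiveJobLaunchPortThread( 'diracuser', '193.144.0.1',
#                                              'hadoop jar /tmp/wordcount.jar in out', 22 )
#   launcher.start()
#   # sshCallByPort() blocks until the remote command returns, so poll
#   # launcher.isAlive() instead of calling join() without a timeout.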
class HadoopV1Client( object ):

  def __init__( self, User, PublicIP, Port ):
    self.queueDict = {}
    self.pilot = DIRAC_PILOT
    self.install = DIRAC_INSTALL
    self.genericPilotDN = '/DC=es/DC=irisgrid/O=cesga/CN=victor-fernandez'
    self.genericPilotGroup = 'lhcb_pilot'
    self.pilotLogLevel = 'DEBUG'
    self.log = gLogger.getSubLogger( "HadoopV1Client" )
    self.user = User
    self.publicIP = PublicIP
    self.port = Port
    self.sshConnect = ConnectionUtils( self.user, self.publicIP, self.port )

  def getData( self, tempSRC, tempDest ):
    """ Copy a file out of HDFS to a local path on the Hadoop gateway """
    cmdSeq = "hadoop dfs -get " + tempSRC + " " + tempDest
    return self.sshConnect.sshCallByPort( 100, cmdSeq )

  def jobSubmit( self, tempPath, jobXMLName, proxy ):
    """ Method to submit a job to the Hadoop V.1 cluster """
    executableFile = tempPath + "/" + jobXMLName
    # If no proxy is supplied, the executable can be submitted directly.
    # Otherwise a wrapper script is needed to get the proxy to the execution
    # node. The wrapper script makes debugging more complicated, so it is
    # recommended to transfer a proxy inside the executable when possible.
    if proxy:
      self.log.verbose( 'Setting up proxy for payload' )
      compressedAndEncodedProxy = base64.encodestring( bz2.compress( proxy.dumpAllToString()['Value'] ) ).replace( '\n', '' )
      compressedAndEncodedExecutable = base64.encodestring( bz2.compress( open( executableFile, "rb" ).read(), 9 ) ).replace( '\n', '' )

      wrapperContent = """#!/usr/bin/env python
# Wrapper script for executable and proxy
import os, tempfile, sys, base64, bz2, shutil
try:
  workingDirectory = tempfile.mkdtemp( suffix = '_wrapper', prefix = 'BigDat_' )
  os.chdir( workingDirectory )
  open( 'proxy', "w" ).write( bz2.decompress( base64.decodestring( "%(compressedAndEncodedProxy)s" ) ) )
  open( '%(executable)s', "w" ).write( bz2.decompress( base64.decodestring( "%(compressedAndEncodedExecutable)s" ) ) )
  os.chmod( 'proxy', 0600 )
  os.chmod( '%(executable)s', 0700 )
  os.environ["X509_USER_PROXY"] = os.path.join( workingDirectory, 'proxy' )
except Exception, x:
  print >> sys.stderr, x
  sys.exit( -1 )
cmd = "hadoop job -submit %(executable)s"
print 'Executing: ', cmd
sys.stdout.flush()
os.system( cmd )
shutil.rmtree( workingDirectory )
""" % { 'compressedAndEncodedProxy': compressedAndEncodedProxy,
        'compressedAndEncodedExecutable': compressedAndEncodedExecutable,
        'executable': executableFile }

      fd, name = tempfile.mkstemp( suffix = '_wrapper.py', prefix = 'BigDat_', dir = tempPath )
      wrapper = os.fdopen( fd, 'w' )
      wrapper.write( wrapperContent )
      wrapper.close()
      submitFile = name
    else:  # no proxy
      submitFile = executableFile

    # Copy the executable to the Hadoop gateway
    os.chmod( submitFile, stat.S_IRUSR | stat.S_IXUSR )
    result = self.sshConnect.scpCallByPort( 10, submitFile, '%s/%s' % ( tempPath, os.path.basename( submitFile ) ) )
    # Submit submitFile to the batch system
    cmd = submitFile
    self.log.verbose( 'BigData submission command: %s' % cmd )
    result = self.sshConnect.sshCallByPort( 10, cmd )
    if not result['OK']:
      # A job id may still be present in the output even if the call failed
      for getit in result['Value']:
        resulting = re.search( "job_(\d+)_(\d+)", str( getit ) )
        if resulting is not None:
          self.log.debug( 'BigData Hadoop V.1 result OK' )
          return S_OK( resulting.group( 0 ).rstrip() )
      self.log.warn( 'SSH BigData Hadoop V.1 result NOT OK' )
      self.log.debug( result )
      return S_ERROR( result )
    else:
      self.log.debug( 'BigData Hadoop V.1 result OK', result )
      for getit in result['Value']:
        resulting = re.search( "job_(\d+)_(\d+)", str( getit ) )
        if resulting is not None:
          return S_OK( resulting.group( 0 ).rstrip() )
      return S_ERROR( result['Value'] )

  def delHadoopData( self, tempPath ):
    """ Remove a path recursively from HDFS """
    cmdSeq = "hadoop dfs -rmr " + tempPath
    return self.sshConnect.sshCallByPort( 100, cmdSeq )

  def delData( self, tempPath ):
    """ Remove a path recursively from the gateway local disk """
    cmdSeq = "rm -Rf " + tempPath
    return self.sshConnect.sshCallByPort( 100, cmdSeq )

  def dataCopy( self, tempPath, tmpSandBoxDir ):
    return self.sshConnect.scpCallByPort( 100, tempPath, tmpSandBoxDir )

  def getdata( self, tempPath, tmpSandBoxDir ):
    return self.sshConnect.scpCallByPort( 100, tempPath, tmpSandBoxDir, False )

  def jobStatus( self, jobId, user, host ):
    """ Query the Hadoop job tracker for the final state of a job """
    cmdSeq = "ssh -p " + str( self.port ) + " -l " + user + " " + host + \
             " 'hadoop job -list all | awk -v job_id=" + jobId.strip() + " " \
             " '\"'\"'BEGIN{OFS=\"\\t\"; FS=\"\\t\"; final_state=\"Unknown\"}" \
             "$0 == \"States are:\" {getline; for(i=1;i<=NF;i++) { split($i,s,\" \"); states[s[3]] = s[1] }} $1==job_id { final_state=states[$2]; exit} END{print final_state}'\"'\""
    gLogger.info( 'Command Submitted: ', cmdSeq )
    return self.sshConnect.sshOnlyCall( 10, cmdSeq )

  def newJob( self, path, jobDiracId, bdJobId ):
    """ Check whether the monitoring script reports a BigData job id different
        from the known one
    """
    cmdSeq = "ssh -p " + str( self.port ) + " -l " + self.user + " " + self.publicIP + \
             " /" + path + "/" + str( jobDiracId ) + "/BigDat_*_getInfo.py -c step1 | wc -l"
    gLogger.info( 'Command Submitted: ', cmdSeq )
    returned = self.sshConnect.sshOnlyCall( 10, cmdSeq )
    gLogger.info( 'Command Returned: ', returned )
    if returned is not None:
      if returned['Value'][1] != "":
        if int( returned['Value'][1] ) > 1:
          cmdSeq = "ssh -p " + str( self.port ) + " -l " + self.user + " " + self.publicIP + \
                   " /" + path + "/" + str( jobDiracId ) + "/BigDat_*_getInfo.py -c step1 | tail -n1"
          gLogger.info( 'Command Submitted: ', cmdSeq )
          returned = self.sshConnect.sshOnlyCall( 10, cmdSeq )
          gLogger.info( 'Command Returned: ', returned )
          if returned is not None:
            if returned['Value'][1] != "":
              resulting = re.search( "job_+([^:]+)", returned['Value'][1] ).group( 0 ).rstrip()
              if bdJobId == resulting:
                gLogger.info( 'The BD job id is the same' )
                return S_ERROR( "Same JobID" )
              else:
                gLogger.info( 'The new BD job id is: ', resulting )
                return S_OK( resulting )
    return S_ERROR( returned )

  def jobCompleteStatus( self, jobId ):
    cmdSeq = "hadoop job -status " + jobId
    gLogger.info( 'Command Submitted: ', cmdSeq )
    return self.sshConnect.sshCallByPort( 100, cmdSeq )

  #################################################################################################################

  def submitPilotJob( self, tempPath, jobXMLName, proxy ):
    executableFile = self.__getExecutable( proxy, tempPath )
    self.log.verbose( "Executable file path: %s" % executableFile )
    if executableFile['OK']:
      executableFile = executableFile['Value']
    else:
      return S_ERROR( executableFile )
    if not os.access( executableFile, os.R_OK | os.X_OK ):
      os.chmod( executableFile, 0755 )
    self.log.verbose( "Copy file" )
    self.dataCopy( tempPath, "/tmp" )
    executableFile = "sh -c " + executableFile + " $* > " + tempPath + "/3J5jVr.out 2> " + tempPath + "/3J5jVr.err"
    self.log.verbose( "Executable file command: ", executableFile )
    result = self.sshConnect.sshCallByPort( 100, executableFile )
    if not result['OK']:
      self.log.warn( 'SSH BigData Hadoop V.1 result NOT OK' )
      self.log.debug( result )
      return S_ERROR( result )
    else:
      self.log.debug( 'BigData Hadoop V.1 result OK' )
      return S_OK( result )

  def __getExecutable( self, proxy, tempPath ):
    # FIXME: queue parameters are hard-coded for the CESGA test setup,
    # to be changed
    self.queueDict[114] = {}
    self.queueDict[114]['ParametersDict'] = {}
    self.queueDict[114]['ParametersDict']['CPUTime'] = '1600'
    self.queueDict[114]['CEName'] = 'CesgaHadoop'
    self.queueDict[114]['ParametersDict']['Site'] = 'CESGA'
    pilotOptions = self.__getPilotOptions( 114, 1 )
    if pilotOptions is None:
      return S_ERROR( 'Errors in compiling pilot options' )
    executable = self.__writePilotScript( tempPath, pilotOptions, proxy, "", tempPath )
    return S_OK( executable )

  def __writePilotScript( self, workingDirectory, pilotOptions, proxy = '', httpProxy = '', pilotExecDir = '' ):
    """ Bundle together and write out the pilot executable script; admix the
        proxy if given
    """
    try:
      compressedAndEncodedProxy = ''
      proxyFlag = 'False'
      if proxy:
        compressedAndEncodedProxy = base64.encodestring( bz2.compress( proxy.dumpAllToString()['Value'] ) )
        proxyFlag = 'True'
      compressedAndEncodedPilot = base64.encodestring( bz2.compress( open( self.pilot, "rb" ).read(), 9 ) )
      compressedAndEncodedInstall = base64.encodestring( bz2.compress( open( self.install, "rb" ).read(), 9 ) )
    except:
      self.log.exception( 'Exception during file compression of proxy, dirac-pilot or dirac-install' )
      return S_ERROR( 'Exception during file compression of proxy, dirac-pilot or dirac-install' )

    localPilot = """#!/bin/bash
/usr/bin/env python << EOF
#
import os, tempfile, sys, shutil, base64, bz2
try:
  pilotExecDir = '%(pilotExecDir)s'
  if not pilotExecDir:
    pilotExecDir = None
  pilotWorkingDirectory = tempfile.mkdtemp( suffix = 'pilot', prefix = 'DIRAC_', dir = pilotExecDir )
  pilotWorkingDirectory = os.path.realpath( pilotWorkingDirectory )
  os.chdir( pilotWorkingDirectory )
  if %(proxyFlag)s:
    open( 'proxy', "w" ).write( bz2.decompress( base64.decodestring( \"\"\"%(compressedAndEncodedProxy)s\"\"\" ) ) )
    os.chmod( "proxy", 0600 )
    os.environ["X509_USER_PROXY"] = os.path.join( pilotWorkingDirectory, 'proxy' )
  open( '%(pilotScript)s', "w" ).write( bz2.decompress( base64.decodestring( \"\"\"%(compressedAndEncodedPilot)s\"\"\" ) ) )
  open( '%(installScript)s', "w" ).write( bz2.decompress( base64.decodestring( \"\"\"%(compressedAndEncodedInstall)s\"\"\" ) ) )
  os.chmod( "%(pilotScript)s", 0700 )
  os.chmod( "%(installScript)s", 0700 )
  if "LD_LIBRARY_PATH" not in os.environ:
    os.environ["LD_LIBRARY_PATH"] = ""
  if "%(httpProxy)s":
    os.environ["HTTP_PROXY"] = "%(httpProxy)s"
  os.environ["X509_CERT_DIR"] = os.path.join( pilotWorkingDirectory, 'etc/grid-security/certificates' )
  # TODO: structure the output
  print '==========================================================='
  print 'Environment of execution host'
  for key in os.environ.keys():
    print key + '=' + os.environ[key]
  print '==========================================================='
except Exception, x:
  print >> sys.stderr, x
  sys.exit( -1 )
cmd = "python %(pilotScript)s %(pilotOptions)s"
print 'Executing: ', cmd
sys.stdout.flush()
os.system( cmd )
shutil.rmtree( pilotWorkingDirectory )
EOF
""" % { 'compressedAndEncodedProxy': compressedAndEncodedProxy,
        'compressedAndEncodedPilot': compressedAndEncodedPilot,
        'compressedAndEncodedInstall': compressedAndEncodedInstall,
        'httpProxy': httpProxy,
        'pilotExecDir': pilotExecDir,
        'pilotScript': os.path.basename( self.pilot ),
        'installScript': os.path.basename( self.install ),
        'pilotOptions': ' '.join( pilotOptions ),
        'proxyFlag': proxyFlag }

    fd, name = tempfile.mkstemp( suffix = '_pilotwrapper.py', prefix = 'DIRAC_', dir = workingDirectory )
    pilotWrapper = os.fdopen( fd, 'w' )
    pilotWrapper.write( localPilot )
    pilotWrapper.close()
    return name

  def updatePilotStatus( self ):
    """ Update status of pilots in transient states """
    for queue in self.queueDict:
      ce = self.queueDict[queue]['CE']
      ceName = self.queueDict[queue]['CEName']
      queueName = self.queueDict[queue]['QueueName']
      ceType = self.queueDict[queue]['CEType']
      siteName = self.queueDict[queue]['Site']
      result = pilotAgentsDB.selectPilots( {'DestinationSite': ceName,
                                            'Queue': queueName,
                                            'GridType': ceType,
                                            'GridSite': siteName,
                                            'Status': TRANSIENT_PILOT_STATUS} )
      if not result['OK']:
        self.log.error( 'Failed to select pilots: %s' % result['Message'] )
        continue
      pilotRefs = result['Value']
      if not pilotRefs:
        continue
      result = pilotAgentsDB.getPilotInfo( pilotRefs )
      if not result['OK']:
        self.log.error( 'Failed to get pilots info: %s' % result['Message'] )
        continue
      pilotDict = result['Value']
      stampedPilotRefs = []
      for pRef in pilotDict:
        if pilotDict[pRef]['PilotStamp']:
          stampedPilotRefs.append( pRef + ":::" + pilotDict[pRef]['PilotStamp'] )
        else:
          stampedPilotRefs = list( pilotRefs )
          break
      result = ce.getJobStatus( stampedPilotRefs )
      if not result['OK']:
        self.log.error( 'Failed to get pilots status from CE: %s' % result['Message'] )
        continue
      pilotCEDict = result['Value']
      for pRef in pilotRefs:
        newStatus = ''
        oldStatus = pilotDict[pRef]['Status']
        ceStatus = pilotCEDict[pRef]
        if oldStatus == ceStatus:
          # Status did not change, continue
          continue
        elif ceStatus == "Unknown" and oldStatus not in FINAL_PILOT_STATUS:
          # Pilot finished without reporting, consider it Aborted
          newStatus = 'Aborted'
        elif ceStatus != 'Unknown':
          # Update the pilot status to the new value
          newStatus = ceStatus
        if newStatus:
          self.log.info( 'Updating status to %s for pilot %s' % ( newStatus, pRef ) )
          result = pilotAgentsDB.setPilotStatus( pRef, newStatus, '', 'Updated by SiteDirector' )
        # Retrieve the pilot output now
        if newStatus in FINAL_PILOT_STATUS:
          if pilotDict[pRef]['OutputReady'].lower() == 'false' and self.getOutput:
            self.log.info( 'Retrieving output for pilot %s' % pRef )
            pilotStamp = pilotDict[pRef]['PilotStamp']
            pRefStamp = pRef
            if pilotStamp:
              pRefStamp = pRef + ':::' + pilotStamp
            result = ce.getJobOutput( pRefStamp )
            if not result['OK']:
              self.log.error( 'Failed to get pilot output: %s' % result['Message'] )
            else:
              output, error = result['Value']
              result = pilotAgentsDB.storePilotOutput( pRef, output, error )
              if not result['OK']:
                self.log.error( 'Failed to store pilot output: %s' % result['Message'] )

    # The pilot can be in Done state set by the job agent: check if the output
    # is retrieved
    for queue in self.queueDict:
      ce = self.queueDict[queue]['CE']
      if not ce.isProxyValid( 120 ):
        result = gProxyManager.getPilotProxyFromDIRACGroup( self.genericPilotDN, self.genericPilotGroup, 1000 )
        if not result['OK']:
          return result
        ce.setProxy( self.proxy, 940 )
      ceName = self.queueDict[queue]['CEName']
      queueName = self.queueDict[queue]['QueueName']
      ceType = self.queueDict[queue]['CEType']
      siteName = self.queueDict[queue]['Site']
      result = pilotAgentsDB.selectPilots( {'DestinationSite': ceName,
                                            'Queue': queueName,
                                            'GridType': ceType,
                                            'GridSite': siteName,
                                            'OutputReady': 'False',
                                            'Status': FINAL_PILOT_STATUS} )
      if not result['OK']:
        self.log.error( 'Failed to select pilots: %s' % result['Message'] )
        continue
      pilotRefs = result['Value']
      if not pilotRefs:
        continue
      result = pilotAgentsDB.getPilotInfo( pilotRefs )
      if not result['OK']:
        self.log.error( 'Failed to get pilots info: %s' % result['Message'] )
        continue
      pilotDict = result['Value']
      if self.getOutput:
        for pRef in pilotRefs:
          self.log.info( 'Retrieving output for pilot %s' % pRef )
          pilotStamp = pilotDict[pRef]['PilotStamp']
          pRefStamp = pRef
          if pilotStamp:
            pRefStamp = pRef + ':::' + pilotStamp
          result = ce.getJobOutput( pRefStamp )
          if not result['OK']:
            self.log.error( 'Failed to get pilot output: %s' % result['Message'] )
          else:
            output, error = result['Value']
            result = pilotAgentsDB.storePilotOutput( pRef, output, error )
            if not result['OK']:
              self.log.error( 'Failed to store pilot output: %s' % result['Message'] )
      # Check if the accounting is to be sent
      if self.sendAccounting:
        result = pilotAgentsDB.selectPilots( {'DestinationSite': ceName,
                                              'Queue': queueName,
                                              'GridType': ceType,
                                              'GridSite': siteName,
                                              'AccountingSent': 'False',
                                              'Status': FINAL_PILOT_STATUS} )
        if not result['OK']:
          self.log.error( 'Failed to select pilots: %s' % result['Message'] )
          continue
        pilotRefs = result['Value']
        if not pilotRefs:
          continue
        result = pilotAgentsDB.getPilotInfo( pilotRefs )
        if not result['OK']:
          self.log.error( 'Failed to get pilots info: %s' % result['Message'] )
          continue
        pilotDict = result['Value']
        result = self.sendPilotAccounting( pilotDict )
        if not result['OK']:
          self.log.error( 'Failed to send pilot agent accounting' )
    return S_OK()

  def __getPilotOptions( self, queue, pilotsToSubmit ):
    """ Prepare pilot options """
    queueDict = self.queueDict[queue]['ParametersDict']
    pilotOptions = []
    setup = gConfig.getValue( "/DIRAC/Setup", "unknown" )
    if setup == 'unknown':
      self.log.error( 'Setup is not defined in the configuration' )
      return None
    pilotOptions.append( '-S %s' % setup )
    opsHelper = Operations.Operations( group = self.genericPilotGroup, setup = setup )
    # Installation defined?
    installationName = opsHelper.getValue( "Pilot/Installation", "" )
    if installationName:
      pilotOptions.append( '-V %s' % installationName )
    # Project defined?
    projectName = opsHelper.getValue( "Pilot/Project", "" )
    if projectName:
      pilotOptions.append( '-l %s' % projectName )
    else:
      self.log.info( 'DIRAC project will be installed by pilots' )
    # Request a release
    diracVersion = opsHelper.getValue( "Pilot/Version", [] )
    # FIXME: hard-coded version for testing, to be removed
    diracVersion = "v6r4"
    if not diracVersion:
      self.log.error( 'Pilot/Version is not defined in the configuration' )
      return None
    # diracVersion is a list of accepted releases. Just take the first one
    pilotOptions.append( '-r %s' % diracVersion )
    ownerDN = self.genericPilotDN
    ownerGroup = self.genericPilotGroup
    result = gProxyManager.requestToken( ownerDN, ownerGroup, pilotsToSubmit * 5 )
    if not result['OK']:
      self.log.error( ERROR_TOKEN, result['Message'] )
      # Keep the None convention used by the caller for error reporting
      return None
    ( token, numberOfUses ) = result['Value']
    pilotOptions.append( '-o /Security/ProxyToken=%s' % token )
    # Use Filling mode
    pilotOptions.append( '-M %s' % 5 )
    # Debug
    if self.pilotLogLevel.lower() == 'debug':
      pilotOptions.append( '-d' )
    # CS Servers
    csServers = gConfig.getValue( "/DIRAC/Configuration/Servers", [] )
    pilotOptions.append( '-C %s' % ",".join( csServers ) )
    # DIRAC Extensions
    # extensionsList = CSGlobals.getCSExtensions()
    # if extensionsList:
    #   pilotOptions.append( '-e %s' % ",".join( extensionsList ) )
    # Requested CPU time
    pilotOptions.append( '-T %s' % queueDict['CPUTime'] )
    # CEName
    pilotOptions.append( '-N %s' % self.queueDict[queue]['CEName'] )
    # SiteName
    pilotOptions.append( '-n %s' % queueDict['Site'] )
    if 'ClientPlatform' in queueDict:
      pilotOptions.append( "-p '%s'" % queueDict['ClientPlatform'] )
    if 'SharedArea' in queueDict:
      pilotOptions.append( "-o '/LocalSite/SharedArea=%s'" % queueDict['SharedArea'] )
    group = "lhcb_pilot"
    if group:
      pilotOptions.append( '-G %s' % group )
    self.log.verbose( "pilotOptions: ", ' '.join( pilotOptions ) )
    return pilotOptions

  def sendPilotAccounting( self, pilotDict ):
    """ Send pilot accounting record """
    for pRef in pilotDict:
      self.log.verbose( 'Preparing accounting record for pilot %s' % pRef )
      pA = PilotAccounting()
      pA.setEndTime( pilotDict[pRef]['LastUpdateTime'] )
      pA.setStartTime( pilotDict[pRef]['SubmissionTime'] )
      retVal = CS.getUsernameForDN( pilotDict[pRef]['OwnerDN'] )
      if not retVal['OK']:
        userName = '******'
        self.log.error( "Can't determine username for dn:", pilotDict[pRef]['OwnerDN'] )
      else:
        userName = retVal['Value']
      pA.setValueByKey( 'User', userName )
      pA.setValueByKey( 'UserGroup', pilotDict[pRef]['OwnerGroup'] )
      result = getSiteForCE( pilotDict[pRef]['DestinationSite'] )
      if result['OK'] and result['Value'].strip():
        pA.setValueByKey( 'Site', result['Value'].strip() )
      else:
        pA.setValueByKey( 'Site', 'Unknown' )
      pA.setValueByKey( 'GridCE', pilotDict[pRef]['DestinationSite'] )
      pA.setValueByKey( 'GridMiddleware', pilotDict[pRef]['GridType'] )
      pA.setValueByKey( 'GridResourceBroker', pilotDict[pRef]['Broker'] )
      pA.setValueByKey( 'GridStatus', pilotDict[pRef]['Status'] )
      if 'Jobs' not in pilotDict[pRef]:
        pA.setValueByKey( 'Jobs', 0 )
      else:
        pA.setValueByKey( 'Jobs', len( pilotDict[pRef]['Jobs'] ) )
      self.log.info( "Adding accounting record for pilot %s" % pilotDict[pRef]['PilotID'] )
      retVal = gDataStoreClient.addRegister( pA )
      if not retVal['OK']:
        self.log.error( 'Failed to send accounting info for pilot %s' % pRef )
      else:
        # Set up AccountingSent flag
        result = pilotAgentsDB.setAccountingFlag( pRef )
        if not result['OK']:
          self.log.error( 'Failed to set accounting flag for pilot %s' % pRef )
    self.log.info( 'Committing accounting records for %d pilots' % len( pilotDict ) )
    result = gDataStoreClient.commit()
    if result['OK']:
      for pRef in pilotDict:
        self.log.verbose( 'Setting AccountingSent flag for pilot %s' % pRef )
        result = pilotAgentsDB.setAccountingFlag( pRef )
        if not result['OK']:
          self.log.error( 'Failed to set accounting flag for pilot %s' % pRef )
    else:
      return result
    return S_OK()
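# A minimal sketch of the HadoopV1Client submission cycle, assuming an SSH
# reachable Hadoop V.1 gateway and a DIRAC proxy object exposing
# dumpAllToString() as used by jobSubmit() above; host names and paths are
# illustrative placeholders:
#
#   client = HadoopV1Client( 'diracuser', 'hadoop-gw.example.org', 22 )
#   result = client.jobSubmit( '/tmp/bigdata_sandbox', 'job.xml', proxy )
#   if result['OK']:
#     bdJobId = result['Value']          # e.g. 'job_201301011234_0001'
#     status = client.jobStatus( bdJobId, 'diracuser', 'hadoop-gw.example.org' )
#     client.getData( '/user/diracuser/output', '/tmp/bigdata_sandbox' )
#     client.delHadoopData( '/user/diracuser/output' )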
class HadoopV1InteractiveClient( object ):

  jobid = 0
  user = ""
  ip = ""

  def __init__( self, User, PublicIP, Port ):
    # Monitoring loop period, in seconds
    self.monitoringloop = 5
    self.log = gLogger.getSubLogger( "HadoopV1InteractiveClient" )
    self.user = User
    self.publicIP = PublicIP
    self.port = Port
    self.sshConnect = ConnectionUtils( self.user, self.publicIP, self.port )

  def jobSubmit( self, tempPath, HadoopInteractiveJob, proxy,
                 HadoopInteractiveJobOutput, HadoopInteractiveJobCommand ):
    """ Method to submit an interactive job """
    # If no proxy is supplied, the executable can be submitted directly.
    # Otherwise a wrapper script is needed to get the proxy to the execution
    # node. The wrapper script makes debugging more complicated, so it is
    # recommended to transfer a proxy inside the executable when possible.
    HadoopInteractiveJobPath = tempPath + "/" + HadoopInteractiveJob
    self.log.debug( 'Step6::: Creating jar path: ', HadoopInteractiveJobPath )
    if proxy:
      self.log.verbose( 'Setting up proxy for payload' )
      compressedAndEncodedProxy = base64.encodestring( bz2.compress( proxy.dumpAllToString()['Value'] ) ).replace( '\n', '' )
      compressedAndEncodedExecutable = base64.encodestring( bz2.compress( open( HadoopInteractiveJobPath, "rb" ).read(), 9 ) ).replace( '\n', '' )

      wrapperContent = """#!/usr/bin/env python
# Wrapper script for executable and proxy
import os, tempfile, sys, base64, bz2, shutil
try:
  workingDirectory = tempfile.mkdtemp( suffix = '_wrapper', prefix = 'BigDat_' )
  os.chdir( workingDirectory )
  open( 'proxy', "w" ).write( bz2.decompress( base64.decodestring( "%(compressedAndEncodedProxy)s" ) ) )
  open( '%(executablepath)s', "w" ).write( bz2.decompress( base64.decodestring( "%(compressedAndEncodedExecutable)s" ) ) )
  os.chmod( 'proxy', 0600 )
  os.chmod( '%(executablepath)s', 0700 )
  os.environ["X509_USER_PROXY"] = os.path.join( workingDirectory, 'proxy' )
except Exception, x:
  print >> sys.stderr, x
  sys.exit( -1 )
cmd = "%(executable)s > %(HadoopInteractiveJobOutput)s 2>&1"
print 'Executing: ', cmd
sys.stdout.flush()
os.system( cmd )
shutil.rmtree( workingDirectory )
""" % { 'compressedAndEncodedProxy': compressedAndEncodedProxy,
        'compressedAndEncodedExecutable': compressedAndEncodedExecutable,
        'executablepath': HadoopInteractiveJobPath,
        'executable': HadoopInteractiveJobCommand,
        'HadoopInteractiveJobOutput': HadoopInteractiveJobOutput }

      fd, name = tempfile.mkstemp( suffix = '_wrapper.py', prefix = 'BigDat_', dir = tempPath )
      wrapper = os.fdopen( fd, 'w' )
      wrapper.write( wrapperContent )
      wrapper.close()
      self.log.debug( 'Step7::: Creating payload: ' )
      submitFile = name

      # Second wrapper: a small monitoring script that greps the interactive
      # job output for the Hadoop job id
      wrapperContent = """#!/usr/bin/env python
# Monitoring script: extracts information from the interactive job output
import os, sys, getopt

def main( argv ):
  command = ''
  try:
    opts, args = getopt.getopt( argv, 'h:c:', [''] )
  except getopt.GetoptError:
    print 'name.py -c <command>'
    sys.exit( 2 )
  for opt, arg in opts:
    if opt == '-h':
      print 'name.py -c <command>'
      sys.exit()
    elif opt in ( '-c', '--command' ):
      command = arg
  if command == 'step1':
    cmd = 'grep "Running job:" %(HadoopInteractiveJobOutput)s'
    os.system( cmd )

if __name__ == '__main__':
  main( sys.argv[1:] )
""" % { 'HadoopInteractiveJobOutput': HadoopInteractiveJobOutput }

      fd, name = tempfile.mkstemp( suffix = '_getInfo.py', prefix = 'BigDat_', dir = tempPath )
      wrapper = os.fdopen( fd, 'w' )
      wrapper.write( wrapperContent )
      wrapper.close()
      submitFile2 = name
      self.log.debug( 'Step8::: Creating wrapper: ' )
    else:  # no proxy
      submitFile = HadoopInteractiveJobPath
      submitFile2 = None  # the monitoring wrapper is only created together with a proxy

    # Copy the executable and the monitoring wrapper to the gateway
    self.log.debug( 'Step9::: Moving wrapper and payload: ' )
    os.chmod( submitFile, stat.S_IRUSR | stat.S_IXUSR )
    returned = self.sshConnect.scpCallByPort( 100, submitFile, '%s/%s' % ( tempPath, os.path.basename( submitFile ) ) )
    if not returned['OK']:
      return S_ERROR( returned['Message'] )
    if submitFile2:
      returned2 = self.sshConnect.scpCallByPort( 100, submitFile2, '%s/%s' % ( tempPath, os.path.basename( submitFile2 ) ) )
      if not returned2['OK']:
        return S_ERROR( returned2['Message'] )

    # Launch submitFile in a separate thread; the SSH call inside the thread
    # blocks until the interactive job finishes
    cmd = submitFile
    self.log.verbose( 'BigData submission command: %s' % cmd )
    thread1 = InteractiveJobLaunchPortThread( self.user, self.publicIP, cmd, self.port )
    # thread2 = InteractiveJobMonitorThread( self.user, self.publicIP, self.monitoringloop,
    #                                        thread1, tempPath + HadoopInteractiveJobOutput, submitFile2 )
    thread1.start()
    time.sleep( 5 )
    if not thread1.isAlive():
      self.log.warn( 'SSH BigData Hadoop-HadoopInteractive thread V.1 result NOT OK' )
      return S_ERROR( "Error launching Hadoop-HadoopInteractive Thread" )
    self.log.debug( 'BigData Hadoop-HadoopInteractive V.1 result OK', thread1.getName() )

    # Run the monitoring wrapper once to extract the Hadoop job id from the
    # interactive output
    self.log.debug( 'Step10::: Stop process to get the JobID: ' )
    cmd = '/bin/chmod 555 ' + submitFile2
    self.sshConnect.sshCallByPort( 100, cmd )
    cmd = submitFile2 + ' -c step1'
    returned = self.sshConnect.sshCallByPort( 100, cmd )
    self.log.debug( 'Step11:::InteractiveJobMonitorThread:step1:getJobID:', returned )
    if returned is not None:
      if returned['Value'][1] != "":
        resulting = re.search( "job_+([^:]+)", returned['Value'][1] )
        if resulting is not None:
          self.log.debug( 'Step12:::InteractiveJobMonitorThread:step1:JobID:', resulting.group( 0 ).rstrip() )
          return S_OK( resulting.group( 0 ).rstrip() )
    return S_ERROR( 'Could not retrieve the BigData job id' )

  def getData( self, tempSRC, tempDest ):
    cmdSeq = "hadoop dfs -get " + tempSRC + " " + tempDest
    return self.sshConnect.sshCallByPort( 86400, cmdSeq )

  def delData( self, tempPath ):
    cmdSeq = "rm -Rf " + tempPath
    return self.sshConnect.sshCallByPort( 100, cmdSeq )

  def dataCopy( self, tempPath, tmpSandBoxDir ):
    return self.sshConnect.scpCallByPort( 100, tempPath, tmpSandBoxDir )

  def getdata( self, tempPath, tmpSandBoxDir ):
    return self.sshConnect.scpCallByPort( 100, tempPath, tmpSandBoxDir, False )

  def jobStatus( self, jobId, user, host ):
    cmdSeq = "ssh -p " + str( self.port ) + " -l " + user + " " + host + \
             " 'hadoop job -list all | awk -v job_id=" + jobId.strip() + " " \
             " '\"'\"'BEGIN{OFS=\"\\t\"; FS=\"\\t\"; final_state=\"Unknown\"} " \
             "$0 == \"States are:\" {getline; for(i=1;i<=NF;i++) { split($i,s,\" \"); states[s[3]] = s[1] }} $1==job_id { final_state=states[$2]; exit} END{print final_state}'\"'\""
    gLogger.info( 'Command Submitted: ', cmdSeq )
    return self.sshConnect.sshOnlyCall( 100, cmdSeq )
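# A minimal sketch for the interactive client, under the same assumptions as
# above (placeholder host, paths and command; the proxy object must expose
# dumpAllToString()):
#
#   client = HadoopV1InteractiveClient( 'diracuser', 'hadoop-gw.example.org', 22 )
#   result = client.jobSubmit( '/tmp/bigdata_sandbox', 'wordcount.jar', proxy,
#                              'interactive.out',
#                              'hadoop jar /tmp/bigdata_sandbox/wordcount.jar in out' )
#   if result['OK']:
#     print 'BigData job id:', result['Value']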