def __downloadJobDescriptionXML(jobID, basepath):
    """Download the input sandbox (which contains jobDescription.xml) for
    *jobID* into the temporary directory *basepath*.

    :param jobID: DIRAC job identifier
    :param basepath: existing directory the sandbox is unpacked into
    """
    # BUGFIX: the local import was missing, so Dirac() raised NameError at
    # call time. The other copies of this helper in the file import the API
    # lazily inside the function body; do the same here.
    from DIRAC.Interfaces.API.Dirac import Dirac

    jdXML = Dirac()
    jdXML.getInputSandbox(jobID, basepath)
def __downloadJobDescriptionXML(jobID, basepath):
    """Fetch the job's input sandbox (holding jobDescription.xml) into the
    temporary directory *basepath*."""
    # Lazy import keeps the heavy DIRAC API dependency out of module load.
    from DIRAC.Interfaces.API.Dirac import Dirac

    dirac_api = Dirac()
    dirac_api.getInputSandbox(jobID, basepath)
def __downloadJobDescriptionXML(jobID, basepath):
    """Pull the input sandbox of *jobID* (including jobDescription.xml) down
    into the temporary directory *basepath*."""
    from DIRAC.Interfaces.API.Dirac import Dirac

    api = Dirac()
    api.getInputSandbox(jobID, basepath)
def main():
    """Retrieve the input sandbox of each JobID given on the command line."""
    Script.registerSwitch("D:", "Dir=", "Store the output in this directory")
    # Registering arguments automatically adds their description to the help.
    Script.registerArgument(["JobID: DIRAC Job ID"])
    switches, positional = Script.parseCommandLine(ignoreErrors=True)

    from DIRAC.Interfaces.API.Dirac import Dirac, parseArguments

    diracAPI = Dirac()
    exitCode = 0
    failures = []

    # The last -D/--Dir occurrence wins.
    outputDir = None
    for switch, value in switches:
        if switch in ("D", "Dir"):
            outputDir = value

    for job in parseArguments(positional):
        result = diracAPI.getInputSandbox(job, outputDir=outputDir)
        if not result["OK"]:
            failures.append((job, result["Message"]))
            exitCode = 2
            continue
        if os.path.exists("InputSandbox%s" % job):
            print("Job input sandbox retrieved in InputSandbox%s/" % (job))

    for failure in failures:
        print("ERROR %s: %s" % failure)

    DIRAC.exit(exitCode)
def main():
    """Download the input sandbox for every JobID passed as an argument."""
    Script.registerSwitch("D:", "Dir=", "Store the output in this directory")
    Script.parseCommandLine(ignoreErrors=True)
    positional = Script.getPositionalArgs()
    if not positional:
        Script.showHelp(exitCode=1)

    from DIRAC.Interfaces.API.Dirac import Dirac, parseArguments

    diracAPI = Dirac()
    exitCode = 0
    failures = []

    # Scan switches; the last -D/--Dir value is kept.
    outputDir = None
    for switch, value in Script.getUnprocessedSwitches():
        if switch in ('D', 'Dir'):
            outputDir = value

    for job in parseArguments(positional):
        result = diracAPI.getInputSandbox(job, outputDir=outputDir)
        if not result['OK']:
            failures.append((job, result['Message']))
            exitCode = 2
            continue
        if os.path.exists('InputSandbox%s' % job):
            print('Job input sandbox retrieved in InputSandbox%s/' % (job))

    for failure in failures:
        print("ERROR %s: %s" % failure)

    DIRAC.exit(exitCode)
def submitNewBigJob( self ):
    """Submit the current job's executable to the Hadoop master as a Hive job.

    Extracts the executable from the job JDL, downloads the job's input
    sandbox into the server-side temporary directory, copies it to the
    Hadoop master via the Hive client, obtains the owner's proxy and
    submits the job.

    :return: S_OK( <hadoop-hive job id> ) or S_ERROR( <message> )
    """
    # Extract the executable name from the job JDL.
    result = jobDB.getJobJDL( str( self.__jobID ) , True )
    classAdJob = ClassAd( result['Value'] )
    executableFile = ""
    if classAdJob.lookupAttribute( 'Executable' ):
      executableFile = classAdJob.getAttributeString( 'Executable' )

    tempPath = self.__tmpSandBoxDir
    dirac = Dirac()
    if not os.path.exists( tempPath ):
      os.makedirs( tempPath )

    settingJobSandBoxDir = dirac.getInputSandbox( self.__jobID, tempPath )
    self.log.info( 'Writting temporal SandboxDir in Server', settingJobSandBoxDir )

    moveData = self.__tmpSandBoxDir + "/InputSandbox" + str( self.__jobID )
    HiveV1Cli = HiveV1Client( self.__User , self.__publicIP )
    returned = HiveV1Cli.dataCopy( moveData, self.__tmpSandBoxDir )
    self.log.info( 'Copy the job contain to the Hadoop Master with HIVE: ', returned )

    jobInfo = jobDB.getJobAttributes( self.__jobID )
    if not jobInfo['OK']:
      # BUGFIX: a failed result dict carries 'Message', not 'Value';
      # reading 'Value' here raised KeyError on the error path.
      return S_ERROR( jobInfo['Message'] )
    proxy = ""
    jobInfo = jobInfo['Value']
    # Reuse a cached proxy when available, otherwise ask the ProxyManager.
    if gProxyManager.userHasProxy( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] ):
      proxy = gProxyManager.downloadProxyToFile( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] )
    else:
      proxy = self.__requestProxyFromProxyManager( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] )

    HiveJob = "InputSandbox" + str( self.__jobID ) + "/" + executableFile
    HiveJobOutput = str( self.__jobID ) + "_" + executableFile + "_out"

    returned = HiveV1Cli.jobSubmit( tempPath, HiveJob, proxy['chain'], HiveJobOutput )
    self.log.info( 'Launch Hadoop-Hive job to the Master: ', returned )

    if not returned['OK']:
      return S_ERROR( returned['Message'] )
    else:
      self.log.info( 'Hadoop-Hive Job ID: ', returned['Value'] )
    return S_OK( returned['Value'] )
def submitNewBigPilot(self):
    """Submit a Hadoop pilot for the current job to the Hadoop master.

    Downloads the job input sandbox into a temporary directory, writes the
    wrapped job XML next to it, obtains the owner's proxy and submits the
    pilot through the HadoopV1 client.

    :return: S_OK( <hadoop job id> ) or S_ERROR( <message> )
    """
    tempPath = self.__tmpSandBoxDir + str(self.__jobID)
    dirac = Dirac()
    if not os.path.exists(tempPath):
        os.makedirs(tempPath)

    settingJobSandBoxDir = dirac.getInputSandbox(self.__jobID, tempPath)
    self.log.info("Writting temporal SandboxDir in Server", settingJobSandBoxDir)

    # Serialize the wrapped job description beside the sandbox contents.
    jobXMLName = "job:" + str(self.__jobID) + ".xml"
    with open(os.path.join(tempPath, jobXMLName), "wb") as temp_file:
        temp_file.write(self.jobWrapper())
    self.log.info("Writting temporal Hadoop Job.xml")

    HadoopV1cli = HadoopV1Client(self.__User, self.__publicIP, self.__Port)
    # returned = HadoopV1cli.dataCopy( tempPath, self.__tmpSandBoxDir )
    # self.log.info( 'Copy the job contain to the Hadoop Master: ', returned )

    jobInfo = jobDB.getJobAttributes(self.__jobID)
    if not jobInfo["OK"]:
        # BUGFIX: a failed result dict carries "Message", not "Value".
        return S_ERROR(jobInfo["Message"])
    proxy = ""
    jobInfo = jobInfo["Value"]
    # Reuse a cached proxy when available, otherwise ask the ProxyManager.
    if gProxyManager.userHasProxy(jobInfo["OwnerDN"], jobInfo["OwnerGroup"]):
        proxy = gProxyManager.downloadProxyToFile(jobInfo["OwnerDN"], jobInfo["OwnerGroup"])
    else:
        proxy = self.__requestProxyFromProxyManager(jobInfo["OwnerDN"], jobInfo["OwnerGroup"])

    returned = HadoopV1cli.submitPilotJob(tempPath, jobXMLName, proxy["chain"])
    self.log.info("Launch Hadoop pilot to the Hadoop Master: ", returned)
    if not returned["OK"]:
        # BUGFIX: propagate the failure message; "Value" is absent on error.
        return S_ERROR(returned["Message"])
    else:
        self.log.info("Hadoop Job ID: ", returned["Value"])
    return S_OK(returned["Value"])
def submitNewBigJob( self ):
    """Submit the current job to the Hadoop master through the HadoopV2 client.

    Downloads the job's input sandbox into a temporary directory, writes the
    wrapped job XML beside it, copies everything to the Hadoop master,
    obtains the owner's proxy and submits the job.

    :return: S_OK( <hadoop job id> ) or S_ERROR( <message> )
    """
    tempPath = self.__tmpSandBoxDir + str( self.__jobID )
    dirac = Dirac()
    if not os.path.exists( tempPath ):
      os.makedirs( tempPath )

    settingJobSandBoxDir = dirac.getInputSandbox( self.__jobID, tempPath )
    self.log.info( 'Writting temporal SandboxDir in Server', settingJobSandBoxDir )

    # Serialize the wrapped job description beside the sandbox contents.
    jobXMLName = "job:" + str( self.__jobID ) + '.xml'
    with open( os.path.join( tempPath, jobXMLName ), 'wb' ) as temp_file:
      temp_file.write( self.jobWrapper() )
    self.log.info( 'Writting temporal Hadoop Job.xml' )

    # Renamed from the misleading 'HadoopV1cli': this is a HadoopV2 client.
    hadoopCli = HadoopV2Client( self.__User , self.__publicIP )
    returned = hadoopCli.dataCopy( tempPath, self.__tmpSandBoxDir )
    self.log.info( 'Copy the job contain to the Hadoop Master: ', returned )

    jobInfo = jobDB.getJobAttributes( self.__jobID )
    if not jobInfo['OK']:
      # BUGFIX: a failed result dict carries 'Message', not 'Value';
      # reading 'Value' here raised KeyError on the error path.
      return S_ERROR( jobInfo['Message'] )
    proxy = ""
    jobInfo = jobInfo['Value']
    # Reuse a cached proxy when available, otherwise ask the ProxyManager.
    if gProxyManager.userHasProxy( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] ):
      proxy = gProxyManager.downloadProxyToFile( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] )
    else:
      proxy = self.__requestProxyFromProxyManager( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] )

    returned = hadoopCli.jobSubmit( tempPath, jobXMLName, proxy['chain'] )
    self.log.info( 'Launch Hadoop job to the Hadoop Master: ', returned )

    if not returned['OK']:
      return S_ERROR( returned['Message'] )
    else:
      self.log.info( 'Hadoop Job ID: ', returned['Value'] )
    return S_OK( returned['Value'] )
# Retrieve the input sandbox of each JobID given on the command line.
Script.parseCommandLine( ignoreErrors = True )
args = Script.getPositionalArgs()
if len( args ) < 1:
  Script.showHelp()

from DIRAC.Interfaces.API.Dirac import Dirac
dirac = Dirac()
exitCode = 0
errorList = []
outputDir = None

# The last -D/--Dir occurrence wins.
for sw, v in Script.getUnprocessedSwitches():
  if sw in ( 'D', 'Dir' ):
    outputDir = v

for job in args:
  result = dirac.getInputSandbox( job, outputDir = outputDir )
  if result['OK']:
    if os.path.exists( 'InputSandbox%s' % job ):
      # BUGFIX: Python-2-only `print` statements fail under Python 3; the
      # parenthesized single-argument form prints identically under both.
      print( 'Job input sandbox retrieved in InputSandbox%s/' % ( job ) )
  else:
    errorList.append( ( job, result['Message'] ) )
    exitCode = 2

for error in errorList:
  print( "ERROR %s: %s" % error )

DIRAC.exit( exitCode )
# NOTE(review): fragment of a per-job loop — `j` (job id string), `w`
# (presumably a WMS/monitoring client; verify against caller), `d` (Dirac
# API), `dir_temp`, `status_to_handle` and `jobid_handled` are all defined
# by enclosing code outside this view. Python 2 print statements.
res = w.getJobSummary(int(j))
if not res["OK"]:
    # Summary lookup failed: report on stdout and stderr, then leave the loop.
    print res["Message"]
    sys.stderr.write(
        time.strftime("%d/%m/%y %H:%M", time.localtime()) + " => " + j + " => " + res["Message"] + "\n"
    )
    break
summary = res["Value"]
status_j = summary["Status"]
# Job we want to handle
if status_j in status_to_handle:
    # retrieve the INPUT sandbox
    res = d.getInputSandbox(j, dir_temp)
    if not res["OK"]:
        print res["Message"]
        if "No Input sandbox registered for job" in res["Message"]:
            # A missing sandbox is treated as terminal for this job.
            jobid_handled.append(j)  # notify the job as "already handled"
        else:
            # Any other retrieval failure is logged to stderr with a timestamp.
            sys.stderr.write(
                time.strftime("%d/%m/%y %H:%M", time.localtime()) + " => " + j + " => " + res["Message"] + "\n"
            )
def submitNewBigJob( self ):
    """Submit the current job to the Hadoop master via HadoopInteractive.

    Creates a temporary folder, downloads the job's input sandbox into it,
    copies the data to the Hadoop master, extracts the executable from the
    job JDL, obtains the owner's proxy, builds the ``hadoop jar`` command
    line and submits it.

    :return: S_OK( <hadoop job id> ) or S_ERROR( <message> )
    """
    # 1.- Create the temporary folder.
    self.log.debug( 'Step1::: mkdir temp folder' )
    tempPath = self.__tmpSandBoxDir + str( self.__jobID ) + "/"
    dirac = Dirac()
    if not os.path.exists( tempPath ):
      os.makedirs( tempPath )

    # 2.- Put the input sandbox contents into the temporary folder.
    self.log.debug( 'Step2::: download inputsand to temp folder' )
    settingJobSandBoxDir = dirac.getInputSandbox( self.__jobID, tempPath )
    self.log.info( 'Writting temporal SandboxDir in Server', settingJobSandBoxDir )
    moveData = tempPath + "/InputSandbox" + str( self.__jobID )

    # 3.- Move the data to the Hadoop master.
    self.log.debug( 'Step2::: download inputsandbox to temp folder' )
    HadoopV1InteractiveCli = HadoopV1InteractiveClient( self.__User , self.__publicIP, self.__Port )
    returned = HadoopV1InteractiveCli.dataCopy( tempPath, self.__tmpSandBoxDir )
    self.log.debug( 'Returned of copy the job contain to the Hadoop Master with HadoopInteractive::: ', returned )

    # 3.- Get the executable file from the job JDL.
    result = jobDB.getJobJDL( str( self.__jobID ) , True )
    classAdJob = ClassAd( result['Value'] )
    executableFile = ""
    if classAdJob.lookupAttribute( 'Executable' ):
      executableFile = classAdJob.getAttributeString( 'Executable' )
    self.log.debug( 'Step3::: Get executable file: ', executableFile )

    jobInfo = jobDB.getJobAttributes( self.__jobID )
    if not jobInfo['OK']:
      # BUGFIX: a failed result dict carries 'Message', not 'Value';
      # reading 'Value' here raised KeyError on the error path.
      return S_ERROR( jobInfo['Message'] )
    proxy = ""
    jobInfo = jobInfo['Value']
    # Reuse a cached proxy when available, otherwise ask the ProxyManager.
    if gProxyManager.userHasProxy( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] ):
      proxy = gProxyManager.downloadProxyToFile( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] )
    else:
      proxy = self.__requestProxyFromProxyManager( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] )

    HadoopInteractiveJob = "InputSandbox" + str( self.__jobID ) + "/" + executableFile
    HadoopInteractiveJobCommand = "InputSandbox" + str( self.__jobID ) + "/" + executableFile + " " + self.__JobName
    HadoopInteractiveJobOutput = tempPath + str( self.__jobID ) + "_" + executableFile + "_out"

    # 4.- Create the second part of the job name.
    if ( len( re.split( " ", self.__JobName ) ) > 1 ):
      # Multi-word job name: first two words form the job name.
      name_job = re.split( " ", self.__JobName )[0] + " " + re.split( " ", self.__JobName )[1]
      # 5.- Parse the execution command. NOTE(review): the trailing
      # "/dataset-USC-a-grep '[and]+'" literal looks test-specific — confirm.
      cmd = "hadoop jar " + tempPath + HadoopInteractiveJob + " " + name_job + " " + tempPath + "/InputSandbox" + str( self.__jobID ) + "/" + "/dataset-USC-a-grep '[and]+'"
    else:
      # Single-word job name: derive the dataset path, dropping the first
      # two components of self.__Dataset.
      dataset = re.split( "/", self.__Dataset )
      count = 0
      datasetname = ""
      for dir in dataset:
        count = count + 1
        if ( count > 2 ):
          datasetname = datasetname + "/" + dir
      cmd = "hadoop jar " + tempPath + HadoopInteractiveJob + " " + self.__JobName + " " + datasetname + " " + tempPath + "/" + self.__JobName.replace( " ", "" ) + "_" + str( self.__jobID )

    self.log.debug( 'Step4::: Making CMD for submission: ', cmd )
    self.log.debug( 'Step5::: Submit file to hadoop: ' )
    returned = HadoopV1InteractiveCli.jobSubmit( tempPath, HadoopInteractiveJob, proxy['chain'],
                                                 HadoopInteractiveJobOutput, cmd )
    self.log.info( 'Launch Hadoop-HadoopInteractive job to the Master: ', returned )
    if not returned['OK']:
      return S_ERROR( returned['Message'] )
    else:
      self.log.info( 'Hadoop-HadoopInteractive Job ID: ', returned['Value'] )
    return S_OK( returned['Value'] )
# Fetch the input sandbox of every job listed on the command line.
Script.parseCommandLine(ignoreErrors=True)
positional = Script.getPositionalArgs()
if len(positional) < 1:
    Script.showHelp(exitCode=1)

from DIRAC.Interfaces.API.Dirac import Dirac, parseArguments

diracAPI = Dirac()
exitCode = 0
failures = []

# The last -D/--Dir switch value wins.
outputDir = None
for switch, value in Script.getUnprocessedSwitches():
    if switch in ('D', 'Dir'):
        outputDir = value

for job in parseArguments(positional):
    result = diracAPI.getInputSandbox(job, outputDir=outputDir)
    if not result['OK']:
        failures.append((job, result['Message']))
        exitCode = 2
        continue
    if os.path.exists('InputSandbox%s' % job):
        print('Job input sandbox retrieved in InputSandbox%s/' % (job))

for failure in failures:
    print("ERROR %s: %s" % failure)

DIRAC.exit(exitCode)
# NOTE(review): fragment of a per-job loop — `j`, `w`, `d`, `dir_temp`,
# `status_to_handle` and `jobid_handled` come from enclosing code outside
# this view; the final `if` body is truncated by the chunk boundary.
# Python 2 print statements.
res = w.getJobSummary(int(j))
if not res['OK']:
    # Summary lookup failed: report on stdout and stderr, then leave the loop.
    print res['Message']
    sys.stderr.write(
        time.strftime('%d/%m/%y %H:%M', time.localtime()) + " => " + j + " => " + res['Message'] + "\n")
    break
summary = res['Value']
status_j = summary['Status']
# Job we want to handle
if status_j in status_to_handle:
    # retrieve the INPUT sandbox
    res = d.getInputSandbox(j, dir_temp)
    if not res['OK']:
        print res['Message']
        if "No Input sandbox registered for job" in res['Message']:
            # A missing sandbox is treated as terminal for this job.
            jobid_handled.append(j)  # notify the job as "already handled"
        else:
            # Other retrieval failures are logged to stderr with a timestamp.
            sys.stderr.write(
                time.strftime('%d/%m/%y %H:%M', time.localtime()) + " => " + j + " => " + res['Message'] + "\n")
    else:
        # check if 'jobmeta.inf' is present (if not it's not a PIPELINE job )
        if not os.path.isfile(dir_temp + "/InputSandbox" + j + "/jobmeta.inf"):