def sendJobAccounting(self, dataFromBDSoft, jobId):
    accountingReport = AccountingJob()
    accountingReport.setStartTime()

    result = jobDB.getJobAttributes(jobId)
    getting = result["Value"]

    # If the BigData software reported no CPU time, fall back to the
    # wall-clock interval between submission and end of execution.
    if dataFromBDSoft["CPUTime"] == 0:
        cpuTime = 0
        if getting["EndExecTime"] != "None":
            epoch = datetime(1970, 1, 1)
            # The expression below is the timedelta in seconds
            # (equivalent to td.total_seconds() on Python >= 2.7)
            td = datetime.strptime(getting["EndExecTime"], "%Y-%m-%d %H:%M:%S") - epoch
            EndExecTime = (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10 ** 6) / 1e6
            td = datetime.strptime(getting["SubmissionTime"], "%Y-%m-%d %H:%M:%S") - epoch
            SubmissionTime = (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10 ** 6) / 1e6
            cpuTime = EndExecTime - SubmissionTime
    else:
        # The reported CPU time is in milliseconds; convert to seconds
        cpuTime = dataFromBDSoft["CPUTime"] / 1000

    acData = {
        "User": getting["Owner"],
        "UserGroup": getting["OwnerGroup"],
        "JobGroup": "cesga",
        "JobType": "User",
        "JobClass": "unknown",
        "ProcessingType": "unknown",
        "FinalMajorStatus": getting["Status"],
        "FinalMinorStatus": getting["MinorStatus"],
        "CPUTime": cpuTime,
        "Site": getting["Site"],
        # Based on the factor to convert raw CPU to Normalized units (based on the CPU Model)
        "NormCPUTime": 0,
        "ExecTime": cpuTime,
        "InputDataSize": dataFromBDSoft["InputDataSize"],
        "OutputDataSize": dataFromBDSoft["OutputDataSize"],
        "InputDataFiles": dataFromBDSoft["InputDataFiles"],
        "OutputDataFiles": len(self.fileList),
        "DiskSpace": 0,
        "InputSandBoxSize": 0,
        "OutputSandBoxSize": self.outputSandboxSize,
        "ProcessedEvents": 0,
    }

    accountingReport.setEndTime()
    accountingReport.setValuesFromDict(acData)
    self.log.debug("Info for accounting: ", acData)
    result = accountingReport.commit()
    self.log.debug("Accounting insertion: ", result)
    return result
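# A minimal, self-contained sketch of the epoch-seconds conversion used in the
# wall-clock fallback above: timedelta.total_seconds() (Python >= 2.7) computes
# exactly the manual expression
# (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10 ** 6) / 1e6.
# The helper name and the sample timestamps are illustrative, not part of the
# original module.
from datetime import datetime

def toEpochSeconds(timestamp, fmt="%Y-%m-%d %H:%M:%S"):
    """Convert a JobDB timestamp string into seconds since the Unix epoch."""
    return (datetime.strptime(timestamp, fmt) - datetime(1970, 1, 1)).total_seconds()

# Example: wall-clock seconds between submission and end of execution
# toEpochSeconds("2014-01-01 12:30:00") - toEpochSeconds("2014-01-01 12:00:00") == 1800.0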
def submitNewBigJob( self ):
  result = jobDB.getJobJDL( str( self.__jobID ), True )
  classAdJob = ClassAd( result['Value'] )
  executableFile = ""
  if classAdJob.lookupAttribute( 'Executable' ):
    executableFile = classAdJob.getAttributeString( 'Executable' )

  tempPath = self.__tmpSandBoxDir
  dirac = Dirac()
  if not os.path.exists( tempPath ):
    os.makedirs( tempPath )

  settingJobSandBoxDir = dirac.getInputSandbox( self.__jobID, tempPath )
  self.log.info( 'Writing temporary SandboxDir on the server', settingJobSandBoxDir )

  moveData = self.__tmpSandBoxDir + "/InputSandbox" + str( self.__jobID )
  HiveV1Cli = HiveV1Client( self.__User, self.__publicIP )
  returned = HiveV1Cli.dataCopy( moveData, self.__tmpSandBoxDir )
  self.log.info( 'Copying the job content to the Hadoop Master with HIVE: ', returned )

  jobInfo = jobDB.getJobAttributes( self.__jobID )
  if not jobInfo['OK']:
    return S_ERROR( jobInfo['Message'] )

  proxy = ""
  jobInfo = jobInfo['Value']
  if gProxyManager.userHasProxy( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] ):
    proxy = gProxyManager.downloadProxyToFile( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] )
  else:
    proxy = self.__requestProxyFromProxyManager( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] )

  HiveJob = "InputSandbox" + str( self.__jobID ) + "/" + executableFile
  HiveJobOutput = str( self.__jobID ) + "_" + executableFile + "_out"

  returned = HiveV1Cli.jobSubmit( tempPath, HiveJob, proxy['chain'], HiveJobOutput )
  self.log.info( 'Launching Hadoop-Hive job on the Master: ', returned )

  if not returned['OK']:
    return S_ERROR( returned['Message'] )
  else:
    self.log.info( 'Hadoop-Hive Job ID: ', returned['Value'] )
    return S_OK( returned['Value'] )
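# __requestProxyFromProxyManager is called above but not defined in this
# section. A minimal sketch of what such a helper might do, reusing the same
# gProxyManager.downloadProxyToFile call already used above; the body and the
# error handling are assumptions, not the verified original:
def __requestProxyFromProxyManager( self, ownerDN, ownerGroup ):
  """Retrieve a proxy for the job owner from the ProxyManager."""
  self.log.info( 'Requesting proxy from ProxyManager for', '%s@%s' % ( ownerDN, ownerGroup ) )
  result = gProxyManager.downloadProxyToFile( ownerDN, ownerGroup )
  if not result['OK']:
    return S_ERROR( 'Could not retrieve proxy: %s' % result['Message'] )
  return result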
def submitNewBigPilot(self):
    tempPath = self.__tmpSandBoxDir + str(self.__jobID)
    dirac = Dirac()
    if not os.path.exists(tempPath):
        os.makedirs(tempPath)

    settingJobSandBoxDir = dirac.getInputSandbox(self.__jobID, tempPath)
    self.log.info("Writing temporary SandboxDir on the server", settingJobSandBoxDir)

    jobXMLName = "job:" + str(self.__jobID) + ".xml"
    with open(os.path.join(tempPath, jobXMLName), "wb") as temp_file:
        temp_file.write(self.jobWrapper())
    self.log.info("Writing temporary Hadoop Job.xml")

    HadoopV1cli = HadoopV1Client(self.__User, self.__publicIP, self.__Port)
    # returned = HadoopV1cli.dataCopy( tempPath, self.__tmpSandBoxDir )
    # self.log.info( 'Copy the job content to the Hadoop Master: ', returned )

    jobInfo = jobDB.getJobAttributes(self.__jobID)
    if not jobInfo["OK"]:
        return S_ERROR(jobInfo["Message"])

    proxy = ""
    jobInfo = jobInfo["Value"]
    if gProxyManager.userHasProxy(jobInfo["OwnerDN"], jobInfo["OwnerGroup"]):
        proxy = gProxyManager.downloadProxyToFile(jobInfo["OwnerDN"], jobInfo["OwnerGroup"])
    else:
        proxy = self.__requestProxyFromProxyManager(jobInfo["OwnerDN"], jobInfo["OwnerGroup"])

    returned = HadoopV1cli.submitPilotJob(tempPath, jobXMLName, proxy["chain"])
    self.log.info("Launching Hadoop pilot on the Hadoop Master: ", returned)

    if not returned["OK"]:
        return S_ERROR(returned["Message"])
    else:
        self.log.info("Hadoop Job ID: ", returned["Value"])
        return S_OK(returned["Value"])
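# jobWrapper() above is expected to return the serialized job description that
# is written to job:<jobID>.xml before submitPilotJob ships it to the master.
# A purely illustrative sketch of such a wrapper, assuming a Hadoop-style
# configuration XML; the property names are hypothetical, not the verified
# original:
def jobWrapper(self):
    """Return the job description XML to be shipped to the Hadoop master."""
    template = (
        '<?xml version="1.0"?>\n'
        '<configuration>\n'
        '  <property><name>dirac.job.id</name><value>%s</value></property>\n'
        '  <property><name>dirac.job.user</name><value>%s</value></property>\n'
        '</configuration>\n'
    )
    return template % (self.__jobID, self.__User)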
def submitNewBigJob( self ):
  tempPath = self.__tmpSandBoxDir + str( self.__jobID )
  dirac = Dirac()
  if not os.path.exists( tempPath ):
    os.makedirs( tempPath )

  settingJobSandBoxDir = dirac.getInputSandbox( self.__jobID, tempPath )
  self.log.info( 'Writing temporary SandboxDir on the server', settingJobSandBoxDir )

  jobXMLName = "job:" + str( self.__jobID ) + '.xml'
  with open( os.path.join( tempPath, jobXMLName ), 'wb' ) as temp_file:
    temp_file.write( self.jobWrapper() )
  self.log.info( 'Writing temporary Hadoop Job.xml' )

  HadoopV2cli = HadoopV2Client( self.__User, self.__publicIP )
  returned = HadoopV2cli.dataCopy( tempPath, self.__tmpSandBoxDir )
  self.log.info( 'Copying the job content to the Hadoop Master: ', returned )

  jobInfo = jobDB.getJobAttributes( self.__jobID )
  if not jobInfo['OK']:
    return S_ERROR( jobInfo['Message'] )

  proxy = ""
  jobInfo = jobInfo['Value']
  if gProxyManager.userHasProxy( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] ):
    proxy = gProxyManager.downloadProxyToFile( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] )
  else:
    proxy = self.__requestProxyFromProxyManager( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] )

  returned = HadoopV2cli.jobSubmit( tempPath, jobXMLName, proxy['chain'] )
  self.log.info( 'Launching Hadoop job to the Hadoop Master: ', returned )

  if not returned['OK']:
    return S_ERROR( returned['Message'] )
  else:
    self.log.info( 'Hadoop Job ID: ', returned['Value'] )
    return S_OK( returned['Value'] )
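# The mkdir/getInputSandbox sequence above is repeated by every submit method
# in this section. A minimal sketch of a shared staging helper, reusing the
# same Dirac().getInputSandbox API used above; the helper itself is an
# illustrative refactor, not part of the original class:
def __stageInputSandbox( self, tempPath ):
  """Create tempPath if needed and download the job input sandbox into it."""
  if not os.path.exists( tempPath ):
    os.makedirs( tempPath )
  result = Dirac().getInputSandbox( self.__jobID, tempPath )
  if not result['OK']:
    self.log.error( 'Could not download the input sandbox', result['Message'] )
  return result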
def monitoring( self, loop, parentthread, output ):
  self.initialTiming = os.times()
  accountingReport = AccountingJob()
  accountingReport.setStartTime()

  numberJobsFlag = True
  numberJobs = 0
  numberStartedJobsDict = {}
  numberEndingJobsDict = {}

  job_pattern = re.compile( 'Job =.*?,' )
  job_pattern_2 = re.compile( 'Job =.*?\n' )

  # The job ID is the leading "<jobid>_" component of the last element of the
  # output path
  jobid = int( re.split( "_", re.split( "/", output )[-1] )[0] )

  cmd = '/bin/chmod 555 ' + self.getinfo
  returned = self.commandLaunch( cmd )

  while parentthread.isAlive():
    time.sleep( loop )
    if numberJobsFlag:
      cmd = self.getinfo + ' -c step1'
      returned = self.commandLaunch( cmd )
      self.log.info( 'InteractiveJobMonitorThread:step1:numJobs:', returned )
      if returned != None:
        if returned['Value'][1] != "":
          if re.split( "=", returned['Value'][1] )[1].strip().isdigit():
            numberJobs = int( re.split( "=", returned['Value'][1] )[1] )
          if numberJobs != 0:
            numberJobsFlag = False
            BigDataDB.setJobStatus( jobid, "Running" )
    else:
      cmd = self.getinfo + ' -c step2'
      returned = self.commandLaunch( cmd )
      self.log.info( 'InteractiveJobMonitorThread:step2:startedJobs:', returned )
      if returned != "":
        if returned['Value'][1] != "":
          startedJobs = job_pattern.findall( returned['Value'][1] )
          self.log.info( 'step2:startedJobs:', startedJobs )
      cmd = self.getinfo + ' -c step3'
      returned = self.commandLaunch( cmd )
      self.log.info( 'InteractiveJobMonitorThread:step3:endedJobs:', returned )
      if returned != "":
        if returned['Value'][1] != "":
          finishedJobs = job_pattern_2.findall( returned['Value'][1] )
          self.log.info( 'step3:finishedJobs:', finishedJobs )
          if len( finishedJobs ) == numberJobs:
            BigDataDB.setJobStatus( jobid, "Done" )
            BigDataDB.setHadoopID( jobid, finishedJobs )
            self.__updateSandBox( jobid, output )

            # Update Accounting: the CPU consumed is the difference between
            # two os.times() snapshots taken around the monitored run
            EXECUTION_RESULT = {}
            EXECUTION_RESULT['CPU'] = []
            finalStat = os.times()
            for i in range( len( finalStat ) ):
              EXECUTION_RESULT['CPU'].append( finalStat[i] - self.initialTiming[i] )
            utime, stime, cutime, cstime, elapsed = EXECUTION_RESULT['CPU']
            cpuTime = utime + stime + cutime + cstime
            execTime = elapsed

            result = jobDB.getJobAttributes( jobid )
            getting = result['Value']
            acData = {
                'User': getting['Owner'],
                'UserGroup': getting['OwnerGroup'],
                'JobGroup': 'cesga',
                'JobType': 'User',
                'JobClass': 'unknown',
                'ProcessingType': 'unknown',
                'FinalMajorStatus': getting['Status'],
                'FinalMinorStatus': getting['MinorStatus'],
                'CPUTime': cpuTime,
                'Site': getting['Site'],
                # Based on the factor to convert raw CPU to Normalized units (based on the CPU Model)
                'NormCPUTime': 0,
                'ExecTime': execTime,
                'InputDataSize': 0,
                'OutputDataSize': 0,
                'InputDataFiles': 0,
                'OutputDataFiles': 0,
                'DiskSpace': 0,
                'InputSandBoxSize': 0,
                'OutputSandBoxSize': 0,
                'ProcessedEvents': 0 }
            accountingReport.setEndTime()
            accountingReport.setValuesFromDict( acData )
            result = accountingReport.commit()
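# The accounting block above measures consumed CPU by diffing two os.times()
# snapshots (user, system, child-user, child-system, elapsed). A
# self-contained sketch of the same technique; the busy loop merely stands in
# for the monitored workload:
import os

def cpuAndWallSeconds():
    start = os.times()
    sum(i * i for i in range(10 ** 6))  # stand-in for the real work
    end = os.times()
    utime, stime, cutime, cstime, elapsed = [e - s for e, s in zip(end, start)]
    return utime + stime + cutime + cstime, elapsed  # (CPU seconds, wall-clock seconds)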
def execute( self ):
  """Main Agent code:
     1.- Query TaskQueueDB for existing TQs
     2.- Count Pending Jobs
     3.- Submit Jobs
  """
  self.__checkSubmitPools()

  bigDataJobsToSubmit = {}
  bigDataJobIdsToSubmit = {}

  for directorName, directorDict in self.directors.items():
    self.log.verbose( 'Checking Director:', directorName )
    self.log.verbose( 'RunningEndPoints:', directorDict['director'].runningEndPoints )

    for runningEndPointName in directorDict['director'].runningEndPoints:
      runningEndPointDict = directorDict['director'].runningEndPoints[runningEndPointName]
      NameNode = runningEndPointDict['NameNode']

      # Count the jobs already submitted to or running on this endpoint
      jobsByEndPoint = 0
      result = BigDataDB.getBigDataJobsByStatusAndEndpoint( 'Submitted', NameNode )
      if result['OK']:
        jobsByEndPoint += len( result['Value'] )
      result = BigDataDB.getBigDataJobsByStatusAndEndpoint( 'Running', NameNode )
      if result['OK']:
        jobsByEndPoint += len( result['Value'] )
      self.log.verbose( 'Checking Jobs By EndPoint %s:' % jobsByEndPoint )

      jobLimitsEndPoint = runningEndPointDict['LimitQueueJobsEndPoint']
      bigDataJobs = 0
      if jobsByEndPoint >= jobLimitsEndPoint:
        self.log.info( '%s >= %s Running jobs reach the job limit: %s, skipping' %
                       ( jobsByEndPoint, jobLimitsEndPoint, runningEndPointName ) )
        continue
      else:
        bigDataJobs = jobLimitsEndPoint - jobsByEndPoint

      requirementsDict = runningEndPointDict['Requirements']
      self.log.info( 'Requirements Dict: ', requirementsDict )
      result = taskQueueDB.getMatchingTaskQueues( requirementsDict )
      if not result['OK']:
        self.log.error( 'Could not retrieve TaskQueues from TaskQueueDB', result['Message'] )
        return result

      taskQueueDict = result['Value']
      self.log.info( 'Task Queues Dict: ', taskQueueDict )
      jobs = 0
      priority = 0
      cpu = 0
      jobsID = 0
      self.log.info( 'Pending Jobs from the TaskQueue that did not match before: ', self.pendingTaskQueueJobs )

      for tq in taskQueueDict:
        jobs += taskQueueDict[tq]['Jobs']
        priority += taskQueueDict[tq]['Priority']
        cpu += taskQueueDict[tq]['Jobs'] * taskQueueDict[tq]['CPUTime']

        # Matching of jobs with BigData software.
        # The process follows this sequence:
        #   - retrieve a job from taskQueueDict
        #   - get the job name and try to match it with the resources
        #   - if it does not match, store it in pendingTaskQueueJobs for the
        #     next iteration
        #
        # The matching is done with the following JobName pattern:
        #   NameSoftware_SoftwareVersion_HighLanguageName_HighLanguageVersion_DataSetName

        # extract a job from the TaskQueue
        if tq not in self.pendingTaskQueueJobs.keys():
          self.pendingTaskQueueJobs[tq] = {}
        getJobFromTaskQueue = taskQueueDB.matchAndGetJob( taskQueueDict[tq] )
        if not getJobFromTaskQueue['OK']:
          self.log.error( 'Could not get a Job from the TaskQueue', getJobFromTaskQueue['Message'] )
          return getJobFromTaskQueue

        jobInfo = getJobFromTaskQueue['Value']
        jobID = jobInfo['jobId']
        jobAttrInfo = jobDB.getJobAttributes( jobID )
        if not jobAttrInfo['OK']:
          self.log.error( 'Could not get Job Attributes', jobAttrInfo['Message'] )
          return jobAttrInfo
        jobInfoUniq = jobAttrInfo['Value']
        jobName = jobInfoUniq['JobName']
        self.pendingTaskQueueJobs[tq][jobID] = jobName

        result = jobDB.getJobJDL( jobID, True )
        classAdJob = ClassAd( result['Value'] )
        arguments = 0
        if classAdJob.lookupAttribute( 'Arguments' ):
          arguments = classAdJob.getAttributeString( 'Arguments' )

        jobsToSubmit = self.matchingJobsForBDSubmission( arguments,
                                                         runningEndPointName,
                                                         runningEndPointDict['BigDataSoftware'],
                                                         runningEndPointDict['BigDataSoftwareVersion'],
                                                         runningEndPointDict['HighLevelLanguage']['HLLName'],
                                                         runningEndPointDict['HighLevelLanguage']['HLLVersion'],
                                                         jobID )
        if jobsToSubmit == "OK":
          if directorName not in bigDataJobsToSubmit:
            bigDataJobsToSubmit[directorName] = {}
          if runningEndPointName not in bigDataJobsToSubmit[directorName]:
            bigDataJobsToSubmit[directorName][runningEndPointName] = {}
          bigDataJobsToSubmit[directorName][runningEndPointName] = {
              'JobId': jobID,
              'JobName': jobName,
              'TQPriority': priority,
              'CPUTime': cpu,
              'BigDataEndpoint': runningEndPointName,
              'BigDataEndpointNameNode': runningEndPointDict['NameNode'],
              'BdSoftware': runningEndPointDict['BigDataSoftware'],
              'BdSoftwareVersion': runningEndPointDict['BigDataSoftwareVersion'],
              'HLLName': runningEndPointDict['HighLevelLanguage']['HLLName'],
              'HLLVersion': runningEndPointDict['HighLevelLanguage']['HLLVersion'],
              'NumBigDataJobsAllowedToSubmit': bigDataJobs,
              'SiteName': runningEndPointDict['SiteName'],
              'PublicIP': runningEndPointDict['PublicIP'],
              'User': runningEndPointDict['User'],
              'Port': runningEndPointDict['Port'],
              'UsePilot': runningEndPointDict['UsePilot'],
              'IsInteractive': runningEndPointDict['IsInteractive'],
              'Arguments': arguments }
          del self.pendingTaskQueueJobs[tq][jobID]
        else:
          self.log.error( jobsToSubmit )

      self.log.info( 'Pending Jobs from the TaskQueue that did not match after: ', self.pendingTaskQueueJobs )
      for tq in self.pendingTaskQueueJobs.keys():
        for jobid in self.pendingTaskQueueJobs[tq].keys():
          result = jobDB.getJobJDL( jobid, True )
          classAdJob = ClassAd( result['Value'] )
          arguments = 0
          if classAdJob.lookupAttribute( 'Arguments' ):
            arguments = classAdJob.getAttributeString( 'Arguments' )
          # do the match with the runningEndPoint
          jobsToSubmit = self.matchingJobsForBDSubmission( arguments,
                                                           runningEndPointName,
                                                           runningEndPointDict['BigDataSoftware'],
                                                           runningEndPointDict['BigDataSoftwareVersion'],
                                                           runningEndPointDict['HighLevelLanguage']['HLLName'],
                                                           runningEndPointDict['HighLevelLanguage']['HLLVersion'],
                                                           jobid )
          if jobsToSubmit == "OK":
            if directorName not in bigDataJobsToSubmit:
              bigDataJobsToSubmit[directorName] = {}
            if runningEndPointName not in bigDataJobsToSubmit[directorName]:
              bigDataJobsToSubmit[directorName][runningEndPointName] = {}
            bigDataJobsToSubmit[directorName][runningEndPointName] = {
                'JobId': jobid,
                'JobName': self.pendingTaskQueueJobs[tq][jobid],
                'TQPriority': priority,
                'CPUTime': cpu,
                'BigDataEndpoint': runningEndPointName,
                'BigDataEndpointNameNode': runningEndPointDict['NameNode'],
                'BdSoftware': runningEndPointDict['BigDataSoftware'],
                'BdSoftwareVersion': runningEndPointDict['BigDataSoftwareVersion'],
                'HLLName': runningEndPointDict['HighLevelLanguage']['HLLName'],
                'HLLVersion': runningEndPointDict['HighLevelLanguage']['HLLVersion'],
                'NumBigDataJobsAllowedToSubmit': bigDataJobs,
                'SiteName': runningEndPointDict['SiteName'],
                'PublicIP': runningEndPointDict['PublicIP'],
                'User': runningEndPointDict['User'],
                'Port': runningEndPointDict['Port'],
                'UsePilot': runningEndPointDict['UsePilot'],
                'IsInteractive': runningEndPointDict['IsInteractive'],
                'Arguments': arguments }
            del self.pendingTaskQueueJobs[tq][jobid]
          else:
            self.log.error( jobsToSubmit )

      if not jobs and not self.pendingTaskQueueJobs:
        self.log.info( 'No matching jobs for %s found, skipping' % NameNode )
        continue

      self.log.info( '___BigDataJobsTo Submit:', bigDataJobsToSubmit )

  for directorName, JobsToSubmitDict in bigDataJobsToSubmit.items():
    for runningEndPointName, jobsToSubmitDict in JobsToSubmitDict.items():
      if self.directors[directorName]['isEnabled']:
        self.log.info( 'Requesting submission to %s of %s' % ( runningEndPointName, directorName ) )

        director = self.directors[directorName]['director']
        pool = self.pools[self.directors[directorName]['pool']]

        jobIDs = JobsToSubmitDict[runningEndPointName]['JobId']
        jobName = JobsToSubmitDict[runningEndPointName]['JobName']
        endpoint = JobsToSubmitDict[runningEndPointName]['BigDataEndpoint']
        runningSiteName = JobsToSubmitDict[runningEndPointName]['SiteName']
        NameNode = JobsToSubmitDict[runningEndPointName]['BigDataEndpointNameNode']
        BigDataSoftware = JobsToSubmitDict[runningEndPointName]['BdSoftware']
        BigDataSoftwareVersion = JobsToSubmitDict[runningEndPointName]['BdSoftwareVersion']
        HLLName = JobsToSubmitDict[runningEndPointName]['HLLName']
        HLLVersion = JobsToSubmitDict[runningEndPointName]['HLLVersion']
        PublicIP = JobsToSubmitDict[runningEndPointName]['PublicIP']
        User = JobsToSubmitDict[runningEndPointName]['User']
        Port = JobsToSubmitDict[runningEndPointName]['Port']
        UsePilot = JobsToSubmitDict[runningEndPointName]['UsePilot']
        IsInteractive = JobsToSubmitDict[runningEndPointName]['IsInteractive']
        Arguments = JobsToSubmitDict[runningEndPointName]['Arguments']
        numBigDataJobsAllowed = JobsToSubmitDict[runningEndPointName]['NumBigDataJobsAllowedToSubmit']

        ret = pool.generateJobAndQueueIt( director.submitBigDataJobs,
                                          args = ( endpoint, numBigDataJobsAllowed, runningSiteName,
                                                   NameNode, BigDataSoftware, BigDataSoftwareVersion,
                                                   HLLName, HLLVersion, PublicIP, Port, jobIDs,
                                                   runningEndPointName, jobName, User, self.jobDataset,
                                                   UsePilot, IsInteractive ),
                                          oCallback = self.callBack,
                                          oExceptionCallback = director.exceptionCallBack,
                                          blocking = False )
        if not ret['OK']:
          # Disable submission until the next iteration
          self.directors[directorName]['isEnabled'] = False
        else:
          time.sleep( self.am_getOption( 'ThreadStartDelay' ) )

  if 'Default' in self.pools:
    # only for those in the 'Default' thread pool
    # for pool in self.pools:
    self.pools['Default'].processAllResults()

  return DIRAC.S_OK()
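# matchingJobsForBDSubmission (called twice in execute above) decides whether a
# job fits an endpoint, based on the JobName pattern documented above:
# NameSoftware_SoftwareVersion_HighLanguageName_HighLanguageVersion_DataSetName.
# A minimal sketch of that underscore matching, assuming only the documented
# pattern; the real method may apply additional checks:
def matchJobNameToEndPoint( jobName, bdSoftware, bdVersion, hllName, hllVersion ):
  parts = jobName.split( '_' )
  if len( parts ) < 4:
    return False
  return ( parts[0] == bdSoftware and parts[1] == bdVersion and
           parts[2] == hllName and parts[3] == hllVersion )

# Example:
# matchJobNameToEndPoint( 'Hadoop_1.2.1_Pig_0.12_wordcount', 'Hadoop', '1.2.1', 'Pig', '0.12' ) -> True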
def submitNewBigJob( self ):
  # 1.- Create the temporary folder
  self.log.debug( 'Step1::: mkdir temp folder' )
  tempPath = self.__tmpSandBoxDir + str( self.__jobID ) + "/"
  dirac = Dirac()
  if not os.path.exists( tempPath ):
    os.makedirs( tempPath )

  # 2.- Download the InputSandbox contents into the temporary folder
  self.log.debug( 'Step2::: download input sandbox to temp folder' )
  settingJobSandBoxDir = dirac.getInputSandbox( self.__jobID, tempPath )
  self.log.info( 'Writing temporary SandboxDir on the server', settingJobSandBoxDir )
  moveData = tempPath + "/InputSandbox" + str( self.__jobID )

  # 3.- Move the data to the client
  self.log.debug( 'Step3::: copy the data to the Hadoop Master' )
  HadoopV1InteractiveCli = HadoopV1InteractiveClient( self.__User, self.__publicIP, self.__Port )
  returned = HadoopV1InteractiveCli.dataCopy( tempPath, self.__tmpSandBoxDir )
  self.log.debug( 'Result of copying the job content to the Hadoop Master with HadoopInteractive::: ', returned )

  # 4.- Get the executable file
  result = jobDB.getJobJDL( str( self.__jobID ), True )
  classAdJob = ClassAd( result['Value'] )
  executableFile = ""
  if classAdJob.lookupAttribute( 'Executable' ):
    executableFile = classAdJob.getAttributeString( 'Executable' )
  self.log.debug( 'Step4::: Get executable file: ', executableFile )

  jobInfo = jobDB.getJobAttributes( self.__jobID )
  if not jobInfo['OK']:
    return S_ERROR( jobInfo['Message'] )

  proxy = ""
  jobInfo = jobInfo['Value']
  if gProxyManager.userHasProxy( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] ):
    proxy = gProxyManager.downloadProxyToFile( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] )
  else:
    proxy = self.__requestProxyFromProxyManager( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] )

  HadoopInteractiveJob = "InputSandbox" + str( self.__jobID ) + "/" + executableFile
  HadoopInteractiveJobCommand = "InputSandbox" + str( self.__jobID ) + "/" + executableFile + " " + self.__JobName
  HadoopInteractiveJobOutput = tempPath + str( self.__jobID ) + "_" + executableFile + "_out"

  # 5.- Create the second part of the job name and parse the execution command
  if len( re.split( " ", self.__JobName ) ) > 1:
    # (name for random writer with -D) name_job = re.split( " ", self.__JobName )[0] + " " + re.split( " ", self.__JobName )[1] + " " + re.split( " ", self.__JobName )[2]
    name_job = re.split( " ", self.__JobName )[0] + " " + re.split( " ", self.__JobName )[1]
    # (name for random writer with -D) output_job = moveData + "/" + re.split( " ", self.__JobName )[3]
    # (name for random writer with -D) cfg_job = ""
    # (name for random writer with -D) if len( re.split( " ", self.__JobName ) ) > 4:
    # (name for random writer with -D)   cfg_job = moveData + "/" + re.split( " ", self.__JobName )[4]
    # cmd = "hadoop jar " + tempPath + HadoopInteractiveJob + " " + name_job + " " + output_job + " " + cfg_job
    cmd = "hadoop jar " + tempPath + HadoopInteractiveJob + " " + name_job + " " + \
          tempPath + "/InputSandbox" + str( self.__jobID ) + "/" + "/dataset-USC-a-grep '[and]+'"
  else:
    # Skip the first two components of the dataset path to build the dataset name
    dataset = re.split( "/", self.__Dataset )
    count = 0
    datasetname = ""
    for dir in dataset:
      count = count + 1
      if count > 2:
        datasetname = datasetname + "/" + dir
    cmd = "hadoop jar " + tempPath + HadoopInteractiveJob + " " + self.__JobName + " " + datasetname + " " + \
          tempPath + "/" + self.__JobName.replace( " ", "" ) + "_" + str( self.__jobID )

  self.log.debug( 'Step5::: Making CMD for submission: ', cmd )
  self.log.debug( 'Step6::: Submit file to hadoop: ' )
  returned = HadoopV1InteractiveCli.jobSubmit( tempPath, HadoopInteractiveJob, proxy['chain'],
                                               HadoopInteractiveJobOutput, cmd )
  self.log.info( 'Launching Hadoop-HadoopInteractive job on the Master: ', returned )

  if not returned['OK']:
    return S_ERROR( returned['Message'] )
  else:
    self.log.info( 'Hadoop-HadoopInteractive Job ID: ', returned['Value'] )
    return S_OK( returned['Value'] )
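# The hadoop command above is assembled by raw string concatenation, so paths
# or job names containing spaces or shell metacharacters must be quoted by
# hand (note the literal "'[and]+'" argument). A sketch of the same
# construction with explicit quoting via pipes.quote (shlex.quote on
# Python 3); the function and argument names are illustrative:
import pipes

def buildHadoopCmd( jarPath, jobNameArgs, inputPath, outputPath ):
  """Build a safely quoted 'hadoop jar' command line."""
  parts = [ 'hadoop', 'jar', jarPath ] + jobNameArgs.split() + [ inputPath, outputPath ]
  return ' '.join( pipes.quote( p ) for p in parts )

# Example:
# buildHadoopCmd( '/tmp/sandbox/job.jar', 'grep [and]+', '/data/dataset-USC', '/tmp/out_123' )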