def _getStateTime(job, status): """Returns the timestamps for 'running' or 'completed' by extracting their equivalent timestamps from the loggingInfo.""" # Now private to stop server cross-talk from user thread. Since updateStatus calles # this method whether called itself by the user thread or monitoring thread. # Now don't use hook but define our own private version # used in monitoring loop... messy but works. if job.status != status: b_list = ['running', 'completing', 'completed', 'failed'] backend_final = ['failed', 'completed'] # backend stamps if not job.subjobs and status in b_list: for childstatus in b_list: if job.backend.id: logger.debug("Accessing getStateTime() in diracAPI") if childstatus in backend_final: be_statetime = execute("getStateTime(%d,\'%s\')" % (job.backend.id, childstatus)) job.time.timestamps["backend_final"] = be_statetime logger.debug("Wrote 'backend_final' to timestamps.") break else: time_str = "backend_" + childstatus if time_str not in job.time.timestamps: be_statetime = execute("getStateTime(%d,\'%s\')" % (job.backend.id, childstatus)) job.time.timestamps["backend_" + childstatus] = be_statetime logger.debug("Wrote 'backend_%s' to timestamps.", childstatus) if childstatus == status: break logger.debug("_getStateTime(job with id: %d, '%s') called.", job.id, job.status) else: logger.debug("Status changed from '%s' to '%s'. No new timestamp was written", job.status, status)
def debug(self):
    '''Obtains some (possibly) useful DIRAC debug info.

    Logs the reachable DIRAC services, pings each one, and (when this job
    already has an integer DIRAC id) downloads the pilot output into the
    job's debug workspace.
    '''
    # check services
    cmd = 'getServicePorts()'
    try:
        result = execute(cmd, cred_req=self.credential_requirements)
    except GangaDiracError as err:
        logger.warning('Could not obtain services: %s' % str(err))
        return
    services = result
    # Ping each system/service pair that DIRAC reported and log the outcome.
    for category in services:
        system, service = category.split('/')
        cmd = "ping('%s','%s')" % (system, service)
        try:
            result = execute(cmd, cred_req=self.credential_requirements)
            msg = 'OK.'
        except GangaDiracError as err:
            msg = '%s' % err
        logger.info('%s: %s' % (category, msg))
    # get pilot info for this job
    if not isinstance(self.id, int):
        return
    j = self.getJobObject()
    # Removed an unused 'cwd = os.getcwd()' local (never read).
    debug_dir = j.getDebugWorkspace().getPath()
    cmd = "getJobPilotOutput(%d,'%s')" % (self.id, debug_dir)
    try:
        result = execute(cmd, cred_req=self.credential_requirements)
        logger.info('Pilot Info: %s/pilot_%d/std.out.' % (debug_dir, self.id))
    except GangaDiracError as err:
        logger.error("%s" % err)
def debug(self): '''Obtains some (possibly) useful DIRAC debug info. ''' # check services cmd = 'getServicePorts()' try: result = execute(cmd, cred_req=self.credential_requirements) except GangaDiracError as err: logger.warning('Could not obtain services: %s' % str(err)) return services = result for category in services: system, service = category.split('/') cmd = "ping('%s','%s')" % (system, service) try: result = execute(cmd, cred_req=self.credential_requirements) msg = 'OK.' except GangaDiracError as err: msg = '%s' % err logger.info('%s: %s' % (category, msg)) # get pilot info for this job if not isinstance(self.id, int): return j = self.getJobObject() cwd = os.getcwd() debug_dir = j.getDebugWorkspace().getPath() cmd = "getJobPilotOutput(%d,'%s')" % (self.id, debug_dir) try: result = execute(cmd, cred_req=self.credential_requirements) logger.info('Pilot Info: %s/pilot_%d/std.out.' % (debug_dir, self.id)) except GangaDiracError as err: logger.error("%s" % err)
def test_execute_timeouts():
    """Check the timeout behaviour of execute() in both shell and python mode."""
    # Test timeouts: endless commands must be killed after ~1s.
    assert execute('while true; do sleep 1; done', shell=True, timeout=1) == 'Command timed out!'
    assert execute('while True: pass', timeout=1) == 'Command timed out!'

    # Test timeout doesn't hinder a normal command
    assert execute('import os\nprint(os.getcwd())', timeout=10, cwd=os.getcwd()).strip() == os.getcwd()
    assert execute('cd "{0}"; pwd'.format(os.getcwd()), shell=True, timeout=10, cwd=os.getcwd()).strip() == os.getcwd()

    # Test timeout doesn't delay normal command
    assert timeit.timeit('''
import os
from GangaDirac.Lib.Utilities.DiracUtilities import execute
execute('import os\\nprint(os.getcwd())',timeout=10, cwd=os.getcwd())
''', number=1) < 11
    assert timeit.timeit('''
from GangaDirac.Lib.Utilities.DiracUtilities import execute
execute('cd "{0}"; pwd',shell=True, timeout=10)
'''.format(os.getcwd()), number=1) < 11
def accessURL(self, thisSE=""):
    """
    Attempt to find an accessURL which corresponds to an SE at this given site
    The given site will be provided either by thisSE which takes precedent or
    by the value stored in DiracFile.defaultSE

    Args:
        thisSE (str): optional SE/site name taking precedence over self.defaultSE
    Returns:
        list: [matched URL], [] when no match is found, or one URL per subfile
    """
    # If we don't have subfiles then we need to make sure that the replicas
    # are known
    if len(self._remoteURLs) == 0 and len(self.subfiles) == 0:
        self.getReplicas()

    # Now we have to match the replicas to find one at the requested site
    if len(self.subfiles) == 0:
        files_URLs = self._remoteURLs
        this_accessURL = ""

        for this_SE in files_URLs.keys():
            this_URL = files_URLs.get(this_SE)
            these_sites_output = execute('getSitesForSE("%s")' % str(this_SE))

            # thisSE takes precedence; otherwise look up the site of defaultSE.
            if thisSE == "":
                default_site = execute('getSiteForSE("%s")' % self.defaultSE)
            else:
                default_site = thisSE

            if these_sites_output.get("OK", False):
                these_sites = these_sites_output.get("Value")
                for this_site in these_sites:
                    if isinstance(default_site, list):
                        hasMatched = False
                        for this_Site_in_SE in default_site:
                            if this_site == this_Site_in_SE:
                                hasMatched = True
                                break
                        if hasMatched:
                            # BUGFIX: the list branch previously broke out
                            # without ever recording the URL, so a successful
                            # match was silently dropped.
                            this_accessURL = this_URL
                            break
                    elif isinstance(default_site, str):
                        if this_site == default_site:
                            this_accessURL = this_URL
                            break

            if this_accessURL != "":
                break

        # Cannot find an accessURL
        if this_accessURL == "":
            return []

        # Currently only written to cope with 1 replica per DIRAC file
        # I think adding multiple accessURL for the same file at 1 site
        # adds confusion
        return [this_accessURL]

    else:
        # For all subfiles request the accessURL, 1 URL per LFN
        _accessURLs = []
        for i in self.subfiles:
            for j in i.accessURL():
                _accessURLs.append(j)
        return _accessURLs
def test_execute_env():
    """Check that execute() picks up, isolates, and optionally updates the environment."""
    # Test correctly picking up env
    env = {
        'ALEX': '/hello/world',
        'PATH': os.environ.get('PATH', ''),
        'PYTHONPATH': os.environ.get('PYTHONPATH', '')
    }
    # env.update(os.environ)
    assert execute('echo $ALEX', shell=True, env=env).strip() == '/hello/world'
    assert execute('import os\nprint(os.environ.get("ALEX","BROKEN"))', env=env, python_setup='#').strip() == '/hello/world'

    # Test env not updated by default
    execute('export NEWTEST=/new/test', shell=True, env=env)
    assert 'NEWTEST' not in env
    execute('import os\nos.environ["NEWTEST"]="/new/test"', env=env, python_setup='#')
    assert 'NEWTEST' not in env

    # Test updating of env
    execute('export NEWTEST=/new/test', shell=True, env=env, update_env=True)
    assert 'NEWTEST' in env
    del env['NEWTEST']
    assert 'NEWTEST' not in env
    execute('import os\nos.environ["NEWTEST"]="/new/test"', env=env, python_setup='#', update_env=True)
    assert 'NEWTEST' in env
def test_execute():
    """Verify execute() in python mode and shell mode both report the working directory."""
    expected = os.getcwd()
    # Python-mode: run a snippet that prints the cwd.
    python_out = execute('import os\nprint(os.getcwd())', cwd=expected)
    assert python_out.strip() == expected
    # Shell-mode: cd into the directory and run pwd.
    shell_out = execute('cd "{0}"; pwd'.format(expected), shell=True, cwd=expected)
    assert shell_out.strip() == expected
def getAccessURLs(lfns, defaultSE='', protocol='', credential_requirements=None):
    """
    This is a function to get a list of the accessURLs for a provided list of
    lfns. If no defaultSE is provided then one is chosen at random from those
    with replicase. The protocol allows you the option of specifying xroot or
    root (or any other available) protocols for the file accessURL. If left
    blank the default protocol for the SE will be used by Dirac.

    Args:
        lfns (list): LFN strings or objects with an .lfn attribute (e.g. DiracFile)
        defaultSE (str): preferred SE to try first
        protocol (str): access protocol to request, '' for the SE default
        credential_requirements: optional credential for the DIRAC session
    Returns:
        list: access URLs, or None when no usable LFNs were supplied
    """
    lfnList = []
    # Has a list of strings, which are probably lfns been given
    if all(isinstance(item, str) for item in lfns):
        lfnList = lfns
    else:
        # If some elements are not strings look for the DiracFiles, separates
        # out the LocalFiles from a job's outputfiles list
        for diracFile in lfns:
            try:
                lfnList.append(diracFile.lfn)
            except AttributeError:
                pass
    if not lfnList:
        logger.error("Provided list does not have LFNs or DiracFiles in it")
        return
    # Get all the replicas
    reps = execute('getReplicas(%s)' % lfnList, cred_req=credential_requirements)
    # Get the SEs
    SEs = []
    for lf in reps['Successful']:
        for thisSE in reps['Successful'][lf].keys():
            if thisSE not in SEs:
                SEs.append(thisSE)
    myURLs = []
    # If an SE is specified, move it to be the first element in the list to be processed.
    if defaultSE != '':
        if defaultSE in SEs:
            SEs.remove(defaultSE)
            SEs.insert(0, defaultSE)
        else:
            logger.warning('No replica at specified SE, here is a URL for another replica')
    remainingLFNs = list(lfnList)
    # Loop over the possible SEs and get the URLs of the files stored there.
    # Remove the successfully found ones from the list and move on to the next SE.
    for SE in SEs:
        # (Removed a pointless alias that shadowed the 'lfns' parameter.)
        thisSEFiles = execute('getAccessURL(%s, "%s", %s)' % (remainingLFNs, SE, protocol),
                              cred_req=credential_requirements)['Successful']
        for lfn, url in thisSEFiles.items():
            myURLs.append(url)
            remainingLFNs.remove(lfn)
        # If we gotten to the end of the list then break
        if not remainingLFNs:
            break
    return myURLs
def _getStateTime(job, status, getStateTimeResult=None):
    """Returns the timestamps for 'running' or 'completed' by extracting their
    equivalent timestamps from the loggingInfo.

    Args:
        job (Job): This is the job object we want to update
        status (str): This is the Ganga status we're updating (running, completed... etc)
        getStateTimeResult (dict): This is the optional result of executing the approriate
            getStateTime against this job.backend.id, if not provided the command is called internally
    """
    # BUGFIX: the default used to be a shared mutable dict ({}); use the
    # standard None sentinel instead. The dict is only read here, but the
    # sentinel keeps the signature safe and is backward compatible.
    if getStateTimeResult is None:
        getStateTimeResult = {}
    # Now private to stop server cross-talk from user thread. Since updateStatus calles
    # this method whether called itself by the user thread or monitoring thread.
    # Now don't use hook but define our own private version
    # used in monitoring loop... messy but works.
    if job.status != status:
        b_list = ['running', 'completing', 'completed', 'failed']
        backend_final = ['failed', 'completed']
        # backend stamps
        if not job.subjobs and status in b_list:
            # Walk the status ladder up to (and including) the new status,
            # filling in any backend_* timestamps not yet recorded.
            for childstatus in b_list:
                if job.backend.id:
                    logger.debug("Accessing getStateTime() in diracAPI")
                    if childstatus in backend_final:
                        # Terminal backend state: one 'backend_final' stamp, then stop.
                        if childstatus in getStateTimeResult:
                            be_statetime = getStateTimeResult[childstatus]
                        else:
                            be_statetime = execute("getStateTime(%d,\'%s\')" % (job.backend.id, childstatus),
                                                   cred_req=job.backend.credential_requirements)
                        job.time.timestamps["backend_final"] = be_statetime
                        logger.debug("Wrote 'backend_final' to timestamps.")
                        break
                    else:
                        # Intermediate state: write the stamp only once.
                        time_str = "backend_" + childstatus
                        if time_str not in job.time.timestamps:
                            if childstatus in getStateTimeResult:
                                be_statetime = getStateTimeResult[childstatus]
                            else:
                                be_statetime = execute("getStateTime(%d,\'%s\')" % (job.backend.id, childstatus),
                                                       cred_req=job.backend.credential_requirements)
                            job.time.timestamps["backend_" + childstatus] = be_statetime
                        logger.debug("Wrote 'backend_%s' to timestamps.", childstatus)
                # Stop once we reach the status being applied.
                if childstatus == status:
                    break
        logger.debug("_getStateTime(job with id: %d, '%s') called.", job.id, job.status)
    else:
        logger.debug("Status changed from '%s' to '%s'. No new timestamp was written", job.status, status)
def browse(self, gui=True):
    """Launch the DIRAC bookkeeping GUI and return the user's selection as an LHCbDataset."""
    tmp_file = self._createTmpFile()
    if gui:
        # The GUI writes the user's file selection into the temporary file.
        execute('bookkeepingGUI("%s")' % tmp_file)
        selection = self._fileToList(tmp_file)
        dataset = LHCbDataset()
        dataset.extend([selection])
        return dataset
def browse(self, gui=True):
    """Open the bookkeeping GUI (when gui=True) and return the chosen files as an LHCbDataset."""
    f = self._createTmpFile()
    if gui:
        cmd = 'bookkeepingGUI("%s")' % f
        # The GUI writes the user's file selection into the temporary file.
        execute(cmd)
        l = self._fileToList(f)
        ds = LHCbDataset()
        ds.extend([l])
        return ds
def test_uploadFile(self, dirac_job):
    """Upload a small random file to the grid, then remove it again."""
    new_lfn = '%s_upload_file' % os.path.dirname(dirac_job.get_file_lfn)
    location = 'UKI-SOUTHGRID-RALPP-disk'
    # Context manager guarantees the handle is closed even on write failure.
    with open('upload_file', 'w') as add_file:
        add_file.write(random_str())
    confirm = execute('uploadFile("%s","upload_file","%s")' % (new_lfn, location))
    assert isinstance(confirm, dict), 'Command not executed successfully'
    # Clean up so repeated runs don't collide on the same LFN.
    confirm_remove = execute('removeFile("%s")' % new_lfn)
    assert confirm_remove['OK'], 'Command not executed successfully'
def test_addFile(self):
    """Add a small random file to the grid at CERN-USER, then remove it again."""
    new_lfn = '%s_add_file' % os.path.dirname(self.__class__._getFileLFN)
    location = 'CERN-USER'
    # Context manager guarantees the handle is closed even on write failure.
    with open('add_file', 'w') as add_file:
        add_file.write(random_str())
    confirm = execute('addFile("%s","add_file","%s","")' % (new_lfn, location))
    self.assertTrue(confirm['OK'], 'Command not executed successfully')
    # Clean up so repeated runs don't collide on the same LFN.
    confirm_remove = execute('removeFile("%s")' % new_lfn)
    self.assertTrue(confirm_remove['OK'], 'Command not executed successfully')
def getDiracFiles():
    """Return a GangaList of DiracFile objects, one per LFN owned by the user.

    Runs 'dirac-dms-user-lfns', which dumps the user's LFNs into a local
    .lfns file, then builds a DiracFile for each listed LFN.
    """
    # (Removed an unused function-local 'import os'.)
    from GangaDirac.Lib.Files.DiracFile import DiracFile
    from Ganga.GPIDev.Lib.GangaList.GangaList import GangaList
    # The LFN base path (with '/' -> '-') doubles as the dump file name.
    filename = DiracFile.diracLFNBase().replace('/', '-') + '.lfns'
    logger.info('Creating list, this can take a while if you have a large number of SE files, please wait...')
    # No timeout: listing a large user area can legitimately take a long time.
    execute('dirac-dms-user-lfns &> /dev/null', shell=True, timeout=None)
    g = GangaList()
    # filename starts with '-' (from the leading '/'); strip it to get the real file.
    with open(filename[1:], 'r') as lfnlist:
        lfnlist.seek(0)
        g.extend((DiracFile(lfn='%s' % lfn.strip()) for lfn in lfnlist.readlines()))
    return addProxy(g)
def test_addFile(self, dirac_job, tmpdir):
    """Add a temporary file to the grid at a fixed SE, then remove it again."""
    new_lfn = '%s_add_file' % os.path.dirname(dirac_job.get_file_lfn)
    location = 'UKI-SOUTHGRID-RALPP-disk'
    temp_file = tmpdir.join('add_file')
    temp_file.write(random_str())
    confirm = execute('addFile("%s","%s","%s","")' % (new_lfn, temp_file, location))
    logger.info(confirm)
    assert confirm['OK'], 'Command not executed successfully'
    confirm_remove = execute('removeFile("%s")' % new_lfn)
    # BUGFIX: previously logged 'confirm' a second time instead of the
    # removal result (copy-paste error).
    logger.info(confirm_remove)
    assert confirm_remove['OK'], 'Command not executed successfully'
def test_replicateFile(self, dirac_job, dirac_sites):
    """Replicate the job's file to the first site that accepts it, then remove that replica."""
    for new_location in dirac_sites:
        confirm = execute('replicateFile("%s","%s","")' % (dirac_job.get_file_lfn, new_location),
                          cred_req=dirac_job.cred_req, return_raw_dict=True)
        logger.info(confirm)
        if not confirm['OK']:
            continue  # If we couldn't add the file, try the next site
        confirm = execute('removeReplica("%s","%s")' % (dirac_job.get_file_lfn, new_location),
                          cred_req=dirac_job.cred_req, return_raw_dict=True)
        logger.info(confirm)
        assert confirm['OK'], 'Command not executed successfully'
        break  # Once we found a working site, stop looking
    else:
        # for/else: only reached when no site accepted the replica.
        raise AssertionError('No working site found')
def getAccessURLs(lfns, defaultSE = '', protocol = '', credential_requirements=None):
    """
    This is a function to get a list of the accessURLs for a provided list of
    lfns. If no defaultSE is provided then one is chosen at random from those
    with replicase. The protocol allows you the option of specifying xroot or
    root (or any other available) protocols for the file accessURL. If left
    blank the default protocol for the SE will be used by Dirac.

    Args:
        lfns (list): LFN strings or objects with an .lfn attribute (e.g. DiracFile)
        defaultSE (str): preferred SE to try first
        protocol (str): access protocol to request, '' for the SE default
        credential_requirements: optional credential for the DIRAC session
    Returns:
        list: access URLs, or None when no usable LFNs were supplied
    """
    lfnList = []
    # Has a list of strings, which are probably lfns been given
    if all(isinstance(item, str) for item in lfns):
        lfnList = lfns
    else:
        #If some elements are not strings look for the DiracFiles, separates out the LocalFiles from a job's outputfiles list
        for diracFile in lfns:
            try:
                lfnList.append(diracFile.lfn)
            except AttributeError:
                pass
    if not lfnList:
        logger.error("Provided list does not have LFNs or DiracFiles in it")
        return
    # Get all the replicas
    reps = execute('getReplicas(%s)' % lfnList, cred_req=credential_requirements)
    # Get the SEs
    SEs = []
    for lf in reps['Successful']:
        for thisSE in reps['Successful'][lf].keys():
            if thisSE not in SEs:
                SEs.append(thisSE)
    myURLs = []
    # If an SE is specified, move it to be the first element in the list to be processed.
    if defaultSE != '':
        if defaultSE in SEs:
            SEs.remove(defaultSE)
            SEs.insert(0, defaultSE)
        else:
            logger.warning('No replica at specified SE, here is a URL for another replica')
    remainingLFNs = list(lfnList)
    # Loop over the possible SEs and get the URLs of the files stored there.
    # Remove the successfully found ones from the list and move on to the next SE.
    for SE in SEs:
        # NOTE(review): this alias shadows the 'lfns' parameter.
        lfns = remainingLFNs
        thisSEFiles = execute('getAccessURL(%s, "%s", %s)' % (lfns, SE, protocol), cred_req=credential_requirements)['Successful']
        for lfn in thisSEFiles.keys():
            myURLs.append(thisSEFiles[lfn])
            remainingLFNs.remove(lfn)
        # If we gotten to the end of the list then break
        if not remainingLFNs:
            break
    return myURLs
def test_addFile(self):
    """Add a small random file to the grid at CERN-USER, then remove it again."""
    new_lfn = '%s_add_file' % os.path.dirname(self.__class__._getFileLFN)
    location = 'CERN-USER'
    # Context manager guarantees the handle is closed even on write failure.
    with open('add_file', 'w') as add_file:
        add_file.write(random_str())
    confirm = execute('addFile("%s","add_file","%s","")' % (new_lfn, location))
    self.assertTrue(confirm['OK'], 'Command not executed successfully')
    # Clean up so repeated runs don't collide on the same LFN.
    confirm_remove = execute('removeFile("%s")' % new_lfn)
    self.assertTrue(confirm_remove['OK'], 'Command not executed successfully')
def getDiracFiles():
    """Return a GangaList of DiracFile objects, one per LFN owned by the user.

    Runs 'dirac-dms-user-lfns', which dumps the user's LFNs into a local
    .lfns file, then builds a DiracFile for each listed LFN.
    """
    import os  # NOTE(review): unused in this function - candidate for removal
    from GangaDirac.Lib.Files.DiracFile import DiracFile
    from Ganga.GPIDev.Lib.GangaList.GangaList import GangaList
    # The LFN base path (with '/' -> '-') doubles as the dump file name.
    filename = DiracFile.diracLFNBase().replace('/', '-') + '.lfns'
    logger.info(
        'Creating list, this can take a while if you have a large number of SE files, please wait...'
    )
    # No timeout: listing a large user area can legitimately take a long time.
    execute('dirac-dms-user-lfns &> /dev/null', shell=True, timeout=None)
    g = GangaList()
    # filename starts with '-' (from the leading '/'); strip it to get the real file.
    with open(filename[1:], 'r') as lfnlist:
        lfnlist.seek(0)
        g.extend(
            (DiracFile(lfn='%s' % lfn.strip()) for lfn in lfnlist.readlines()))
    return addProxy(g)
def _common_submit(self, dirac_script):
    '''Submit the job via the Dirac server.

    Args:
        dirac_script (str): filename of the JDL which is to be submitted to DIRAC
    Returns:
        bool: True when submission produced an integer DIRAC id (or the result
        of _setup_bulk_subjobs for a bulk submission)
    Raises:
        BackendError: when the DIRAC submission itself fails
    '''
    j = self.getJobObject()
    # Reset any state left over from a previous (re)submission attempt.
    self.id = None
    self.actualCE = None
    self.status = None
    self.extraInfo = None
    self.statusInfo = ''
    j.been_queued = False
    dirac_cmd = """execfile(\'%s\')""" % dirac_script
    try:
        result = execute(dirac_cmd, cred_req=self.credential_requirements)
    except GangaDiracError as err:
        err_msg = 'Error submitting job to Dirac: %s' % str(err)
        logger.error(err_msg)
        # Dump the submitted script to ease debugging of the failure.
        logger.error("\n\n===\n%s\n===\n" % dirac_script)
        logger.error("\n\n====\n")
        with open(dirac_script, 'r') as file_in:
            logger.error("%s" % file_in.read())
        logger.error("\n====\n")
        raise BackendError('Dirac', err_msg)

    idlist = result
    # A list of ids means the script created several (bulk) subjobs.
    if isinstance(idlist, list):  # idiom: isinstance over 'type(...) is list'
        return self._setup_bulk_subjobs(idlist, dirac_script)

    self.id = idlist
    return isinstance(self.id, int)
def test_timedetails(self, dirac_job):
    """Check that timedetails returns an OK dict for a submitted job."""
    result = execute('timedetails("%s")' % dirac_job.id, return_raw_dict=True)
    logger.info(result)
    assert result['OK'], 'timedetails command not executed successfully'
    assert isinstance(result['Value'], dict), 'Command not executed successfully'
def _common_submit(self, dirac_script):
    '''Submit the job via the Dirac server.

    Args:
        dirac_script (str): filename of the JDL which is to be submitted to DIRAC
    Returns:
        bool: True when submission produced an integer DIRAC id (or the result
        of _setup_bulk_subjobs for a bulk submission)
    Raises:
        BackendError: when the DIRAC submission itself fails
    '''
    j = self.getJobObject()
    # Reset any state left over from a previous (re)submission attempt.
    self.id = None
    self.actualCE = None
    self.status = None
    self.extraInfo = None
    self.statusInfo = ''
    j.been_queued = False
    dirac_cmd = """execfile(\'%s\')""" % dirac_script
    try:
        result = execute(dirac_cmd, cred_req=self.credential_requirements)
    except GangaDiracError as err:
        err_msg = 'Error submitting job to Dirac: %s' % str(err)
        logger.error(err_msg)
        # Dump the submitted script to ease debugging of the failure.
        logger.error("\n\n===\n%s\n===\n" % dirac_script)
        logger.error("\n\n====\n")
        with open(dirac_script, 'r') as file_in:
            logger.error("%s" % file_in.read())
        logger.error("\n====\n")
        raise BackendError('Dirac', err_msg)

    idlist = result
    # A list of ids means the script created several (bulk) subjobs.
    if type(idlist) is list:
        return self._setup_bulk_subjobs(idlist, dirac_script)

    self.id = idlist
    return type(self.id) == int
def internalCopyTo(self, targetPath):
    """ Retrieves locally the file matching this DiracFile object pattern. If localPath is specified

    Args:
        targetPath (str): The path the file should be placed at locally
    Returns:
        bool: True (an LFN-less file raises before reaching here)
    Raises:
        GangaFileError: when the file has no LFN to download
    """
    to_location = targetPath

    # An LFN is mandatory: it identifies the remote file to fetch.
    if self.lfn == "":
        raise GangaFileError('Can\'t download a file without an LFN.')

    logger.info("Getting file %s" % self.lfn)
    stdout = execute('getFile("%s", destDir="%s")' % (self.lfn, to_location), cred_req=self.credential_requirements)

    # Derive the local name from the LFN if none was set yet.
    if self.namePattern == "":
        name = os.path.basename(self.lfn)
        # Strip the 3-char compression suffix (e.g. '.gz') for compressed files.
        if self.compressed:
            name = name[:-3]
        self.namePattern = name

    # Fill in missing metadata (guid/locations) after the download.
    if self.guid == "" or not self.locations:
        self.getMetadata()
    return True
def diracAPI(cmd, timeout=60, cred_req=None):
    '''Execute DIRAC API commands from within Ganga and return the stdout.

    Args:
        cmd (str): This is the command you want to execute from within an active DIRAC session
        timeout (int): This is the maximum time(sec) the session has to complete the task
        cred_req (ICredentialRequirement): This is the (optional) credential passed to construct the correct DIRAC env

    Examples::

        # this will simply return 87
        diracAPI('print 87')

        # this will return the status of job 66
        # note a Dirac() object is already provided set up as 'dirac'
        diracAPI('print Dirac().status([66])')
        diracAPI('print dirac.status([66])')

        # or can achieve the same using command defined and included from
        # getConfig('DIRAC')['DiracCommandFiles']
        diracAPI('status([66])')
    '''
    # Thin wrapper: delegate straight to the shared execute() helper.
    result = execute(cmd, timeout=timeout, cred_req=cred_req)
    return result
def checkTier1s(self):
    """Query DIRAC for Tier-1 information and return its 'Value' dict (None on failure)."""
    result = execute('checkTier1s()', cred_req=self.credential_requirements)
    if not result_ok(result):
        logger.warning('Could not obtain Tier-1 info: %s' % str(result))
        return
    return result.get('Value', {})
def test_status(self, dirac_job):
    """Check the status command returns an OK dict whose value is a list."""
    confirm = execute(
        "status([%s], %s)" % (dirac_job.id, repr(statusmapping)),
        cred_req=dirac_job.cred_req,
        return_raw_dict=True
    )
    logger.info(confirm)
    assert confirm["OK"], "status command not executed successfully"
    assert isinstance(confirm["Value"], list), "Command not executed successfully"
def test_normCPUTime(self, dirac_job):
    """Check normCPUTime returns an OK dict whose value is a string."""
    confirm = execute('normCPUTime("%s")' % dirac_job.id, return_raw_dict=True)
    logger.info(confirm)
    assert confirm['OK'], 'normCPUTime command not executed successfully'
    # Fixed typo in the failure message ('ommand' -> 'command').
    assert isinstance(confirm['Value'], str), 'normCPUTime command not executed successfully'
def replicate(self, destSE):
    """ Replicate this file from self.locations[0] to destSE

    Args:
        destSE (str): name of the destination storage element
    Returns:
        None on success, otherwise the raw result from the DIRAC call
    Raises:
        GangaException: when the file has no known location or no LFN
    """
    # Make sure we know where the file currently is before replicating.
    if not self.locations:
        if self.lfn != '':
            self.getReplicas()
        else:
            raise GangaException('Can\'t replicate a file if it isn\'t already on a DIRAC SE, upload it first')

    if self.lfn == '':
        raise GangaException('Must supply an lfn to replicate')

    logger.info("Replicating file %s to %s" % (self.lfn, destSE))
    # Replicate from the first known location.
    stdout = execute('replicateFile("%s", "%s", "%s")' % (self.lfn, destSE, self.locations[0]))

    # Success only when DIRAC reports OK *and* our LFN is in the 'Successful' map.
    if isinstance(stdout, dict) and stdout.get('OK', False) and self.lfn in stdout.get('Value', {'Successful': {}})['Successful']:
        self.locations.append(destSE)
        # Refresh so cached replica info reflects the new copy.
        self.getReplicas(forceRefresh=True)
        return

    logger.error("Error in replicating file '%s' : %s" % (self.lfn, stdout))
    return stdout
def checkTier1s(self):
    """Query DIRAC for Tier-1 information and return its 'Value' dict (None on failure)."""
    cmd = 'checkTier1s()'
    # NOTE(review): unlike the sibling implementations, no cred_req is passed
    # here - confirm that using the default credentials is intended.
    result = execute(cmd)
    if not result_ok(result):
        logger.warning('Could not obtain Tier-1 info: %s' % str(result))
        return
    return result.get('Value', {})
def diracAPI(cmd, timeout=60, cred_req=None):
    '''
    Args:
        cmd (str): This is the command you want to execute from within an active DIRAC session
        timeout (int): This is the maximum time(sec) the session has to complete the task
        cred_req (ICredentialRequirement): This is the (optional) credential passed to construct the correct DIRAC env

    Execute DIRAC API commands from w/in Ganga.

    The stdout will be returned, e.g.:

    # this will simply return 87
    diracAPI(\'print 87\')

    # this will return the status of job 66
    # note a Dirac() object is already provided set up as \'dirac\'
    diracAPI(\'print Dirac().status([66])\')
    diracAPI(\'print dirac.status([66])\')

    # or can achieve the same using command defined and included from
    # getConfig('DIRAC')['DiracCommandFiles']
    diracAPI(\'status([66])\')
    '''
    # Thin wrapper: delegate straight to the shared execute() helper.
    return execute(cmd, timeout=timeout, cred_req=cred_req)
def uploadLocalFile(job, namePattern, localDir, should_del=True):
    """
    Upload a locally available file to the grid as a DiracFile. Randomly chooses an SE.

    Args:
        namePattern (str): name of the file
        localDir (str): localDir of the file
        should_del = (bool): should we delete the local file?
    Return
        DiracFile: a DiracFile of the uploaded LFN on the grid
    """
    new_df = DiracFile(namePattern, localDir=localDir)
    trySEs = getConfig('DIRAC')['allDiracSE']
    # Shuffle so upload load is spread across the configured SEs.
    random.shuffle(trySEs)
    new_lfn = os.path.join(getInputFileDir(job), namePattern)
    returnable = None
    for SE in trySEs:
        #Check that the SE is writable
        if execute('checkSEStatus("%s", "%s")' % (SE, 'Write')):
            try:
                returnable = new_df.put(force=True, uploadSE=SE, lfn=new_lfn)[0]
                break
            except GangaDiracError as err:
                # NOTE(review): raising here aborts on the first failed upload
                # instead of trying the remaining SEs (and 'err' is unused) -
                # confirm this early abort is intended.
                raise GangaException("Upload of input file as LFN %s to SE %s failed" % (new_lfn, SE))
    if not returnable:
        raise GangaException("Failed to upload input file to any SE")
    # Optionally remove the local copy once it is safely on the grid.
    if should_del:
        os.unlink(os.path.join(localDir, namePattern))
    return returnable
def getLFNReplicas(allLFNs, index, allLFNData):
    """Fetch replica info for the index-th chunk of allLFNs via getReplicas.

    The chunk spans [index*LFN_parallel_limit, (index+1)*LFN_parallel_limit),
    clamped to the end of the list. The DIRAC call is retried up to 5 times.
    """
    output = None
    toy_num = 0
    global LFN_parallel_limit
    this_min = index * LFN_parallel_limit
    # Clamp the upper bound for the final (possibly short) chunk.
    if (index + 1) * LFN_parallel_limit > len(allLFNs):
        this_max = len(allLFNs)
    else:
        this_max = (index + 1) * LFN_parallel_limit
    for toy_num in range(5):
        try:
            from GangaDirac.Lib.Utilities.DiracUtilities import execute
            output = execute('getReplicas(%s)' % str(allLFNs[this_min:this_max]))
            these_values = output.get('Value').get('Successful')
            break
        # BUGFIX: 'except Exception, err' is Python-2-only syntax; 'as' works
        # on Python 2.6+ and Python 3.
        except Exception as err:
            logger.error("Dirac Error: %s" % str(err))
            # catch 'Successful' not found and others
            pass
def test_getStateTime(self, dirac_job):
    """Check getStateTime returns a datetime for the 'completed' state."""
    confirm = execute(
        'getStateTime("%s", "completed")' % dirac_job.id,
        cred_req=dirac_job.cred_req,
        return_raw_dict=True
    )
    logger.info(confirm)
    assert confirm["OK"], "getStateTime command not executed successfully"
    assert isinstance(confirm["Value"], datetime.datetime), "getStateTime command not executed successfully"
def diracAPI_interactive(connection_attempts=5):
    '''
    Run an interactive server within the DIRAC environment.

    Args:
        connection_attempts (int): number of times to try connecting the client
    Returns:
        str or None: formatted traceback of the last failed attempt, else None
    '''
    import os
    import sys
    import time
    import inspect
    import traceback
    from GangaDirac.Lib.Server.InspectionClient import runClient
    # The inspection server script lives alongside the client module.
    serverpath = os.path.join(os.path.dirname(inspect.getsourcefile(runClient)), 'InspectionServer.py')
    # Launch the server on a worker-thread queue; no timeout, it runs until quit.
    from Ganga.Core.GangaThread.WorkerThreads import getQueues
    getQueues().add(execute("execfile('%s')" % serverpath, timeout=None, shell=False))

    #time.sleep(1)
    sys.stdout.write("\nType 'q' or 'Q' or 'exit' or 'exit()' to quit but NOT ctrl-D")
    i = 0
    excpt = None
    # Keep retrying the client: the server may not have started yet.
    while i < connection_attempts:
        try:
            runClient()
            break
        except:
            # NOTE(review): bare except captures any failure's traceback on the
            # last attempt - confirm narrowing to Exception is not wanted.
            if i == (connection_attempts - 1):
                excpt = traceback.format_exc()
        finally:
            i += 1
    return excpt
def getMetadata(self):
    """ Get Metadata associated with this files lfn. This method will also try to automatically set the files guid attribute.

    Returns:
        dict: the (possibly eval'd) result of the DIRAC getMetadata call
    """
    # An empty LFN means the file may only exist locally: upload it first.
    if self.lfn == "":
        self._optionallyUploadLocalFile()

    # eval again here as datatime not included in dirac_ganga_server
    r = execute('getMetadata("%s")' % self.lfn)
    # NOTE(review): eval() on the returned string is only safe if the server
    # response is trusted - confirm the source cannot be attacker-controlled.
    try:
        ret = eval(r)
    except:
        ret = r
    if isinstance(ret, dict) and ret.get('OK', False) and self.lfn in ret.get('Value', {'Successful': {}})['Successful']:
        try:
            # Adopt the server-side GUID when ours is missing or stale.
            if self.guid != ret['Value']['Successful'][self.lfn]['GUID']:
                self.guid = ret['Value']['Successful'][self.lfn]['GUID']
        except:
            pass

    try:
        # Best-effort: attach the known replica locations to the returned metadata.
        reps = self.getReplicas()
        ret['Value']['Successful'][self.lfn].update({'replicas': self.locations})
    except:
        pass

    return ret
def checkTier1s(self):
    """Query DIRAC for Tier-1 information and return its 'Value' dict (None on failure)."""
    cmd = "checkTier1s()"
    result = execute(cmd, cred_req=self.credential_requirements)
    # result_ok guards against malformed / failed DIRAC responses.
    if not result_ok(result):
        logger.warning("Could not obtain Tier-1 info: %s" % str(result))
        return
    return result.get("Value", {})
def test_getOutputDataLFNs(self, dirac_job):
    """Check getOutputDataLFNs executes successfully for a job."""
    confirm = execute('getOutputDataLFNs("%s")' % dirac_job.id, return_raw_dict=True)
    # Log once - the original accidentally logged the same result twice.
    logger.info(confirm)
    assert confirm['OK'], 'getOutputDataLFNs command not executed successfully'
def test_getStateTime(self, dirac_job):
    """Check getStateTime returns a datetime for the 'completed' state."""
    result = execute('getStateTime("%s", "completed")' % dirac_job.id, return_raw_dict=True)
    logger.info(result)
    assert result['OK'], 'getStateTime command not executed successfully'
    assert isinstance(result['Value'], datetime.datetime), 'getStateTime command not executed successfully'
def get_result(command, exception_message=None, eval_includes=None, retry_limit=5, credential_requirements=None):
    '''
    This method returns the object from the result of running the given command against DIRAC.

    Args:
        command (str): This is the command we want to get the output from
        exception_message (str): This is the message we want to display if the command fails
        eval_includes (str): This is optional extra objects to include when evaluating the output from the command
        retry_limit (int): This is the number of times to retry the command if it initially fails
        credential_requirements (ICredentialRequirement): This is the optional credential which is to be used for this DIRAC session
    '''
    retries = 0
    while retries < retry_limit:
        try:
            return execute(command, eval_includes=eval_includes, cred_req=credential_requirements)
        except GangaDiracError as err:
            # BUGFIX: only emit the custom message when one was supplied,
            # instead of logging the literal string 'None'.
            if exception_message is not None:
                logger.error(exception_message)
            logger.debug("Sleeping for 5 additional seconds to reduce possible overloading")
            time.sleep(5.)
            # Out of retries: re-raise the last DIRAC error to the caller.
            if retries == retry_limit - 1:
                raise
            retries = retries + 1
            logger.error("An Error Occured: %s" % err)
            logger.error("Retrying: %s / %s " % (str(retries + 1), str(retry_limit)))
def getLFNReplicas(allLFNs, index, allLFNData):
    """Fetch replica info for the index-th chunk of allLFNs via getReplicas.

    The chunk spans [index*LFN_parallel_limit, (index+1)*LFN_parallel_limit),
    clamped to the end of the list. The DIRAC call is retried up to 5 times.
    """
    output = None
    toy_num = 0
    global LFN_parallel_limit
    this_min = index * LFN_parallel_limit
    # Clamp the upper bound for the final (possibly short) chunk.
    if (index + 1) * LFN_parallel_limit > len(allLFNs):
        this_max = len(allLFNs)
    else:
        this_max = (index + 1) * LFN_parallel_limit
    for toy_num in range(5):
        try:
            from GangaDirac.Lib.Utilities.DiracUtilities import execute
            output = execute('getReplicas(%s)' % str(allLFNs[this_min:this_max]))
            these_values = output.get('Value').get('Successful')
            break
        # BUGFIX: 'except Exception, err' is Python-2-only syntax; 'as' works
        # on Python 2.6+ and Python 3.
        except Exception as err:
            logger.error("Dirac Error: %s" % str(err))
            # catch 'Successful' not found and others
            pass
def test_getLHCbInputDataCatalog(self):
    """Check getLHCbInputDataCatalog fails as expected for this LFN.

    The call is expected to fail, so the test asserts on the returned
    failure message rather than on success.
    """
    confirm = execute('getLHCbInputDataCatalog("%s",0,"","")' % (self.__class__._getFileLFN))
    print("%s" % str(confirm))
    self.assertEqual(confirm['Message'], 'Failed to access all of requested input data',
                     'Command not executed successfully')
def uploadLocalFile(job, namePattern, localDir, should_del=True):
    """
    Upload a locally available file to the grid as a DiracFile. Randomly chooses an SE.

    Args:
        namePattern (str): name of the file
        localDir (str): localDir of the file
        should_del = (bool): should we delete the local file?
    Return
        DiracFile: a DiracFile of the uploaded LFN on the grid
    """
    new_df = DiracFile(namePattern, localDir=localDir)
    # Reuse the job's backend credentials for the upload.
    new_df.credential_requirements=job.backend.credential_requirements
    trySEs = getConfig('DIRAC')['allDiracSE']
    # Shuffle so upload load is spread across the configured SEs.
    random.shuffle(trySEs)
    new_lfn = os.path.join(getInputFileDir(job), namePattern)
    returnable = None
    for SE in trySEs:
        #Check that the SE is writable
        if execute('checkSEStatus("%s", "%s")' % (SE, 'Write')):
            try:
                returnable = new_df.put(force=True, uploadSE=SE, lfn=new_lfn)[0]
                break
            except GangaDiracError as err:
                # NOTE(review): raising here aborts on the first failed upload
                # instead of trying the remaining SEs (and 'err' is unused) -
                # confirm this early abort is intended.
                raise GangaException("Upload of input file as LFN %s to SE %s failed" % (new_lfn, SE))
    if not returnable:
        raise GangaException("Failed to upload input file to any SE")
    # Optionally remove the local copy once it is safely on the grid.
    if should_del:
        os.unlink(os.path.join(localDir, namePattern))
    return returnable
def diracAPI_interactive(connection_attempts=5):
    '''
    Run an interactive server within the DIRAC environment.

    Args:
        connection_attempts (int): number of times to try connecting the client
    Returns:
        str or None: formatted traceback of the last failed attempt, else None
    '''
    import os
    import sys
    import time
    import inspect
    import traceback
    from GangaDirac.Lib.Server.InspectionClient import runClient
    # The inspection server script lives alongside the client module.
    serverpath = os.path.join(
        os.path.dirname(inspect.getsourcefile(runClient)), 'InspectionServer.py')
    # Launch the server on a worker-thread queue; no timeout, it runs until quit.
    from Ganga.Core.GangaThread.WorkerThreads import getQueues
    getQueues().add(
        execute("execfile('%s')" % serverpath, timeout=None, shell=False))

    #time.sleep(1)
    sys.stdout.write(
        "\nType 'q' or 'Q' or 'exit' or 'exit()' to quit but NOT ctrl-D")
    i = 0
    excpt = None
    # Keep retrying the client: the server may not have started yet.
    while i < connection_attempts:
        try:
            runClient()
            break
        except:
            # NOTE(review): bare except captures any failure's traceback on the
            # last attempt - confirm narrowing to Exception is not wanted.
            if i == (connection_attempts - 1):
                excpt = traceback.format_exc()
        finally:
            i += 1
    return excpt
def getMetadata(self):
    """ Get Metadata associated with this files lfn. This method will also try to automatically set the files guid attribute.

    Returns:
        dict: the (possibly eval'd) result of the DIRAC getMetadata call
    """
    # An empty LFN means the file may only exist locally: upload it first.
    if self.lfn == "":
        self._optionallyUploadLocalFile()

    # eval again here as datatime not included in dirac_ganga_server
    r = execute('getMetadata("%s")' % self.lfn)
    # NOTE(review): eval() on the returned string is only safe if the server
    # response is trusted - confirm the source cannot be attacker-controlled.
    try:
        ret = eval(r)
    except:
        ret = r
    if isinstance(ret, dict) and ret.get('OK', False) and self.lfn in ret.get('Value', {'Successful': {}})['Successful']:
        try:
            # Adopt the server-side GUID when ours is missing or stale.
            if self.guid != ret['Value']['Successful'][self.lfn]['GUID']:
                self.guid = ret['Value']['Successful'][self.lfn]['GUID']
        except:
            pass

    try:
        # Best-effort: attach the known replica locations to the returned metadata.
        reps = self.getReplicas()
        ret['Value']['Successful'][self.lfn].update({'replicas': self.locations})
    except:
        pass

    return ret
def get_result(command, logger_message=None, exception_message=None, eval_includes=None, retry_limit=5):
    """
    Execute a command against the DIRAC server, retrying on failure.

    Args:
        command (str): the server command to run
        logger_message (str): optional message to log when the result is not OK
        exception_message (str): optional message for the raised GangaException
        eval_includes: extra includes forwarded to execute()
        retry_limit (int): number of attempts before giving up
    Returns:
        The successful result of execute().
    Raises:
        GangaException: when the command keeps failing after all retries.
    """
    import time  # hoisted out of the retry loop
    retries = 0
    while retries < retry_limit:
        try:
            result = execute(command, eval_includes=eval_includes)
            if not result_ok(result):
                if logger_message is not None:
                    logger.warning('%s: %s' % (logger_message, str(result)))
                if exception_message is not None:
                    logger.warning("Failed to run: %s" % str(command))
                    logger.warning("includes:\n%s" % str(eval_includes))
                    logger.warning("Result: '%s'" % str(result))
                    raise GangaException(exception_message)
                raise GangaException("Failed to return result of '%s': %s" % (command, result))
            return result
        except Exception as x:
            # Back off a little in case the server is overloaded.
            logger.debug("Sleeping for 5 additional seconds to reduce possible overloading")
            time.sleep(5.)
            if retries == retry_limit - 1:
                # Out of retries: re-raise preserving the original traceback.
                raise
            retries = retries + 1
            logger.error("An Error Occured: %s" % str(x))
            logger.error("Retrying: %s / %s " % (str(retries + 1), str(retry_limit)))
def accessURL(self, thisSE=''):
    """
    Attempt to find an accessURL which corresponds to the specified SE. If no SE
    is specified then return a random one from all the replicas.
    """
    # Container file: collect one URL per subfile LFN and return them all.
    if len(self.subfiles) != 0:
        urls = []
        for sub in self.subfiles:
            urls.append(sub.accessURL(thisSE)[0])
        return urls

    # Single file: make sure the replica locations are known.
    self.getReplicas()
    if thisSE == '':
        # No SE requested: pick any replica at random.
        chosen_SE = random.choice(self.locations)
    elif thisSE in self.locations:
        # A replica exists at the requested SE.
        chosen_SE = thisSE
    else:
        logger.warning('No replica at specified SE for the LFN %s, here is a URL for another replica' % self.lfn)
        chosen_SE = random.choice(self.locations)
    reply = execute('getAccessURL("%s" , "%s")' % (self.lfn, chosen_SE))
    return [reply['Value']['Successful'][self.lfn]]
def DiracSizeSplitter(inputs, filesPerJob, maxSize, ignoremissing):
    """
    Generator that yields a datasets for LHCbdirac split jobs by size

    Args:
        inputs: the input dataset to split (must contain only DiracFiles)
        filesPerJob (int): size limit passed to splitInputDataBySize
        maxSize: unused here; kept for interface compatibility
        ignoremissing (bool): if False, raise when files are missing on the grid
    Yields:
        lists of LFNs, one per subjob
    Raises:
        SplittingError: on non-DiracFile input, server errors, or missing files
    """
    split_files = []
    i = inputs.__class__()

    # Compare the counts: getLFNs() returns a list, so the original
    # `inputs.getLFNs() != len(inputs.files)` was always True.
    if len(inputs.getLFNs()) != len(inputs.files):
        raise SplittingError(
            "Error trying to split dataset using DIRAC backend with non-DiracFile in the inputdata")

    # NOTE(review): maxFiles is not defined in this function's scope —
    # presumably a module-level setting; confirm before relying on it.
    all_files = igroup(inputs.files[:maxFiles], getConfig('DIRAC')['splitFilesChunks'],
                       leftovers=True)

    for files in all_files:
        i.files = files
        LFNsToSplit = i.getLFNs()
        if (len(LFNsToSplit)) > 1:
            result = execute('splitInputDataBySize(%s,%d)' % (i.getLFNs(), filesPerJob))
            if not result_ok(result):
                logger.error('DIRAC:: Error splitting files: %s' % str(result))
                raise SplittingError('Error splitting files.')
            split_files += result.get('Value', [])
        else:
            split_files = [LFNsToSplit]

    if len(split_files) == 0:
        raise SplittingError('An unknown error occured.')

    # check that all files were available on the grid
    big_list = []
    for l in split_files:
        big_list.extend(l)
    diff = set(inputs.getFileNames()[:maxFiles]).difference(big_list)
    if len(diff) > 0:
        for f in diff:
            logger.warning('Ignored file: %s' % f)
        if not ignoremissing:
            raise SplittingError('Some files not found!')

    logger.debug("Split Files: %s" % str(split_files))

    for dataset in split_files:
        yield dataset
def get_result(command, logger_message=None, exception_message=None, eval_includes=None, retry_limit=5):
    """
    Execute a command against the DIRAC server, retrying on failure.

    Args:
        command (str): the server command to run
        logger_message (str): optional message to log when the result is not OK
        exception_message (str): optional message for the raised GangaException
        eval_includes: extra includes forwarded to execute()
        retry_limit (int): number of attempts before giving up
    Returns:
        The successful result of execute().
    Raises:
        GangaException: when the command keeps failing after all retries.
    """
    import time  # hoisted out of the retry loop
    retries = 0
    while retries < retry_limit:
        try:
            result = execute(command, eval_includes=eval_includes)
            if not result_ok(result):
                if logger_message is not None:
                    logger.warning('%s: %s' % (logger_message, str(result)))
                if exception_message is not None:
                    logger.warning("Failed to run: %s" % str(command))
                    logger.warning("includes:\n%s" % str(eval_includes))
                    logger.warning("Result: '%s'" % str(result))
                    raise GangaException(exception_message)
                raise GangaException("Failed to return result of '%s': %s" % (command, result))
            return result
        except Exception as x:
            # Back off a little in case the server is overloaded.
            logger.debug("Sleeping for 5 additional seconds to reduce possible overloading")
            time.sleep(5.)
            if retries == retry_limit - 1:
                # Out of retries: re-raise preserving the original traceback.
                raise
            retries = retries + 1
            logger.error("An Error Occured: %s" % str(x))
            logger.error("Retrying: %s / %s " % (str(retries + 1), str(retry_limit)))
def getMetadata(self):
    """
    Get Metadata associated with this files lfn. This method will also
    try to automatically set the files guid attribute.

    Returns:
        dict: the server metadata reply, with a 'replicas' entry added for
        this LFN when the lookup succeeded.
    Raises:
        GangaFileError: if the LFN has no replica on the grid.
    """
    if self.lfn == "":
        self._optionallyUploadLocalFile()

    # check that it has a replica (this also refreshes self.locations)
    if not self.getReplicas():
        raise GangaFileError("No replica found for this file!")

    # eval again here as datatime not included in dirac_ganga_server
    ret = execute('getMetadata("%s")' % self.lfn, cred_req=self.credential_requirements)

    # Guard the lookup: the original indexed ret['Successful'][self.lfn]
    # unconditionally and raised KeyError when the query failed for this LFN.
    file_md = ret.get('Successful', {}).get(self.lfn, None)
    if file_md is not None:
        guid = file_md.get('GUID', None)
        if guid is not None and self.guid != guid:
            self.guid = guid
        # self.locations was refreshed by the getReplicas() call above.
        file_md.update({'replicas': self.locations})
    return ret
def internalCopyTo(self, targetPath):
    """
    Retrieve locally the file matching this DiracFile object's LFN and place
    it in the given local directory.

    Args:
        targetPath (str): the local directory the file should be placed in
    Returns:
        bool: True on success
    Raises:
        GangaFileError: if this DiracFile has no LFN to download
    """
    if self.lfn == "":
        raise GangaFileError('Can\'t download a file without an LFN.')
    logger.info("Getting file %s" % self.lfn)
    # The server reply is not needed here; failures surface via execute().
    execute('getFile("%s", destDir="%s")' % (self.lfn, targetPath),
            cred_req=self.credential_requirements)
    if self.namePattern == "":
        name = os.path.basename(self.lfn)
        # Compressed files carry a 3-char ('.gz') suffix on the grid;
        # strip it for the local name.
        if self.compressed:
            name = name[:-3]
        self.namePattern = name
    if self.guid == "" or not self.locations:
        self.getMetadata()
    return True
def _getStateTime(job, status):
    """Returns the timestamps for 'running' or 'completed' by extracting
    their equivalent timestamps from the loggingInfo."""
    # Now private to stop server cross-talk from user thread. Since updateStatus calles
    # this method whether called itself by the user thread or monitoring thread.
    # Now don't use hook but define our own private version
    # used in monitoring loop... messy but works.
    if job.status != status:
        b_list = ['running', 'completing', 'completed', 'failed']
        backend_final = ['failed', 'completed']
        # backend stamps
        if not job.subjobs and status in b_list:
            for childstatus in b_list:
                if job.backend.id:
                    logger.debug("Accessing getStateTime() in diracAPI")
                    if childstatus in backend_final:
                        be_statetime = execute("getStateTime(%d,\'%s\')" % (job.backend.id, childstatus))
                        job.time.timestamps["backend_final"] = be_statetime
                        logger.debug("Wrote 'backend_final' to timestamps.")
                        # A final state ends the walk through the state list.
                        break
                    else:
                        time_str = "backend_" + childstatus
                        # Only query the DIRAC server for timestamps we have
                        # not already recorded (matches the sibling version
                        # of this function elsewhere in the codebase).
                        if time_str not in job.time.timestamps:
                            be_statetime = execute("getStateTime(%d,\'%s\')" % (job.backend.id, childstatus))
                            job.time.timestamps["backend_" + childstatus] = be_statetime
                            logger.debug("Wrote 'backend_%s' to timestamps.", childstatus)
                        if childstatus == status:
                            break
            logger.debug("_getStateTime(job with id: %d, '%s') called.", job.id, job.status)
    else:
        logger.debug("Status changed from '%s' to '%s'. No new timestamp was written",
                     job.status, status)
def timedetails(self):
    """Prints contents of the loggingInfo from the Dirac API."""
    # Nothing to query if the job was never submitted to DIRAC.
    if not self.id:
        return None
    logger.debug("Accessing timedetails() in diracAPI")
    return execute('timedetails(%d)' % self.id)
def replicateJobFile(fileToReplicate):
    """
    A method to replicate a file to a random SE.

    Args:
        fileToReplicate (DiracFile): the file to replicate
    Raises:
        GangaDiracError: if something other than a DiracFile is passed
        GangaException: if the file could not be replicated to any SE
    """
    if not isinstance(fileToReplicate, DiracFile):
        raise GangaDiracError(
            "Can only request replicas of DiracFiles. %s is not a DiracFile" % fileToReplicate)

    if len(fileToReplicate.locations) == 0:
        fileToReplicate.getReplicas()

    # Candidate SEs: every configured SE that doesn't already hold a replica.
    trySEs = [SE for SE in getConfig('DIRAC')['allDiracSE'] if SE not in fileToReplicate.locations]
    random.shuffle(trySEs)
    success = None
    for SE in trySEs:
        # Only attempt SEs that are currently writable.
        if execute('checkSEStatus("%s", "%s")' % (SE, 'Write')):
            try:
                fileToReplicate.replicate(SE)
                success = True
                break
            except (GangaFileError, GangaDiracError) as err:
                # The original re-raised here, which made the fall-back to the
                # next SE (and the final error below) unreachable. Log and
                # keep trying instead.
                logger.warning("Failed to replicate %s to %s: %s" % (fileToReplicate.lfn, SE, err))
    if not success:
        raise GangaException("Failed to replicate %s to any SE" % fileToReplicate.lfn)
def peek(self, filename=None, command=None):
    """Peek at the output of a job (Note: filename/command are ignored)."""
    reply = execute('peek(%d)' % self.id)
    # Unsuccessful replies mean the job output cannot be inspected.
    if not result_ok(reply):
        logger.error("No peeking available for Dirac job '%i'.", self.id)
        return
    logger.info(reply['Value'])
def getAccessURLs(lfns, defaultSE = ''):
    """
    This is a function to get a list of the accessURLs for a provided list of lfns.
    """
    lfnList = []
    # Accept either plain LFN strings or objects such as a job's outputfiles.
    if all(isinstance(item, str) for item in lfns):
        lfnList = lfns
    else:
        # Keep only the DiracFiles; LocalFiles etc. have no grid URL.
        for candidate in lfns:
            if isinstance(stripProxy(candidate), DiracFile):
                lfnList.append(candidate.lfn)
    if not lfnList:
        logger.error("Provided list does not have LFNs or DiracFiles in it")
        return
    # Gather every SE holding a replica of any of the requested files.
    reps = execute('getReplicas(%s)' % lfnList)
    SEs = []
    for lf in reps['Successful']:
        for se in reps['Successful'][lf].keys():
            if se not in SEs:
                SEs.append(se)
    # A requested SE is tried first; otherwise warn and fall back to the rest.
    if defaultSE != '':
        if defaultSE in SEs:
            SEs.remove(defaultSE)
            SEs.insert(0, defaultSE)
        else:
            logger.warning('No replica at specified SE, here is a URL for another replica')
    myURLs = []
    remainingLFNs = list(lfnList)
    # Ask each SE in turn for URLs of the still-missing files, dropping
    # each file from the worklist as soon as a URL is found for it.
    for SE in SEs:
        thisSEFiles = execute('getAccessURL(%s, "%s")' % (remainingLFNs, SE))['Successful']
        for lfn in thisSEFiles.keys():
            myURLs.append(thisSEFiles[lfn])
            remainingLFNs.remove(lfn)
        if not remainingLFNs:
            break
    return myURLs
def debug(self):
    '''Obtains some (possibly) useful DIRAC debug info. '''
    # check services
    cmd = 'getServicePorts()'
    result = execute(cmd)
    if isinstance(result, str):
        try:
            # Older servers return a repr string rather than a dict.
            result = eval(result)
        except Exception as err:
            logger.debug("Exception, err: %s" % str(err))
    if not result_ok(result):
        logger.warning('Could not obtain services: %s' % str(result))
        return
    services = result.get('Value', {})
    # Ping each advertised system/service pair and report its health.
    for category in services:
        system, service = category.split('/')
        cmd = "ping('%s','%s')" % (system, service)
        result = execute(cmd)
        if isinstance(result, str):
            try:
                result = eval(result)
            except Exception as err:
                logger.debug("Exception: %s" % str(err))
        msg = 'OK.'
        if not result_ok(result):
            msg = '%s' % result['Message']
        logger.info('%s: %s' % (category, msg))

    # get pilot info for this job; only possible once a DIRAC id exists
    if not isinstance(self.id, int):
        return
    j = self.getJobObject()
    debug_dir = j.getDebugWorkspace().getPath()
    cmd = "getJobPilotOutput(%d,'%s')" % (self.id, debug_dir)
    result = execute(cmd)
    if result_ok(result):
        logger.info('Pilot Info: %s/pilot_%d/std.out.' % (debug_dir, self.id))
    else:
        logger.error(result.get('Message', ''))
def kill(self):
    """ Kill a Dirac jobs"""
    # Nothing to do for a job that was never submitted.
    if not self.id:
        return None
    reply = execute('kill(%d)' % self.id)
    if result_ok(reply):
        return reply['OK']
    raise BackendError('Dirac', 'Could not kill job: %s' % str(reply))