def _setup_bulk_subjobs(self, dirac_ids, dirac_script):
    """
    This is the old bulk submit method which is used to construct the subjobs for a parametric job
    Args:
        dirac_ids (list): This is a list of the Dirac ids which have been created
        dirac_script (str): Name of the dirac script which contains the job jdl
    """
    with open(dirac_script, 'r') as f:
        parametric_datasets = get_parametric_datasets(f.read().split('\n'))
    if len(parametric_datasets) != len(dirac_ids):
        raise BackendError('Dirac', 'Mismatch between the number of datasets defined in the DIRAC API script and those returned by DIRAC')

    from Ganga.GPIDev.Lib.Job.Job import Job
    master_job = self.getJobObject()
    master_job.subjobs = []
    for i in range(len(dirac_ids)):
        j = Job()
        j.copyFrom(master_job)
        j.splitter = None
        j.backend.id = dirac_ids[i]
        j.id = i
        j.inputdata = self._setup_subjob_dataset(parametric_datasets[i])
        j.status = 'submitted'
        j.time.timenow('submitted')
        master_job.subjobs.append(j)
    return True
def _setup_bulk_subjobs(self, dirac_ids, dirac_script):
    """
    This is the old bulk submit method which is used to construct the subjobs for a parametric job
    Args:
        dirac_ids (list): This is a list of the Dirac ids which have been created
        dirac_script (str): Name of the dirac script which contains the job jdl
    """
    with open(dirac_script, 'r') as f:
        parametric_datasets = get_parametric_datasets(f.read().split('\n'))
    if len(parametric_datasets) != len(dirac_ids):
        raise BackendError(
            'Dirac',
            'Mismatch between the number of datasets defined in the DIRAC API script and those returned by DIRAC'
        )

    from Ganga.GPIDev.Lib.Job.Job import Job  # needed to construct the subjob objects
    master_job = self.getJobObject()
    master_job.subjobs = []
    for i in range(len(dirac_ids)):
        j = Job()
        j.copyFrom(master_job)
        j.splitter = None
        j.backend.id = dirac_ids[i]
        j.id = i
        j.inputdata = self._setup_subjob_dataset(parametric_datasets[i])
        j.status = 'submitted'
        j.time.timenow('submitted')
        master_job.subjobs.append(j)
    return True
def createSubjob(self, job, additional_skip_args=None):
    """ Create a new subjob by copying the master job and setting all fields correctly. """
    from Ganga.GPIDev.Lib.Job.Job import Job
    if additional_skip_args is None:
        additional_skip_args = []

    j = Job()
    skipping_args = ['splitter', 'inputsandbox', 'inputfiles', 'inputdata', 'subjobs']
    for arg in additional_skip_args:
        skipping_args.append(arg)
    j.copyFrom(job, skipping_args)
    j.splitter = None
    j.inputsandbox = []
    j.inputfiles = []
    j.inputdata = None
    return j
def createSubjob(self, job, additional_skip_args=None):
    """ Create a new subjob by copying the master job and setting all fields correctly. """
    from Ganga.GPIDev.Lib.Job.Job import Job
    if additional_skip_args is None:
        additional_skip_args = []

    j = Job()
    skipping_args = [
        'splitter', 'inputsandbox', 'inputfiles', 'inputdata', 'subjobs'
    ]
    for arg in additional_skip_args:
        skipping_args.append(arg)
    j.copyFrom(job, skipping_args)
    j.splitter = None
    j.inputsandbox = []
    j.inputfiles = []
    j.inputdata = None
    return j
def _setup_bulk_subjobs(self, dirac_ids, dirac_script):
    with open(dirac_script, 'r') as f:
        parametric_datasets = get_parametric_datasets(f.read().split('\n'))
    if len(parametric_datasets) != len(dirac_ids):
        raise BackendError('Dirac', 'Mismatch between the number of datasets defined in the DIRAC API script and those returned by DIRAC')

    from Ganga.GPIDev.Lib.Job.Job import Job
    master_job = self.getJobObject()
    for i in range(len(dirac_ids)):
        j = Job()
        j.copyFrom(master_job)
        j.splitter = None
        j.backend.id = dirac_ids[i]
        j.id = i
        j.inputdata = self._setup_subjob_dataset(parametric_datasets[i])
        j.status = 'submitted'
        j.time.timenow('submitted')
        master_job.subjobs.append(j)
    master_job._commit()
    return True
def _setup_bulk_subjobs(self, dirac_ids, dirac_script):
    with open(dirac_script, 'r') as f:
        parametric_datasets = get_parametric_datasets(f.read().split('\n'))
    if len(parametric_datasets) != len(dirac_ids):
        raise BackendError('Dirac', 'Mismatch between the number of datasets defined in the DIRAC API script and those returned by DIRAC')

    from Ganga.GPIDev.Lib.Job.Job import Job
    master_job = self.getJobObject()
    master_job.subjobs = []
    for i in range(len(dirac_ids)):
        j = Job()
        j.copyFrom(master_job)
        j.splitter = None
        j.backend.id = dirac_ids[i]
        j.id = i
        j.inputdata = self._setup_subjob_dataset(parametric_datasets[i])
        j.status = 'submitted'
        j.time.timenow('submitted')
        master_job.subjobs.append(j)
    master_job._commit()
    return True
def createUnits(self):
    """Create new units if required given the inputdata"""

    # call parent for chaining
    super(CoreTransform, self).createUnits()

    # Use the given splitter to create the unit definitions
    if len(self.units) > 0:
        # already have units so return
        return

    if self.unit_splitter is None and len(self.inputdata) == 0:
        raise ApplicationConfigurationError(
            None,
            "No unit splitter or InputData provided for CoreTransform unit creation, Transform %d (%s)"
            % (self.getID(), self.name),
        )

    # -----------------------------------------------------------------
    # split over unit_splitter by preference
    if self.unit_splitter:
        # create a dummy job, assign everything and then call the split
        j = Job()
        j.backend = self.backend.clone()
        j.application = self.application.clone()
        if self.inputdata:
            j.inputdata = self.inputdata.clone()

        subjobs = self.unit_splitter.split(j)
        if len(subjobs) == 0:
            raise ApplicationConfigurationError(
                None,
                "Unit splitter gave no subjobs after split for CoreTransform unit creation, Transform %d (%s)"
                % (self.getID(), self.name),
            )

        # only copy the appropriate elements
        fields = []
        if len(self.fields_to_copy) > 0:
            fields = self.fields_to_copy
        elif isType(self.unit_splitter, GenericSplitter):
            if self.unit_splitter.attribute != "":
                fields = [self.unit_splitter.attribute.split(".")[0]]
            else:
                for attr in self.unit_splitter.multi_attrs.keys():
                    fields.append(attr.split(".")[0])

        # now create the units from these jobs
        for sj in subjobs:
            unit = CoreUnit()
            for attr in fields:
                setattr(unit, attr, copy.deepcopy(getattr(sj, attr)))
            self.addUnitToTRF(unit)

    # -----------------------------------------------------------------
    # otherwise split on inputdata
    elif len(self.inputdata) > 0:
        if self.files_per_unit > 0:
            # combine all files and split accordingly
            filelist = []
            for ds in self.inputdata:
                if isType(ds, GangaDataset):
                    for f in ds.files:
                        if f.containsWildcards():
                            # we have a wildcard so grab the subfiles
                            for sf in f.getSubFiles(process_wildcards=True):
                                filelist.append(sf)
                        else:
                            # no wildcards so just add the file
                            filelist.append(f)
                else:
                    logger.warning("Dataset '%s' doesn't support files" % getName(ds))

            # create DSs and units for this list of files
            fid = 0
            while fid < len(filelist):
                unit = CoreUnit()
                unit.name = "Unit %d" % len(self.units)
                unit.inputdata = GangaDataset(files=filelist[fid:fid + self.files_per_unit])
                unit.inputdata.treat_as_inputfiles = self.inputdata[0].treat_as_inputfiles
                fid += self.files_per_unit
                self.addUnitToTRF(unit)
        else:
            # just produce one unit per dataset
            for ds in self.inputdata:
                # avoid splitting over chain inputs
                if isType(ds, TaskChainInput):
                    continue
                unit = CoreUnit()
                unit.name = "Unit %d" % len(self.units)
                unit.inputdata = copy.deepcopy(ds)
                self.addUnitToTRF(unit)
def master_updateMonitoringInformation(jobs):
    """Updates the statuses of the list of jobs provided by issuing crab -status."""
    logger.info('Updating the monitoring information of ' + str(len(jobs)) + ' jobs')

    from CRABAPI.RawCommand import crabCommand
    from CRABClient.ClientExceptions import ConfigurationException
    import httplib

    for j in jobs:
        logger.info('Updating monitoring information for job %d (%s)' % (j.id, j.status))

        if not j.backend.requestname:
            logger.warning("Couldn't find request name for job %s. Skipping" % j.id)
            continue

        crab_work_dir = os.path.join(j.outputdir, j.backend.requestname)
        logger.info('crab_work_dir: %s' % crab_work_dir)

        statusresult = {}
        try:
            statusresult = crabCommand('status', dir=crab_work_dir,
                                       proxy='/data/hc/apps/cms/config/x509up_production2',
                                       long=True)
            logger.info("CRAB3 Status result: %s" % statusresult)
        except httplib.HTTPException as e:
            logger.error(e.result)
        except ConfigurationException as ce:
            # From CRAB3 error message: Error loading CRAB cache file.
            # Try to do 'rm -rf /root/.crab3' and run the crab command again.
            import subprocess
            import uuid
            randomstring = str(uuid.uuid4().get_hex().upper()[0:6])
            subprocess.call(["mv", "/root/.crab3", "/tmp/.crab3." + randomstring])
            try:
                statusresult = crabCommand('status', dir=crab_work_dir,
                                           proxy='/data/hc/apps/cms/config/x509up_production2',
                                           long=True)
                logger.info("CRAB3 Status result: %s" % statusresult)
            except httplib.HTTPException as e:
                logger.error(e.headers)
                logger.error(e.result)

        try:
            jobsdict = statusresult['jobs']
        except KeyError:
            jobsdict = {}

        if jobsdict:
            logger.info('There are subjob statuses for job %s' % j.id)

            if not j.subjobs:
                logger.warning('No subjob object for job %s' % j.id)
                subjoblist = [None] * len(jobsdict)
                #j.subjobs = [None] * len(jobsdict)
                #subjob_index = 0
                for crabid, status in jobsdict.items():
                    crabid = int(crabid)
                    jobstatus = status['State']
                    logger.info('Creating subjob')
                    sj = Job()
                    sj.copyFrom(j)
                    sj.backend.crabid = crabid
                    sj.inputdata = None
                    sj.id = crabid - 1
                    sj.updateStatus('submitting')
                    sj.backend.updateSubjobStatus(status)
                    subjoblist[crabid - 1] = sj

                for newsubjob in subjoblist:
                    j.subjobs.append(newsubjob)
                logger.info('New subjobs for job %s: %s' % (j.id, j.subjobs))
                #j.subjobs.sort(key=lambda subjob: subjob.id)
            else:
                for crabid, status in jobsdict.items():
                    crabid = int(crabid)
                    j.subjobs[crabid - 1].backend.updateSubjobStatus(status)
                #j.updateStatus('running')
        else:
            logger.info('There are no subjobs for job %s' % (j.id))

        #logger.info('Checking task status from report: %s' % statusresult['status'])
        logger.info('Checking task status from report')
        try:
            taskstatus = statusresult['status']
            if taskstatus in ['FAILED', 'SUBMITFAILED']:
                logger.info('Job failed: %s' % taskstatus)
                j.updateStatus('failed')
        except KeyError:
            pass
def createUnits(self):
    """Create new units if required given the inputdata"""

    # call parent for chaining
    super(CoreTransform, self).createUnits()

    # Use the given splitter to create the unit definitions
    if len(self.units) > 0:
        # already have units so return
        return

    if self.unit_splitter is None and len(self.inputdata) == 0:
        raise ApplicationConfigurationError(
            "No unit splitter or InputData provided for CoreTransform unit creation, Transform %d (%s)"
            % (self.getID(), self.name))

    # -----------------------------------------------------------------
    # split over unit_splitter by preference
    if self.unit_splitter:
        # create a dummy job, assign everything and then call the split
        j = Job()
        j.backend = self.backend.clone()
        j.application = self.application.clone()
        if self.inputdata:
            j.inputdata = self.inputdata.clone()

        subjobs = self.unit_splitter.split(j)
        if len(subjobs) == 0:
            raise ApplicationConfigurationError(
                "Unit splitter gave no subjobs after split for CoreTransform unit creation, Transform %d (%s)"
                % (self.getID(), self.name))

        # only copy the appropriate elements
        fields = []
        if len(self.fields_to_copy) > 0:
            fields = self.fields_to_copy
        elif isType(self.unit_splitter, GenericSplitter):
            if self.unit_splitter.attribute != "":
                fields = [self.unit_splitter.attribute.split(".")[0]]
            else:
                for attr in self.unit_splitter.multi_attrs.keys():
                    fields.append(attr.split(".")[0])

        # now create the units from these jobs
        for sj in subjobs:
            unit = CoreUnit()
            for attr in fields:
                setattr(unit, attr, copy.deepcopy(getattr(sj, attr)))
            self.addUnitToTRF(unit)

    # -----------------------------------------------------------------
    # otherwise split on inputdata
    elif len(self.inputdata) > 0:
        if self.files_per_unit > 0:
            # combine all files and split accordingly
            filelist = []
            for ds in self.inputdata:
                if isType(ds, GangaDataset):
                    for f in ds.files:
                        if f.containsWildcards():
                            # we have a wildcard so grab the subfiles
                            for sf in f.getSubFiles(process_wildcards=True):
                                filelist.append(sf)
                        else:
                            # no wildcards so just add the file
                            filelist.append(f)
                else:
                    logger.warning("Dataset '%s' doesn't support files" % getName(ds))

            # create DSs and units for this list of files
            fid = 0
            while fid < len(filelist):
                unit = CoreUnit()
                unit.name = "Unit %d" % len(self.units)
                unit.inputdata = GangaDataset(
                    files=filelist[fid:fid + self.files_per_unit])
                unit.inputdata.treat_as_inputfiles = self.inputdata[0].treat_as_inputfiles
                fid += self.files_per_unit
                self.addUnitToTRF(unit)
        else:
            # just produce one unit per dataset
            for ds in self.inputdata:
                # avoid splitting over chain inputs
                if isType(ds, TaskChainInput):
                    continue
                unit = CoreUnit()
                unit.name = "Unit %d" % len(self.units)
                unit.inputdata = copy.deepcopy(ds)
                self.addUnitToTRF(unit)