def createNewJob(self):
    """Create any jobs required for this unit"""
    j = GPI.Job()
    j._impl.backend = self._getParent().backend.clone()
    j._impl.application = self._getParent().application.clone()

    if self.inputdata is not None:
        j.inputdata = self.inputdata.clone()

    trf = self._getParent()
    task = trf._getParent()

    # copy across the outputfiles
    for f in trf.outputfiles:
        j.outputfiles += [f.clone()]

    j.inputsandbox = trf.inputsandbox

    # sort out the splitter
    if trf.splitter:
        j.splitter = trf.splitter.clone()

    # postprocessors (deepcopy from the copy module)
    for pp in trf.postprocessors:
        j.postprocessors.append(deepcopy(pp))

    return j
def createNewJob(self):
    """Create any jobs required for this unit"""
    j = GPI.Job()
    j.backend = self._getParent().backend.clone()

    # copy from ourselves or the parent transform depending on what's
    # specified
    fields = ['application', 'splitter', 'inputfiles', 'inputdata',
              'inputsandbox', 'outputfiles', 'postprocessors']

    for f in fields:
        if (f == "postprocessors" and len(getattr(self, f).process_objects) > 0):
            j.postprocessors = copy.deepcopy(addProxy(self).postprocessors)
        elif (f != "postprocessors" and getattr(self, f)):
            setattr(j, f, copy.deepcopy(getattr(self, f)))
        elif (f == "postprocessors" and len(getattr(self._getParent(), f).process_objects) > 0):
            j.postprocessors = copy.deepcopy(
                addProxy(self._getParent()).postprocessors)
        elif (f != "postprocessors" and getattr(self._getParent(), f)):
            setattr(j, f, copy.deepcopy(getattr(self._getParent(), f)))

    return j
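# Illustrative sketch (not Ganga code): the loop above gives the unit's own
# settings precedence over the parent transform's.  resolve_field() below is a
# hypothetical helper that mirrors that precedence for a single attribute name.
def resolve_field(unit, transform, name):
    """Return the value to copy onto the new job: unit first, then transform."""
    value = getattr(unit, name, None)
    if value:
        return value
    return getattr(transform, name, None)

# Minimal usage with plain stand-in objects:
class _Stub(object):
    pass

_unit, _trf = _Stub(), _Stub()
_unit.splitter = None             # the unit does not override the splitter...
_trf.splitter = "trf-splitter"    # ...so the transform's value wins
assert resolve_field(_unit, _trf, "splitter") == "trf-splitter"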
def createNewJob(self):
    """Create any jobs required for this unit"""
    j = GPI.Job()
    j._impl.backend = self._getParent().backend.clone()
    j._impl.application = self._getParent().application.clone()
    j.inputdata = self.inputdata.clone()

    trf = self._getParent()
    task = trf._getParent()

    # copy across the outputfiles
    for f in trf.outputfiles:
        j.outputfiles += [f.clone()]

    j.inputsandbox = trf.inputsandbox

    # the wanted events may be given as a single string or a list of lines
    if type(self.eventswanted) == type(''):
        subLines = self.eventswanted
    else:
        subLines = '\n'.join(self.eventswanted)

    # Base for the naming of each subjob's CSV file
    incsvfile = j._impl.application.csvfile
    tmpname = os.path.basename(incsvfile)
    if len(tmpname.split('.')) > 1:
        patterncsv = '.'.join(tmpname.split('.')[0:-1]) + "_sub%d." + tmpname.split('.')[-1]
    else:
        patterncsv = tmpname + "_sub%d"

    from Ganga.GPIDev.Lib.File import FileBuffer
    thiscsv = patterncsv % self.subpartid

    # Create the CSV file for this unit in its input workspace
    j._impl.getInputWorkspace().writefile(FileBuffer(thiscsv, subLines), executable=0)
    j._impl.application.csvfile = j._impl.getInputWorkspace().getPath() + thiscsv
    j.inputsandbox.append(j._impl.getInputWorkspace().getPath() + thiscsv)

    # Base for the naming of each subjob's output file
    tmpname = os.path.basename(j._impl.application.outputfile)
    if len(tmpname.split('.')) > 1:
        patternout = '.'.join(tmpname.split('.')[0:-1]) + "_sub%d." + tmpname.split('.')[-1]
    else:
        patternout = tmpname + "_sub%d"
    j._impl.application.outputfile = patternout % self.subpartid

    # Sort out the splitter
    if trf.splitter:
        j.splitter = trf.splitter.clone()

    return j
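# Illustrative sketch (standalone, not Ganga code): the "_sub%d" patterns built
# above split a filename on its last dot and insert the subjob index before the
# extension; extensionless names simply get the suffix appended.
import os

def make_sub_pattern(filename):
    """Build the per-subjob naming pattern used for the CSV and output files."""
    tmpname = os.path.basename(filename)
    parts = tmpname.split('.')
    if len(parts) > 1:
        return '.'.join(parts[:-1]) + "_sub%d." + parts[-1]
    return tmpname + "_sub%d"

assert make_sub_pattern("/data/events.csv") % 3 == "events_sub3.csv"
assert make_sub_pattern("events") % 3 == "events_sub3"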
def createNewJob(self, partition):
    """Returns a new job initialized with the transform's application, backend and name"""
    # this works because createNewJob is only called by a task
    task = self._getParent()
    id = task.transforms.index(self)
    j = GPI.Job()
    stripProxy(j).backend = self.backend.clone()
    stripProxy(j).application = self.application.clone()
    stripProxy(j).application.tasks_id = "%i:%i" % (task.id, id)
    stripProxy(j).application.id = self.getNewAppID(partition)
    j.inputdata = self.inputdata
    j.outputdata = self.outputdata
    j.inputsandbox = self.inputsandbox
    j.outputsandbox = self.outputsandbox
    j.name = "T%i:%i C%i" % (task.id, id, partition)
    return j
def createNewJob(self, partition):
    """Returns a new job initialized with the transform's application, backend and name"""
    j = GPI.Job()
    stripProxy(j).backend = self.backend.clone()
    stripProxy(j).application = self.application.clone()
    stripProxy(j).application.tasks_id = "%i:%i" % (self.task_id, self.transform_id)
    stripProxy(j).application.id = self.getNewAppID(partition)
    if self.splitter is not None:
        stripProxy(j).splitter = LHCbTaskDummySplitter(self.splitter)
    # if self.merger is not None:
    #     stripProxy(j).merger = self.merger
    j.inputdata = self.toProcess_dataset
    j.outputdata = self.outputdata
    j.inputsandbox = self.inputsandbox
    j.outputsandbox = self.outputsandbox
    j.name = "T%i Tr%i P%i" % (self.task_id, self.transform_id, partition)
    j.do_auto_resubmit = True
    # the queued files are now attached to this job, so clear the to-process list
    self.toProcess_dataset.files = []
    return j
def createNewJob(self):
    """Create any jobs required for this unit"""
    j = GPI.Job()
    j._impl.backend = self._getParent().backend.clone()
    j._impl.application = self._getParent().application.clone()

    if self.inputdata:
        j.inputdata = self.inputdata.clone()

    trf = self._getParent()
    task = trf._getParent()

    # sort out the output data
    if trf.outputdata:
        j.outputdata = trf.outputdata.clone()
    elif j.inputdata and j.inputdata._impl._name == "ATLASLocalDataset" and j.application._impl._name != "TagPrepare":
        j.outputdata = GPI.ATLASOutputDataset()
    elif j.application._impl._name != "TagPrepare":
        j.outputdata = GPI.DQ2OutputDataset()

    # check for ds name specified and length
    if j.outputdata and j.outputdata._impl._name == "DQ2OutputDataset":
        max_length = configDQ2['OUTPUTDATASET_NAMELENGTH'] - 11

        # merge names need to be shorter
        if (j.backend._impl._name == "Panda" or j.backend._impl._name == "Jedi"):
            if j.backend.requirements.enableMerge:
                max_length -= 12

            if j.backend._impl._name == "Jedi":
                # go over the outputdata and check for output names that Jedi appends to the outDS name
                tmp_len_chg = 8
                for o in j.outputdata.outputdata:
                    if (len(o) + 1) > tmp_len_chg:
                        tmp_len_chg = len(o) + 1
                max_length -= tmp_len_chg
            elif j.backend.individualOutDS:
                max_length -= 8

        if j.outputdata.datasetname != "":
            dsn = [j.outputdata.datasetname,
                   "j%i.t%i.trf%i.u%i" % (j.id, task.id, trf.getID(), self.getID())]
            if len(".".join(dsn)) > max_length:
                dsn = [j.outputdata.datasetname[: -(len(".".join(dsn)) - max_length)],
                       "j%i.t%i.trf%i.u%i" % (j.id, task.id, trf.getID(), self.getID())]
        else:
            dsn = [trf.getContainerName()[:-1], self.name,
                   "j%i.t%i.trf%i.u%i" % (j.id, task.id, trf.getID(), self.getID())]
            if len(".".join(dsn)) > max_length:
                dsn2 = [trf.getContainerName(2 * max_length / 3)[:-1], "",
                        "j%i.t%i.trf%i.u%i" % (j.id, task.id, trf.getID(), self.getID())]
                dsn = [trf.getContainerName(2 * max_length / 3)[:-1],
                       self.name[: -(len(".".join(dsn2)) - max_length)],
                       "j%i.t%i.trf%i.u%i" % (j.id, task.id, trf.getID(), self.getID())]

        j.outputdata.datasetname = '.'.join(dsn).replace(":", "_").replace(" ", "").replace(",", "_")

    j.inputsandbox = self._getParent().inputsandbox
    j.outputsandbox = self._getParent().outputsandbox

    # check for splitter - TagPrepare and Jedi don't use splitters
    if j.application._impl._name == "TagPrepare":
        return j

    if j.backend._impl._name == "Jedi":
        if trf.files_per_job > 0:
            j.backend.requirements.nFilesPerJob = trf.files_per_job
        elif trf.MB_per_job > 0:
            j.backend.requirements.nGBPerJob = trf.MB_per_job / 1000
        return j

    if not trf.splitter:
        # provide a default number of files if there's nothing else given
        nfiles = trf.files_per_job
        if nfiles < 1:
            nfiles = 5

        if j.inputdata._impl._name == "ATLASLocalDataset":
            j.splitter = AthenaSplitterJob()
            if trf.subjobs_per_unit > 0:
                j.splitter.numsubjobs = trf.subjobs_per_unit
            else:
                import math
                j.splitter.numsubjobs = int(math.ceil(len(j.inputdata.names) / float(nfiles)))
        else:
            j.splitter = DQ2JobSplitter()
            if trf.MB_per_job > 0:
                j.splitter.filesize = trf.MB_per_job
            elif trf.subjobs_per_unit > 0:
                j.splitter.numsubjobs = trf.subjobs_per_unit
            else:
                j.splitter.numfiles = nfiles
    else:
        j.splitter = trf.splitter.clone()

    # postprocessors
    if len(self._getParent().postprocessors.process_objects) > 0:
        import copy
        j.postprocessors = copy.deepcopy(addProxy(self._getParent()).postprocessors)

    return j
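# Illustrative sketch (standalone): the DQ2 dataset-name handling above starts
# from a backend limit (configDQ2['OUTPUTDATASET_NAMELENGTH'] minus a fixed
# 11-character reserve), shrinks the budget further for Panda/Jedi merging,
# Jedi per-output suffixes or individualOutDS, and then truncates the
# user-supplied prefix so that "<prefix>.j<id>.t<id>.trf<id>.u<id>" still fits.
# The helper and the numbers below are assumptions for demonstration only.
def truncate_datasetname(prefix, suffix, max_length):
    """Trim the prefix so that '<prefix>.<suffix>' fits within max_length."""
    full = prefix + "." + suffix
    if len(full) <= max_length:
        return full
    overshoot = len(full) - max_length
    return prefix[:-overshoot] + "." + suffix

name = truncate_datasetname("user.someone.mydataset", "j12.t3.trf0.u1", 30)
assert name == "user.someone.my.j12.t3.trf0.u1" and len(name) <= 30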