def createChainUnit(self, parent_units, use_copy_output=True):
    """Create one unit whose input is the Dirac output of the parent units.

    Every parent unit must have at least one active job and be in the
    "completed" state, otherwise no unit is created.  The include/exclude
    file masks of each ``TaskChainInput`` in ``self.inputdata`` that refers
    to a parent's transform are applied as regular expressions
    (``re.search``) to the LFN of every ``DiracFile`` in the output files of
    the parents' first active job (or of its subjobs when it has any).

    A file is selected when it matches at least one include pattern (or no
    include patterns are defined) and matches no exclude pattern.  Each
    output file is considered exactly once, so a file that matches several
    include patterns is neither listed repeatedly nor able to survive an
    exclusion.

    Args:
        parent_units: units of the parent transform(s) to chain from.
        use_copy_output: accepted for interface compatibility; not used by
            this implementation.

    Returns:
        A new ``LHCbUnit`` whose ``inputdata`` holds the selected files, or
        ``None`` if any parent unit is not ready.
    """
    import re

    # we need parent jobs that have completed to get the output files
    for parent in parent_units:
        if len(parent.active_job_ids) == 0 or parent.status != "completed":
            return None

    # Imported lazily, only once we know there is work to do.
    from Ganga.GPIDev.Lib.Tasks.TaskChainInput import TaskChainInput

    # collect the file masks that apply to the parents' transforms
    incl_pat_list = []
    excl_pat_list = []
    for parent in parent_units:
        for inds in self.inputdata:
            if (isType(inds, TaskChainInput)
                    and inds.input_trf_id == parent._getParent().getID()):
                incl_pat_list += inds.include_file_mask
                excl_pat_list += inds.exclude_file_mask

    def _selected(lfn):
        # Included by any pattern (or by default), and excluded by none.
        if incl_pat_list and not any(
                re.search(pat, lfn) for pat in incl_pat_list):
            return False
        return not any(re.search(pat, lfn) for pat in excl_pat_list)

    # go over the output files and copy the appropriate ones over as input
    # files
    flist = []
    for parent in parent_units:
        job = getJobByID(parent.active_job_ids[0])
        job_list = job.subjobs or [job]
        for sj in job_list:
            for f in sj.outputfiles:
                # only dirac files allowed by the file masks are chained
                if isType(f, DiracFile) and _selected(f.lfn):
                    flist.append("LFN:" + f.lfn)

    # just do one unit that uses all data
    unit = LHCbUnit()
    unit.name = "Unit %d" % len(self.units)
    unit.inputdata = LHCbDataset(files=[DiracFile(lfn=f) for f in flist])
    return unit
def createChainUnit(self, parent_units, use_copy_output=True):
    """Create one unit whose input is the Dirac output of the parent units.

    Every parent unit must have at least one active job and be in the
    "completed" state, otherwise no unit is created.  The include/exclude
    file masks of each ``TaskChainInput`` in ``self.inputdata`` that refers
    to a parent's transform are applied as regular expressions
    (``re.search``) to the LFN of every ``DiracFile`` in the output files of
    the parents' first active job (or of its subjobs when it has any).

    A file is selected when it matches at least one include pattern (or no
    include patterns are defined) and matches no exclude pattern.  Each
    output file is considered exactly once, so a file that matches several
    include patterns is neither listed repeatedly nor able to survive an
    exclusion.

    Args:
        parent_units: units of the parent transform(s) to chain from.
        use_copy_output: accepted for interface compatibility; not used by
            this implementation.

    Returns:
        A new ``LHCbUnit`` whose ``inputdata`` holds the selected files, or
        ``None`` if any parent unit is not ready.
    """
    import re

    # we need parent jobs that have completed to get the output files
    for parent in parent_units:
        if len(parent.active_job_ids) == 0 or parent.status != "completed":
            return None

    # Imported lazily, only once we know there is work to do.
    from Ganga.GPI import TaskChainInput

    # collect the file masks that apply to the parents' transforms
    incl_pat_list = []
    excl_pat_list = []
    for parent in parent_units:
        for inds in self.inputdata:
            if (isType(inds, TaskChainInput)
                    and inds.input_trf_id == parent._getParent().getID()):
                incl_pat_list += inds.include_file_mask
                excl_pat_list += inds.exclude_file_mask

    def _selected(lfn):
        # Included by any pattern (or by default), and excluded by none.
        if incl_pat_list and not any(
                re.search(pat, lfn) for pat in incl_pat_list):
            return False
        return not any(re.search(pat, lfn) for pat in excl_pat_list)

    # go over the output files and copy the appropriate ones over as input
    # files
    flist = []
    for parent in parent_units:
        job = GPI.jobs(parent.active_job_ids[0])
        job_list = job.subjobs or [job]
        for sj in job_list:
            for f in sj.outputfiles:
                # only dirac files allowed by the file masks are chained
                if isType(f, DiracFile) and _selected(f.lfn):
                    flist.append("LFN:" + f.lfn)

    # just do one unit that uses all data
    unit = LHCbUnit()
    unit.name = "Unit %d" % len(self.units)
    unit.inputdata = LHCbDataset(files=[DiracFile(lfn=f) for f in flist])
    return unit
def createUnits(self):
    """Create new units if required given the inputdata.

    After delegating to the parent class (for chained transforms) one of
    three things happens:

    * ``self.inputdata`` is non-empty: for every ``LHCbDataset`` entry, the
      files not yet held by a unit with the same ``input_datset_index`` are
      split into new units of at most ``self.files_per_unit`` files each
      (a single unit holding all new files when ``files_per_unit`` is not
      positive).  Every new unit records the index of the dataset it was
      built from, so the files are recognised as assigned on later calls.
    * otherwise, if ``self.mc_num_units`` is positive and no units exist
      yet, that many empty units are created for MC generation.
    * otherwise an error is logged.
    """
    import copy

    # call parent for chaining
    super(LHCbTransform, self).createUnits()

    if len(self.inputdata) > 0:

        # check for conflicting input
        if self.mc_num_units > 0:
            logger.warning("Inputdata specified - MC Event info ignored")

        # loop over input data and see if we need to create any more units
        for ds_index, inds in enumerate(self.inputdata):

            if not isType(inds, LHCbDataset):
                continue

            # go over the units and see what files have been assigned
            assigned_data = LHCbDataset()
            for unit in self.units:
                if unit.input_datset_index != ds_index:
                    continue
                assigned_data.files += unit.inputdata.files

            # any new files
            new_data = LHCbDataset(
                files=self.inputdata[ds_index].difference(
                    assigned_data).files)

            if len(new_data.files) == 0:
                continue

            # create units for these files; a non-positive files_per_unit
            # means "everything in one unit"
            step = self.files_per_unit
            if step <= 0:
                step = len(new_data.files)

            for start in range(0, len(new_data.files), step):
                unit = LHCbUnit()
                unit.name = "Unit %d" % len(self.units)
                # always remember the source dataset so the bookkeeping
                # above can find these files again
                unit.input_datset_index = ds_index
                self.addUnitToTRF(unit)
                unit.inputdata = copy.deepcopy(self.inputdata[ds_index])
                unit.inputdata.files = []
                unit.inputdata.files += new_data.files[start:start + step]

    elif self.mc_num_units > 0:
        if len(self.units) == 0:
            # check for appropriate splitter
            # NOTE(review): import path kept from the original - confirm
            # that a top-level ``GPI`` module is importable here.
            from GPI import GaussSplitter
            if not self.splitter or not isType(self.splitter, GaussSplitter):
                logger.warning(
                    "No GaussSplitter specified - first event info ignored")

            # create units for MC generation
            for _ in range(self.mc_num_units):
                unit = LHCbUnit()
                unit.name = "Unit %d" % len(self.units)
                self.addUnitToTRF(unit)
    else:
        logger.error(
            "Please specify either inputdata or MC info for unit generation")
def createUnits(self):
    """Create new units if required given the inputdata.

    After delegating to the parent class (for chained transforms) one of
    three things happens:

    * ``self.inputdata`` is non-empty: for every ``LHCbDataset`` entry, the
      files not yet held by a unit with the same ``input_datset_index`` are
      split into new units of at most ``self.files_per_unit`` files each
      (a single unit holding all new files when ``files_per_unit`` is not
      positive).
    * otherwise, if ``self.mc_num_units`` is positive and no units exist
      yet, that many empty units are created for MC generation.  A warning
      is logged when the transform's splitter is missing or is not a
      ``GaussSplitter``.
    * otherwise the current call stack is printed and an error is logged.
    """
    import copy

    # call parent for chaining
    super(LHCbTransform, self).createUnits()

    if len(self.inputdata) > 0:

        # check for conflicting input
        if self.mc_num_units > 0:
            logger.warning("Inputdata specified - MC Event info ignored")

        # loop over input data and see if we need to create any more units
        for ds_index, inds in enumerate(self.inputdata):

            if not isType(inds, LHCbDataset):
                continue

            # go over the units and see what files have been assigned
            assigned_data = LHCbDataset()
            for unit in self.units:
                if unit.input_datset_index != ds_index:
                    continue
                assigned_data.files += unit.inputdata.files

            # any new files
            new_data = LHCbDataset(
                files=self.inputdata[ds_index].difference(
                    assigned_data).files)

            if len(new_data.files) == 0:
                continue

            # Create units for these files; a non-positive files_per_unit
            # means "everything in one unit"
            step = self.files_per_unit
            if step <= 0:
                step = len(new_data.files)

            for start in range(0, len(new_data.files), step):
                unit = LHCbUnit()
                unit.name = "Unit %d" % len(self.units)
                unit.input_datset_index = ds_index
                self.addUnitToTRF(unit)
                unit.inputdata = copy.deepcopy(self.inputdata[ds_index])
                unit.inputdata.files = []
                unit.inputdata.files += new_data.files[start:start + step]

    elif self.mc_num_units > 0:
        if len(self.units) == 0:
            # check for appropriate splitter
            from GangaLHCb.Lib.Splitters.GaussSplitter import GaussSplitter
            if not self.splitter or not isType(self.splitter, GaussSplitter):
                logger.warning(
                    "No GaussSplitter specified - first event info ignored")

            # create units for MC generation
            for _ in range(self.mc_num_units):
                unit = LHCbUnit()
                unit.name = "Unit %d" % len(self.units)
                self.addUnitToTRF(unit)
    else:
        # print the call stack (to stderr) so the caller that reached this
        # misconfigured state can be identified
        import traceback
        traceback.print_stack()
        logger.error(
            "Please specify either inputdata or MC info for unit generation")