def addInputQuery(self, inDS):
    """Register *inDS* as explicit input data for this transform.

    Input data and BK queries are mutually exclusive: if any BK query
    has already been added, log an error and do nothing.
    """
    if self.queries:
        logger.error("Cannot add both input data and BK queries. Query already given")
        return
    super(LHCbTransform, self).addInputQuery(inDS)
def addInputQuery(self, inDS):
    """Add the given input dataset to the list but only if BK queries aren't given."""
    if len(self.queries) != 0:
        # Explicit input data may not be mixed with BK queries.
        logger.error("Cannot add both input data and BK queries. Query already given")
    else:
        # Delegate the actual bookkeeping to the base transform.
        super(LHCbTransform, self).addInputQuery(inDS)
def addQuery(self, bk):
    """Attach a BK query to this transform and refresh its input data.

    Raises GangaAttributeError when *bk* is not a BKQuery.  Refuses to
    mix queries with explicit input data that is already present.
    """
    # Validate the argument type up front.
    if not isType(bk, BKQuery):
        raise GangaAttributeError(
            None, 'LHCbTransform expects a BKQuery object passed to the addQuery method')

    # BK queries and explicit input data are mutually exclusive.
    if not self.queries and self.inputdata:
        logger.error("Cannot add both input data and BK queries. Input Data already present.")
        return

    # Record the query and bring the input data up to date.
    self.queries.append(bk)
    self.updateQuery()
def addQuery(self, bk):
    """Add a BK query to this transform"""
    # Only BKQuery objects are accepted here.
    if isType(bk, BKQuery):
        if len(self.queries) == 0 and len(self.inputdata) > 0:
            # Cannot combine queries with pre-existing explicit input data.
            logger.error("Cannot add both input data and BK queries. Input Data already present.")
        else:
            # Store the query, then recompute the transform's input data.
            self.queries.append(bk)
            self.updateQuery()
    else:
        raise GangaAttributeError(
            None, 'LHCbTransform expects a BKQuery object passed to the addQuery method')
def removeUnusedData(self):
    """Remove any output data from orphaned jobs.

    Walks every unit's list of previous (superseded) job ids and removes
    the grid files (LFNs) of their DiracFile outputs.  Cleanup is
    best-effort: a failure for one job is logged and the loop continues.
    """
    for unit in self.units:
        for jid in unit.prev_job_ids:
            try:
                logger.warning("Removing data from job '%d'..." % jid)
                job = GPI.jobs(jid)
                # Operate on the subjobs when present, otherwise the job itself.
                if len(job.subjobs) > 0:
                    jlist = job.subjobs
                else:
                    jlist = [job]
                for sj in jlist:
                    for f in sj.outputfiles:
                        # BUG FIX: the original tested
                        # `isType(f, DiracFile) == "DiracFile"`, comparing a
                        # boolean to a string — always False, so no file was
                        # ever removed.  Use the boolean result directly.
                        if isType(f, DiracFile) and f.lfn:
                            f.remove()
            except Exception:
                # Narrowed from a bare 'except:' so KeyboardInterrupt and
                # SystemExit still propagate; deletion stays best-effort.
                logger.error("Problem deleting data for job '%d'" % jid)
def createUnits(self):
    """Create new units if required given the inputdata.

    Two modes:
      * explicit ``inputdata``: one or more units are created per dataset
        covering any files not yet assigned to an existing unit, split
        into chunks of ``files_per_unit`` when that is positive;
      * MC generation (``mc_num_units`` > 0 and no inputdata): a fixed
        number of empty units is created once.
    Logs an error if neither inputdata nor MC info is given.
    """
    # Call parent for chaining.
    super(LHCbTransform, self).createUnits()

    if len(self.inputdata) > 0:
        # Check for conflicting input.
        if self.mc_num_units > 0:
            logger.warning("Inputdata specified - MC Event info ignored")

        # Loop over input data and see if we need to create any more units.
        import copy
        # NOTE: renamed loop variable from 'id' (shadowed the builtin).
        for ds_id, inds in enumerate(self.inputdata):
            if not isType(inds, LHCbDataset):
                continue

            # Gather the files already assigned to units of this dataset.
            assigned_data = LHCbDataset()
            for unit in self.units:
                if unit.input_datset_index != ds_id:
                    continue
                assigned_data.files += unit.inputdata.files

            # Any new files not yet covered by a unit?
            new_data = LHCbDataset(
                files=self.inputdata[ds_id].difference(assigned_data).files)
            if len(new_data.files) == 0:
                continue

            if self.files_per_unit > 0:
                # Chunk the new files into units of files_per_unit each.
                num = 0
                while num < len(new_data.files):
                    unit = LHCbUnit()
                    unit.name = "Unit %d" % len(self.units)
                    unit.input_datset_index = ds_id
                    self.addUnitToTRF(unit)
                    unit.inputdata = copy.deepcopy(self.inputdata[ds_id])
                    unit.inputdata.files = []
                    unit.inputdata.files += new_data.files[
                        num:num + self.files_per_unit]
                    num += self.files_per_unit
            else:
                # One new unit covering the whole remaining dataset.
                unit = LHCbUnit()
                unit.name = "Unit %d" % len(self.units)
                # BUG FIX: the original never set input_datset_index here,
                # so these units could not be matched back to their dataset
                # on a later createUnits() pass, causing duplicate units.
                unit.input_datset_index = ds_id
                self.addUnitToTRF(unit)
                unit.inputdata = copy.deepcopy(self.inputdata[ds_id])
                unit.inputdata.files = []
                unit.inputdata.files += new_data.files

    elif self.mc_num_units > 0:
        if len(self.units) == 0:
            # Check for appropriate splitter.
            from GPI import GaussSplitter
            # BUG FIX: the original warned when the splitter WAS a
            # GaussSplitter; the message ("No GaussSplitter specified")
            # shows the intent is to warn when it is missing or of a
            # different type.
            if not self.splitter or not isType(self.splitter, GaussSplitter):
                logger.warning("No GaussSplitter specified - first event info ignored")

            # Create units for MC generation.
            for i in range(0, self.mc_num_units):
                unit = LHCbUnit()
                unit.name = "Unit %d" % len(self.units)
                self.addUnitToTRF(unit)
    else:
        logger.error(
            "Please specify either inputdata or MC info for unit generation")
def createUnits(self):
    """Create new units if required given the inputdata.

    With explicit ``inputdata``, creates units covering any files not yet
    assigned to an existing unit (chunked by ``files_per_unit`` when
    positive).  With ``mc_num_units`` and no inputdata, creates that many
    empty MC-generation units once.  Otherwise logs an error.
    """
    # Call parent for chaining.
    super(LHCbTransform, self).createUnits()

    if len(self.inputdata) > 0:
        # Check for conflicting input.
        if self.mc_num_units > 0:
            logger.warning("Inputdata specified - MC Event info ignored")

        # Loop over input data and see if we need to create any more units.
        import copy
        # NOTE: loop variable renamed from 'id' (shadowed the builtin).
        for ds_id, inds in enumerate(self.inputdata):
            if not isType(inds, LHCbDataset):
                continue

            # Collect files already assigned to units of this dataset.
            assigned_data = LHCbDataset()
            for unit in self.units:
                if unit.input_datset_index != ds_id:
                    continue
                assigned_data.files += unit.inputdata.files

            # Determine which files still need a unit.
            new_data = LHCbDataset(
                files=self.inputdata[ds_id].difference(assigned_data).files)
            if len(new_data.files) == 0:
                continue

            if self.files_per_unit > 0:
                # Split the new files into units of files_per_unit each.
                num = 0
                while num < len(new_data.files):
                    unit = LHCbUnit()
                    unit.name = "Unit %d" % len(self.units)
                    unit.input_datset_index = ds_id
                    self.addUnitToTRF(unit)
                    unit.inputdata = copy.deepcopy(self.inputdata[ds_id])
                    unit.inputdata.files = []
                    unit.inputdata.files += new_data.files[
                        num:num + self.files_per_unit]
                    num += self.files_per_unit
            else:
                # Single new unit covering all remaining files.
                unit = LHCbUnit()
                unit.name = "Unit %d" % len(self.units)
                # BUG FIX: input_datset_index was never set on this path,
                # so these units could not be matched to their dataset on
                # a later pass, re-creating duplicate units.
                unit.input_datset_index = ds_id
                self.addUnitToTRF(unit)
                unit.inputdata = copy.deepcopy(self.inputdata[ds_id])
                unit.inputdata.files = []
                unit.inputdata.files += new_data.files

    elif self.mc_num_units > 0:
        if len(self.units) == 0:
            # Check for appropriate splitter.
            from GPI import GaussSplitter
            # BUG FIX: inverted condition — the original fired the
            # "No GaussSplitter specified" warning exactly when the
            # splitter WAS a GaussSplitter.
            if not self.splitter or not isType(self.splitter, GaussSplitter):
                logger.warning(
                    "No GaussSplitter specified - first event info ignored")

            # Create units for MC generation.
            for i in range(0, self.mc_num_units):
                unit = LHCbUnit()
                unit.name = "Unit %d" % len(self.units)
                self.addUnitToTRF(unit)
    else:
        # Removed leftover debug traceback.print_stack() call.
        logger.error(
            "Please specify either inputdata or MC info for unit generation")