def addQuery(self, transform, bkQuery, associate=True):
    """Allows the user to add multiple transforms corresponding to the list
    of BKQuery type objects given in the second argument. The first argument
    is an LHCbTransform object to use as the template for the creation of
    further transforms."""
    if not isinstance(transform, LHCbTransform):
        raise GangaException(
            None, 'First argument must be an LHCbTransform object to use as the basis for establishing the new transforms')

    # Check if the template transform is associated with the Task
    try:
        self.transforms.index(transform)
    except ValueError:
        # list.index raises ValueError when the transform is not present
        if associate:
            logger.info('The transform is not associated with this Task, doing so now.')
            self.appendTransform(transform)

    # Check that the BKQuery input is correct and append/update
    if not isinstance(bkQuery, list):
        bkQuery = [bkQuery]
    for bk in bkQuery:
        if not isType(bk, BKQuery):
            raise GangaAttributeError(
                None, 'LHCbTransform expects a BKQuery object or list of BKQuery objects passed to the addQuery method')

        if len(transform.queries) != 0:
            # The template already carries a query, so attach the new
            # query to a deep copy appended to the task
            logger.info('Duplicating transform to add new query.')
            tr = deepcopy(transform)
            tr.addQuery(stripProxy(bk))
            self.appendTransform(tr)
        else:
            # The template has no query itself, so attach directly
            logger.info('Attaching query to transform')
            transform.addQuery(stripProxy(bk))
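
# Usage sketch for addQuery(): a minimal, hedged example of driving it from
# an interactive Ganga session, assuming LHCbTask, LHCbTransform and BKQuery
# are exported to the GPI. The bookkeeping paths are illustrative
# placeholders, not real datasets.
#
#   t = LHCbTask()
#   template = LHCbTransform()
#   queries = [BKQuery('/LHCb/Collision11/.../STREAM_A.DST'),
#              BKQuery('/LHCb/Collision11/.../STREAM_B.DST')]
#   # The first query attaches to the (query-less) template; each further
#   # query lands on a deep copy appended to the task.
#   t.addQuery(template, queries)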
def updateQuery(self, resubmit=False):
    """Update the dataset information of this transform. Any new data is
    included in the processing, while jobs whose input data has since been
    removed are killed and forgotten."""
    if len(self.queries) == 0:
        raise GangaException(
            None, 'Cannot call updateQuery() on an LHCbTransform without any queries')

    if self._getParent() is not None:
        logger.info('Retrieving latest bookkeeping information for transform %i:%i, please wait...' % (
            self._getParent().id, self.getID()))
    else:
        logger.info('Retrieving latest bookkeeping information for transform, please wait...')

    # Check we have an input DS per BK Query
    while len(self.queries) > len(self.inputdata):
        self.inputdata.append(LHCbDataset())

    # Loop over the queries and fill the file lists
    for idx, query in enumerate(self.queries):

        # Get the latest dataset
        latest_dataset = query.getDataset()

        # Compare to the previous inputdata to find new and removed files
        logger.info('Checking for new and removed data for query %d, please wait...' % idx)
        dead_data = LHCbDataset()
        new_data = LHCbDataset()
        new_data.files += latest_dataset.difference(self.inputdata[idx]).files
        dead_data.files += self.inputdata[idx].difference(latest_dataset).files

        # For dead data, find then kill/remove any associated jobs:
        # loop over units and check any associated with this DS
        # TODO: Follow through chained tasks
        for unit in self.units:
            # skip units not associated with this dataset
            if unit.input_datset_index != idx:
                continue

            # skip units without an active job
            if len(unit.active_job_ids) == 0:
                continue

            # check the data
            for f in dead_data.files:
                if f in unit.inputdata.files:
                    # kill the associated job if it is still alive
                    job = GPI.jobs(unit.active_job_ids[0])
                    if job.status in ['submitted', 'running']:
                        job.kill()

                    # forget the job
                    unit.prev_job_ids.append(unit.active_job_ids[0])
                    unit.active_job_ids = []
                    break

        # In any case, now just set the DS files to the new set (this is
        # also what picks up the files collected in new_data above)
        self.inputdata[idx].files = latest_dataset.files
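
# Usage sketch for updateQuery(): the polling pattern it is written for,
# assuming the task and transform objects from the sketch above; this is a
# hedged illustration, not the canonical workflow. updateQuery() only
# reconciles the per-query input datasets; creating and submitting units for
# the new data is left to the task machinery once the task is running.
#
#   for tr in t.transforms:
#       tr.updateQuery()
#   t.run()   # (re)activate the task so the new data is picked up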