def initializeFromDatasets(self, dataset_list):
    """Create one unit per dataset in *dataset_list* in every primary transform.

    For each dataset in the dataset_list a unit is created. The output
    dataset names are set using the run numbers and tags of the input
    datasets appended to the current
    t.analysis.outputdata.datasetname field.
    """
    # isinstance is the idiomatic type check (was: type(...) is list).
    if not isinstance(dataset_list, list):
        logger.error("dataset_list must be a python list: ['ds1','ds2',...]")
        return

    # Primary transforms are those with no upstream (required) transforms.
    primary_tfs = [tf for tf in self.transforms if len(tf.required_trfs) == 0]
    if len(primary_tfs) == 0:
        # Typo fixed: "Yout" -> "You".
        logger.error("No primary transforms specified. You need at least one before the Task can be initialised.")
        return

    unit_num = 0
    for dset in dataset_list:
        dset = dset.strip()
        try:
            if "*" in dset:
                logger.error("WARNING: Wildcards may include unexpected datasets in your processing! Please list your datasets before specifying them here!")
            try:
                # Always query the container form of the name.
                if dset[-1] == "/":
                    tid_datasets = dq2.listDatasetsInContainer(dset)
                else:
                    tid_datasets = dq2.listDatasetsInContainer(dset + "/")
            except DQUnknownDatasetException:
                # No such container: fall back to a plain dataset lookup.
                # list() keeps this working on py3 where keys() is a view.
                dslist = list(dq2.listDatasets(dset).keys())
                if len(dslist) == 0:
                    logger.error("Dataset %s not found!" % dset)
                    return
                # Prefer tid datasets; _sub datasets are never wanted.
                tid_datasets = [ds for ds in dslist if "_tid" in ds and "_sub" not in ds]
                if len(tid_datasets) == 0:
                    if len(dslist) > 1:
                        logger.error("Found no tid dataset but multiple datasets match %s*!" % dset)
                        return
                    tid_datasets = [dslist[0]]
        except Exception as e:
            logger.error('DQ2 Error while listing dataset %s*! %s' % (dset, e))
            return

        logger.info("Found %i datasets matching %s..." % (len(tid_datasets), dset))
        if len(tid_datasets) == 0:
            logger.error("No tid datasets found from dataset list. Maybe the container '%s' is empty?" % dset)
            return

        # Every primary transform receives the same units with identical
        # numbering, so rewind the counter for each transform.
        prev_num = unit_num
        for tf in primary_tfs:
            unit_num = prev_num
            for ds in tid_datasets:
                tf.addUnit("Unit_%d" % unit_num, ds)
                unit_num += 1
def checkOutputContainers(self):
    """Go through all transforms and check all datasets are registered.

    First empties the overall Task container (best effort, errors are
    logged), then delegates per-transform container checks.
    """
    logger.info("Cleaning out overall Task container...")
    try:
        dslist = []
        dq2_lock.acquire()
        try:
            dslist = dq2.listDatasetsInContainer(self.getContainerName())
        except Exception:
            # Narrowed from a bare 'except:'; lookup failure is treated
            # as an empty container (deliberate best-effort).
            dslist = []
        try:
            dq2.deleteDatasetsFromContainer(self.getContainerName(), dslist)
        except DQContainerDoesNotHaveDataset:
            pass
        except DQException as x:
            # DQException must precede the generic Exception handler;
            # in the original ordering this branch was unreachable.
            logger.error('DQ2 Problem cleaning out Task container: %s %s' % (x.__class__, x))
        except Exception as x:
            logger.error("Problem cleaning out Task container: %s %s", x.__class__, x)
    except DQException as x:
        # Safety net for DQ2 errors raised outside the delete call.
        logger.error('DQ2 Problem cleaning out Task container: %s %s' % (x.__class__, x))
    finally:
        dq2_lock.release()

    logger.info("Checking output data has been registered. This can take a few minutes...")
    for trf in self.transforms:
        # Typo fixed in the log message: "Tranform" -> "Transform".
        logger.info("Checking containers in Transform %d..." % trf.getID())
        trf.checkOutputContainers()
def initializeFromDatasets(self, dset_list, template=None, using_jedi=True):
    """Initialise the trf with the given dataset list, creating a unit for each DS.

    Container names (trailing '/') are either expanded into their
    constituent datasets (when not using JEDI) or passed through whole;
    plain dataset names always get one unit each.
    """
    for ds in dset_list:
        if ds[-1] == "/":
            if not using_jedi:
                try:
                    tid_datasets = dq2.listDatasetsInContainer(ds)
                except DQUnknownDatasetException:
                    logger.error("dataset container %s not found" % ds)
                    # BUGFIX: without skipping this entry, the code below
                    # referenced 'tid_datasets' which is either undefined
                    # (NameError) or stale from a previous iteration.
                    continue
                logger.info("Found %i datasets matching %s..." % (len(tid_datasets), ds))
                for ds2 in tid_datasets:
                    self.addUnit('.'.join(ds.split(".")[1:-1]), ds2, template)
            else:
                # JEDI handles the container itself; strip the trailing '/'.
                self.addUnit('.'.join(ds[:-1].split(".")[1:]), ds, template)
        else:
            self.addUnit('.'.join(ds.split(".")[1:-1]), ds, template)
def checkOutputContainers(self):
    """Go through all transforms and check all datasets are registered"""
    logger.info("Cleaning out overall Task container...")
    try:
        contents = []
        dq2_lock.acquire()
        try:
            contents = dq2.listDatasetsInContainer(self.getContainerName())
        except:
            contents = []
        try:
            dq2.deleteDatasetsFromContainer(self.getContainerName(), contents)
        except DQContainerDoesNotHaveDataset:
            pass
        except Exception as err:
            logger.error("Problem cleaning out Task container: %s %s", err.__class__, err)
    except DQException as err:
        logger.error('DQ2 Problem cleaning out Task container: %s %s' % (err.__class__, err))
    finally:
        dq2_lock.release()

    logger.info("Checking output data has been registered. This can take a few minutes...")
    for trf in self.transforms:
        logger.info("Checking containers in Tranform %d..." % trf.getID())
        trf.checkOutputContainers()
def checkOutputContainers(self):
    """Go through all completed units and make sure datasets are registered as required.

    Empties this transform's container (best effort, errors are logged),
    then triggers dataset registration for every completed unit whose job
    produced a DQ2OutputDataset.
    """
    logger.info("Cleaning out transform %d container..." % self.getID())
    try:
        dslist = []
        dq2_lock.acquire()
        try:
            dslist = dq2.listDatasetsInContainer(self.getContainerName())
        except Exception:
            # Narrowed from a bare 'except:'; lookup failure is treated
            # as an empty container (deliberate best-effort).
            dslist = []
        try:
            dq2.deleteDatasetsFromContainer(self.getContainerName(), dslist)
        except DQContainerDoesNotHaveDataset:
            pass
        except DQException as x:
            # DQException must precede the generic Exception handler;
            # in the original ordering this branch was unreachable.
            logger.error('DQ2 Problem cleaning out Transform container: %s %s' % (x.__class__, x))
        except Exception as x:
            logger.error("Problem cleaning out Transform container: %s %s", x.__class__, x)
    except DQException as x:
        # Safety net for DQ2 errors raised outside the delete call.
        logger.error('DQ2 Problem cleaning out Transform container: %s %s' % (x.__class__, x))
    finally:
        dq2_lock.release()

    logger.info("Checking output data has been registered for Transform %d..." % self.getID())
    for unit in self.units:
        if len(unit.active_job_ids) == 0:
            continue
        # Hoist the repeated GPI.jobs() lookup into a local.
        job = GPI.jobs(unit.active_job_ids[0])
        if unit.status == "completed" and job.outputdata and job.outputdata._impl._name == "DQ2OutputDataset":
            logger.info("Checking containers in Unit %d..." % unit.getID())
            unit.registerDataset()
def getOutputDatasetList(self):
    """Return a list of the output datasets associated with this unit"""
    # Flatten the per-container dataset lists into a single list.
    return [dataset
            for container in self.getContainerList()
            for dataset in dq2.listDatasetsInContainer(container)]
def checkForSubmission(self):
    """Additional checks for unit submission.

    Returns False if the base-class checks fail, if parent units are not
    complete, or if any input dataset of a chained DQ2 unit does not yet
    look frozen; True otherwise.
    """
    # call the base class
    if not super(AtlasUnit, self).checkForSubmission():
        return False

    # check that parent units are complete because otherwise, when we check
    # for submission to do submissions first (ITransform.update) datasets
    # may not have been created yet
    if not self.checkParentUnitsAreComplete():
        return False

    # Add a check for chain units to have frozen their input DS
    if len(self.req_units) > 0 and self.inputdata._name == "DQ2Dataset" and not self.inputdata.tag_info:
        # check datasets are frozen
        for uds in self.inputdata.dataset:
            try:
                dq2_lock.acquire()
                try:
                    cont_ok = True
                    # list datasets in container
                    for ds in dq2.listDatasetsInContainer(uds):
                        # find locations and check if frozen
                        loc_dict = dq2.listDatasetReplicas(ds)
                        # Hoist the repeated key lookup; list() keeps this
                        # working on py3 where keys() is a view.
                        site_key = list(loc_dict.keys())[0]
                        locations = []
                        for loc in loc_dict[site_key]:
                            locations += loc_dict[site_key][loc]

                        ds_ok = False
                        for loc in locations:
                            if loc == "":
                                continue
                            datasetsiteinfo = dq2.listFileReplicas(loc, ds)
                            # A non-None 'found' count is taken here as
                            # meaning the dataset is frozen at this site.
                            if datasetsiteinfo[0]['found'] is not None:
                                ds_ok = True
                                break

                        if not ds_ok:
                            cont_ok = False
                            break
                except Exception:
                    # Narrowed from a bare 'except:' so KeyboardInterrupt
                    # and SystemExit are no longer swallowed.
                    logger.warning("Unable to check if datasets are frozen")
                    cont_ok = False
            finally:
                dq2_lock.release()

            # at least one dataset wasn't frozen
            if not cont_ok:
                return False

    return True
def checkForSubmission(self):
    """Perform the extra submission checks needed for an ATLAS unit."""
    # Base-class checks come first.
    if not super(AtlasUnit, self).checkForSubmission():
        return False

    # Parent units must be complete: ITransform.update does submissions
    # before other checks, so upstream datasets may not exist yet.
    if not self.checkParentUnitsAreComplete():
        return False

    # Chained DQ2 units (without tag info) must have frozen input datasets.
    is_chained_dq2 = (len(self.req_units) > 0
                      and self.inputdata._name == "DQ2Dataset"
                      and not self.inputdata.tag_info)
    if not is_chained_dq2:
        return True

    for uds in self.inputdata.dataset:
        try:
            dq2_lock.acquire()
            try:
                container_frozen = True
                for ds in dq2.listDatasetsInContainer(uds):
                    # Gather every replica location for this dataset.
                    replica_map = dq2.listDatasetReplicas(ds)
                    first_key = replica_map.keys()[0]
                    site_list = []
                    for entry in replica_map[first_key]:
                        site_list += replica_map[first_key][entry]

                    frozen_here = False
                    for site in site_list:
                        if site == "":
                            continue
                        info = dq2.listFileReplicas(site, ds)
                        if info[0]['found'] != None:
                            frozen_here = True
                            break

                    if not frozen_here:
                        container_frozen = False
                        break
            except:
                logger.warning("Unable to check if datasets are frozen")
                container_frozen = False
        finally:
            dq2_lock.release()

        # Bail out as soon as one container fails the frozen check.
        if not container_frozen:
            return False

    return True
def listAllDatasets(self):
    """List all datasets in the container of this transform.

    Returns an empty list if the container is missing or any error
    occurs (errors are logged, not raised).
    """
    ds_list = []
    try:
        dq2_lock.acquire()
        try:
            ds_list = dq2.listDatasetsInContainer(self.getContainerName())
        except DQContainerDoesNotHaveDataset:
            pass
        except DQException as x:
            # Most specific handler first: in the original ordering the
            # generic Exception handler shadowed this branch.
            logger.error('DQ2 Problem finding datasets associated with TRF container %s: %s %s' % (self.getContainerName(), x.__class__, x))
        except Exception as x:
            logger.error('Problem finding datasets associated with TRF container %s: %s %s' % (self.getContainerName(), x.__class__, x))
    finally:
        dq2_lock.release()
    return ds_list
def initializeFromContainer(self, dset, template=None, using_jedi=True):
    """Initialise the trf with given container, creating a unit for each DS.

    Requires *dset* to be a container name (trailing '/'). When not using
    JEDI the container is expanded into one unit per constituent dataset;
    otherwise the container is passed through as a single unit.
    """
    # endswith() also handles the empty string safely; the original
    # dset[-1] test raised IndexError on "".
    if not dset.endswith("/"):
        logger.error("Please supply a container!")
        return

    if not using_jedi:
        try:
            tid_datasets = dq2.listDatasetsInContainer(dset)
        except DQUnknownDatasetException:
            logger.error("dataset container %s not found" % dset)
            return

        logger.info("Found %i datasets matching %s..." % (len(tid_datasets), dset))
        for ds in tid_datasets:
            self.addUnit('.'.join(ds.split(".")[1:-1]), ds, template)
    else:
        # JEDI handles the container itself; strip the trailing '/'.
        self.addUnit('.'.join(dset[:-1].split(".")[1:]), dset, template)
def checkOutputContainers(self):
    """Go through all completed units and make sure datasets are registered as required"""
    logger.info("Cleaning out transform %d container..." % self.getID())
    try:
        registered = []
        dq2_lock.acquire()
        try:
            registered = dq2.listDatasetsInContainer(self.getContainerName())
        except:
            registered = []
        try:
            dq2.deleteDatasetsFromContainer(self.getContainerName(), registered)
        except DQContainerDoesNotHaveDataset:
            pass
        except Exception as err:
            logger.error("Problem cleaning out Transform container: %s %s", err.__class__, err)
    except DQException as err:
        logger.error('DQ2 Problem cleaning out Transform container: %s %s' % (err.__class__, err))
    finally:
        dq2_lock.release()

    logger.info("Checking output data has been registered for Transform %d..." % self.getID())
    for unit in self.units:
        if len(unit.active_job_ids) == 0:
            continue
        if (unit.status == "completed"
                and GPI.jobs(unit.active_job_ids[0]).outputdata
                and GPI.jobs(unit.active_job_ids[0]).outputdata._impl._name == "DQ2OutputDataset"):
            logger.info("Checking containers in Unit %d..." % unit.getID())
            unit.registerDataset()
def initializeFromContainer(self, dset, template=None, using_jedi=True):
    """Initialise the trf with given container, creating a unit for each DS"""
    if dset[-1] != "/":
        logger.error("Please supply a container!")
        return

    if using_jedi:
        # Hand the whole container (minus trailing '/') to a single unit.
        self.addUnit('.'.join(dset[:-1].split(".")[1:]), dset, template)
        return

    try:
        constituents = dq2.listDatasetsInContainer(dset)
    except DQUnknownDatasetException:
        logger.error("dataset container %s not found" % dset)
        return

    logger.info("Found %i datasets matching %s..." % (len(constituents), dset))
    for entry in constituents:
        self.addUnit('.'.join(entry.split(".")[1:-1]), entry, template)
def initializeFromDatasets(self, dset_list, template=None, using_jedi=True):
    """Initialise the trf with the given dataset list, creating a unit for each DS.

    Container names (trailing '/') are either expanded into their
    constituent datasets (when not using JEDI) or passed through whole;
    plain dataset names always get one unit each.
    """
    for ds in dset_list:
        if ds[-1] == "/":
            if not using_jedi:
                try:
                    tid_datasets = dq2.listDatasetsInContainer(ds)
                except DQUnknownDatasetException:
                    logger.error("dataset container %s not found" % ds)
                    # BUGFIX: without skipping this entry, the code below
                    # referenced 'tid_datasets' which is either undefined
                    # (NameError) or stale from a previous iteration.
                    continue
                logger.info("Found %i datasets matching %s..." % (len(tid_datasets), ds))
                for ds2 in tid_datasets:
                    self.addUnit('.'.join(ds.split(".")[1:-1]), ds2, template)
            else:
                # JEDI handles the container itself; strip the trailing '/'.
                self.addUnit('.'.join(ds[:-1].split(".")[1:]), ds, template)
        else:
            self.addUnit('.'.join(ds.split(".")[1:-1]), ds, template)
def listAllDatasets(self):
    """List all datasets in container of this transform"""
    found = []
    try:
        try:
            dq2_lock.acquire()
            found = dq2.listDatasetsInContainer(self.getContainerName())
        except DQContainerDoesNotHaveDataset:
            pass
        except Exception as err:
            logger.error('Problem finding datasets associated with TRF container %s: %s %s' % (self.getContainerName(), err.__class__, err))
    except DQException as err:
        logger.error('DQ2 Problem finding datasets associated with TRF container %s: %s %s' % (self.getContainerName(), err.__class__, err))
    finally:
        dq2_lock.release()
    return found
def initializeFromDatasets(self, dataset_list):
    """ For each dataset in the dataset_list a unit is created. The output dataset names are set using the run numbers and tags of the input datasets appended to the current t.analysis.outputdata.datasetname field."""
    if not type(dataset_list) is list:
        logger.error("dataset_list must be a python list: ['ds1','ds2',...]")
        return

    # Collect the transforms that have no upstream requirements.
    primary_tfs = [trf for trf in self.transforms if len(trf.required_trfs) == 0]
    if len(primary_tfs) == 0:
        logger.error("No primary transforms specified. Yout need at least one before the Task can be initialised.")
        return

    unit_num = 0
    for dset in dataset_list:
        dset = dset.strip()
        try:
            if "*" in dset:
                logger.error("WARNING: Wildcards may include unexpected datasets in your processing! Please list your datasets before specifying them here!")
            try:
                # Query the container form of the name in both cases.
                container_name = dset if dset[-1] == "/" else dset + "/"
                tid_datasets = dq2.listDatasetsInContainer(container_name)
            except DQUnknownDatasetException:
                dslist = dq2.listDatasets(dset).keys()
                if len(dslist) == 0:
                    logger.error("Dataset %s not found!" % dset)
                    return
                tid_datasets = [d for d in dslist if "_tid" in d and not "_sub" in d]
                if len(tid_datasets) == 0:
                    if len(dslist) > 1:
                        logger.error("Found no tid dataset but multiple datasets match %s*!" % dset)
                        return
                    tid_datasets = [dslist[0]]
        except Exception as e:
            logger.error('DQ2 Error while listing dataset %s*! %s' % (dset, e))
            return

        logger.info("Found %i datasets matching %s..." % (len(tid_datasets), dset))
        if len(tid_datasets) == 0:
            logger.error("No tid datasets found from dataset list. Maybe the container '%s' is empty?" % dset)
            return

        # Every primary transform gets an identically-numbered copy of the units.
        start_num = unit_num
        for trf in primary_tfs:
            unit_num = start_num
            for ds_name in tid_datasets:
                trf.addUnit("Unit_%d" % unit_num, ds_name)
                unit_num += 1