def list_locations(self, dataset=None, complete=0):
    '''List dataset locations'''
    # expand datasets in case there is a wildcard
    self.__expand_datasets()
    return DQ2Dataset.list_locations(self, dataset=dataset, complete=complete)
def get_locations(self, complete=0, backnav=False, overlap=True):
    '''Helper function to access the dataset locations'''
    # expand datasets in case there is a wildcard
    self.__expand_datasets()
    logger.debug('getting dataset locations')
    return DQ2Dataset.get_locations(self, complete=complete, backnav=backnav, overlap=overlap)
def get_contents(self, backnav=False, overlap=True):
    '''Helper function to access dataset content'''
    # expand datasets in case there is a wildcard
    self.__expand_datasets()
    # always get all contents
    self.number_of_files = 0
    return DQ2Dataset.get_contents(self, backnav=backnav, overlap=overlap)
def list_locations_num_files(self, dataset=None, complete=-1, backnav=False):
    '''List the number of files replicated to the dataset locations'''
    return DQ2Dataset.list_locations_num_files(self, dataset=dataset, complete=complete, backnav=backnav)
def get_replica_listing(self, dataset=None, SURL=True, complete=0, backnav=False):
    '''Return the list of GUIDs/SURLs replicated, depending on the dataset locations'''
    return DQ2Dataset.get_replica_listing(self, dataset=dataset, SURL=SURL, complete=complete, backnav=backnav)
def list_contents(self, dataset=None):
    '''List dataset content'''
    # expand datasets in case there is a wildcard
    self.__expand_datasets()
    return DQ2Dataset.list_contents(self, dataset=dataset)
def list_datasets(self, name, filter=True):
    '''List dataset names'''
    return DQ2Dataset.list_datasets(self, name=name, filter=filter)
def dataset_exists(self):
    '''Check whether the dataset exists'''
    return DQ2Dataset.dataset_exists(self)
def __setattr__(self, attr, value):
    DQ2Dataset.__setattr__(self, attr, value)
    # invalidate the cached replica information whenever the dataset changes
    if attr == 'dataset':
        self.complete_files_replicas = {}
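# A minimal sketch of the cache invalidation above (the dataset names and
# the variable `d` are hypothetical): reassigning .dataset clears the cached
# replica map, so the next get_locations()/get_contents() call re-queries DQ2.
#
#   d.dataset = 'data08.XYZ'       # complete_files_replicas is cleared here
#   locations = d.get_locations()  # repopulates the replica information
#   d.dataset = 'data09.ABC'       # cache cleared again via __setattr__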
def list_locations_ce(self, dataset=None, complete=0):
    '''List the CEs associated with the dataset locations'''
    return DQ2Dataset.list_locations_ce(self, dataset=dataset, complete=complete)
def list_locations_siteindex(self, dataset=None, timeout=15, days=2):
    return DQ2Dataset.list_locations_siteindex(self, dataset=dataset, timeout=timeout, days=days)
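# A hedged usage sketch for the DQ2 query helpers above, e.g. from an
# interactive Ganga session (the dataset names and the variable `d` are
# illustrative assumptions, not part of this module):
#
#   d.dataset = 'user.someuser.mydata.*'  # wildcard, expanded on first access
#   d.list_datasets('user.someuser.*')    # enumerate matching dataset names
#   d.get_locations(complete=1)           # sites holding complete replicas
#   d.get_contents()                      # all files; number_of_files is reset first
#   d.list_locations_num_files()          # per-site replicated file counts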
def _initSubJob(self, masterjob, dataset, lfnList, guidList, subCollection):
    from Ganga.GPIDev.Lib.Job import Job
    from Ganga.GPIDev.Lib.File import File
    from Ganga.Core.exceptions import ApplicationConfigurationError
    from GangaAtlas.Lib.ATLASDataset import DQ2Dataset

    subjob = Job()
    subjob.inputsandbox = masterjob.inputsandbox
    subjob.application = masterjob.application
    subjob.outputdata = masterjob.outputdata
    subjob.outputsandbox = masterjob.outputsandbox
    subjob.backend = masterjob.backend

    # attributes which are different for each sub-job
    subjob.inputdata = DQ2Dataset()
    subjob.inputdata.dataset = dataset
    subjob.inputdata.names = lfnList
    subjob.inputdata.guids = guidList

    if self.match_ce:
        # sort out the possible sites, taking the backend requirements into account
        if subjob.backend.requirements.sites:
            allowed_sites = subjob.backend.requirements.sites
        elif subjob.backend.requirements.cloud:
            allowed_sites = subjob.backend.requirements.list_sites_cloud()
        else:
            raise ApplicationConfigurationError(None, 'TntJobSplitter requires a cloud or a site to be set - please use the --cloud option, j.backend.requirements.cloud=CLOUDNAME (T0, IT, ES, FR, UK, DE, NL, TW, CA, US, NG) or j.backend.requirements.sites=SITENAME')

        # apply the GangaRobot blacklist
        allowed_sites_all = subjob.backend.requirements.list_sites(True, True)
        allowed_sites = [site for site in allowed_sites if site in allowed_sites_all]

        # keep only the allowed sites which actually hold the dataset
        sub_sites = [site for site in subjob.inputdata.get_locations() if site in allowed_sites]
        if not sub_sites:
            raise ApplicationConfigurationError(None, 'TntJobSplitter could not find a location for dataset %s in cloud %s. Try another cloud!' % (subjob.inputdata.dataset, subjob.backend.requirements.cloud))
        subjob.backend.requirements.sites = sub_sites
    else:
        subjob.inputdata.type = 'TNT_DOWNLOAD'
        # the sub-collection file (PFNs in sfn:// format) is shipped in the input sandbox
        subjob.inputsandbox += [File(subCollection + ".root")]

    return subjob
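# A hedged sketch of how _initSubJob might be driven from a split() loop
# (the `chunks` list and the sub-collection naming are assumptions for
# illustration; only _initSubJob itself is defined above):
#
#   subjobs = []
#   for i, (lfns, guids) in enumerate(chunks):      # hypothetical LFN/GUID chunks
#       subCollection = 'sub_collection_%d' % i     # ".root" is appended in _initSubJob
#       subjobs.append(self._initSubJob(job, dataset, lfns, guids, subCollection))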