def getContainerName(self, max_length=configDQ2['OUTPUTDATASET_NAMELENGTH'] - 2):
    if self.name == "":
        name = "task"
    else:
        name = self.name

    # check container name isn't too big
    dsn = ["user", getNickname(), self.creation_date, name, "id_%i/" % self.id]
    if len(".".join(dsn)) > max_length:
        dsn = ["user", getNickname(), self.creation_date,
               name[:-(len(".".join(dsn)) - max_length)], "id_%i/" % self.id]

    return (".".join(dsn)).replace(":", "_").replace(" ", "").replace(",", "_")
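# Standalone sketch of the truncation logic above, using hypothetical values in
# place of getNickname()/creation_date/id (not part of the Ganga API): drop just
# enough of the task name so the container name fits the DQ2 length limit.
def _container_name_sketch(name="mytask", nickname="jdoe",
                           creation_date="20240101", task_id=42, max_length=131):
    dsn = ["user", nickname, creation_date, name, "id_%i/" % task_id]
    if len(".".join(dsn)) > max_length:
        excess = len(".".join(dsn)) - max_length
        dsn = ["user", nickname, creation_date, name[:-excess], "id_%i/" % task_id]
    # strip characters that are not allowed in DQ2 dataset/container names
    return ".".join(dsn).replace(":", "_").replace(" ", "").replace(",", "_")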
def getLatestDBReleaseCaching():
    import tempfile
    import cPickle as pickle
    from pandatools import Client
    from GangaAtlas.Lib.Credentials.ProxyHelper import getNickname

    TMPDIR = tempfile.gettempdir()
    nickname = getNickname(allowMissingNickname=False)
    DBRELCACHE = '%s/ganga.latestdbrel.%s' % (TMPDIR, nickname)

    try:
        fh = open(DBRELCACHE)
        dbrelCache = pickle.load(fh)
        fh.close()
        # reuse the cached value if it is less than an hour old
        if dbrelCache['mtime'] > time.time() - 3600:
            logger.debug('Loading LATEST DBRelease from local cache')
            return dbrelCache['atlas_dbrelease']
        else:
            raise Exception()
    except:
        logger.debug('Updating local LATEST DBRelease cache')
        atlas_dbrelease = Client.getLatestDBRelease(False)
        dbrelCache = {}
        dbrelCache['mtime'] = time.time()
        dbrelCache['atlas_dbrelease'] = atlas_dbrelease
        fh = open(DBRELCACHE, 'w')
        pickle.dump(dbrelCache, fh)
        fh.close()
        return atlas_dbrelease
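# Hedged usage sketch (illustrative only): the cached value is the string the
# Panda backend's dbRelease field expects ('DatasetName:FileName', see the
# commented check in master_prepare below). Job/Athena/Panda are the usual
# Ganga proxy classes; exact attribute names may differ per Ganga version.
#
#   j = Job(application=Athena(), backend=Panda())
#   j.backend.dbRelease = getLatestDBReleaseCaching()
#   j.submit()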
def get_dataset_name(self):
    task = self._getParent()
    name_base = ["user", getNickname(), task.creation_date, "task_%s" % task.id]
    if self.inputdata.dataset:
        subtask_dsname = ".".join(name_base + ["subtask_%s" % task.transforms.index(self),
                                               str(self.inputdata.dataset[0].strip("/"))])
    else:
        subtask_dsname = ".".join(name_base + ["subtask_%s" % task.transforms.index(self)])

    # make sure we keep the name size limit:
    dq2_config = getConfig("DQ2")
    max_length_site = len("ALBERTA-WESTGRID-T2_SCRATCHDISK ")
    max_length = dq2_config['OUTPUTDATASET_NAMELENGTH'] - max_length_site
    if len(subtask_dsname) > max_length:
        logger.debug("Proposed dataset name longer than limit (%d). Restricting dataset name..." % max_length)
        while len(subtask_dsname) > max_length:
            subtask_dsname_toks = subtask_dsname.split('.')
            subtask_dsname = '.'.join(subtask_dsname_toks[:len(subtask_dsname_toks) - 1])
    return subtask_dsname
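# Minimal sketch of the size cap applied above, with hypothetical inputs:
# trailing dot-separated tokens are dropped until the name fits the limit.
def _restrict_dsname_sketch(dsname, max_length=40):
    while len(dsname) > max_length and '.' in dsname:
        dsname = dsname.rsplit('.', 1)[0]
    return dsname

# _restrict_dsname_sketch("user.jdoe.20240101.task_7.subtask_0.someInputDataset.AOD")
#   -> "user.jdoe.20240101.task_7.subtask_0"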
def getJobsForPartitions(self, partitions):
    j = self.createNewJob(partitions[0])
    if len(partitions) >= 1:
        j.splitter = AnaTaskSplitterJob()
        j.splitter.subjobs = partitions
    j.inputdata = self.partitions_data[partitions[0] - 1]
    if self.partitions_sites:
        if stripProxy(j.backend)._name == "Panda":
            if j.backend.site == "AUTO":
                j.backend.site = self.partitions_sites[partitions[0] - 1]
        else:
            j.backend.requirements.sites = self.partitions_sites[partitions[0] - 1]
    j.outputdata = self.outputdata
    if stripProxy(j.backend)._name == "Panda" and j.application.atlas_exetype == "ATHENA":
        j.outputdata.outputdata = []
        #j.outputdata.datasetname = ""
        task = self._getParent()
        dsn = ["user", getNickname(), task.creation_date,
               "%i.t_%s_%s" % (j.id, task.id, task.transforms.index(self))]
        j.outputdata.datasetname = ".".join(dsn)
    #if j.outputdata.datasetname:
    #    today = time.strftime("%Y%m%d",time.localtime())
    #    j.outputdata.datasetname = "%s.%i.%s" % (j.outputdata.datasetname, j.id, today)
    return [j]
def get_container_name(self):
    name_base = ["user", getNickname(), self.creation_date, "task_%s" % self.id]
    return ".".join(name_base + [self.name]) + "/"
def getContainerName(self):
    if self.name == "":
        name = "task"
    else:
        name = self.name
    name_base = ["user", getNickname(), self.creation_date, name, "id_%i" % self.id]
    return (".".join(name_base) + "/").replace(" ", "_")
def master_prepare(self, app, appmasterconfig):
    # PandaTools
    from pandatools import Client
    from pandatools import AthenaUtils
    from taskbuffer.JobSpec import JobSpec
    from taskbuffer.FileSpec import FileSpec

    job = app._getParent()
    logger.debug('AthenaMCPandaRTHandler master_prepare called for %s', job.getFQID('.'))

    usertag = configDQ2['usertag']
    #usertag='user09'
    nickname = getNickname(allowMissingNickname=True)

    self.libDataset = '%s.%s.ganga.%s_%d.lib._%06d' % (usertag, nickname,
                                                       commands.getoutput('hostname').split('.')[0],
                                                       int(time.time()), job.id)
    #self.userprefix='%s.%s.ganga' % (usertag,gridProxy.identity())
    sources = 'sources.%s.tar.gz' % commands.getoutput('uuidgen 2> /dev/null')
    self.library = '%s.lib.tgz' % self.libDataset

    # check DBRelease
    #if job.backend.dbRelease != '' and job.backend.dbRelease.find(':') == -1:
    #    raise ApplicationConfigurationError(None,"ERROR : invalid argument for backend.dbRelease. Must be 'DatasetName:FileName'")

    # unpack library
    logger.debug('Creating source tarball ...')
    tmpdir = '/tmp/%s' % commands.getoutput('uuidgen 2> /dev/null')
    os.mkdir(tmpdir)

    inputbox = []
    if os.path.exists(app.transform_archive):
        # must add a condition on size.
        inputbox += [File(app.transform_archive)]
    if app.evgen_job_option:
        self.evgen_job_option = app.evgen_job_option
        if os.path.exists(app.evgen_job_option):
            # locally modified job option file to add to the input sandbox
            inputbox += [File(app.evgen_job_option)]
            self.evgen_job_option = app.evgen_job_option.split("/")[-1]

    # add input sandbox files
    if (job.inputsandbox):
        for file in job.inputsandbox:
            inputbox += [file]
    # add option files
    for extFile in job.backend.extOutFile:
        try:
            shutil.copy(extFile, tmpdir)
        except IOError:
            os.makedirs(tmpdir)
            shutil.copy(extFile, tmpdir)
    # fill the archive
    for opt_file in inputbox:
        try:
            shutil.copy(opt_file.name, tmpdir)
        except IOError:
            os.makedirs(tmpdir)
            shutil.copy(opt_file.name, tmpdir)

    # now tar it up again
    inpw = job.getInputWorkspace()
    rc, output = commands.getstatusoutput('tar czf %s -C %s .' % (inpw.getPath(sources), tmpdir))
    if rc:
        logger.error('Packing sources failed with status %d', rc)
        logger.error(output)
        raise ApplicationConfigurationError(None, 'Packing sources failed.')

    shutil.rmtree(tmpdir)

    # upload sources
    logger.debug('Uploading source tarball ...')
    try:
        cwd = os.getcwd()
        os.chdir(inpw.getPath())
        rc, output = Client.putFile(sources)
        if output != 'True':
            logger.error('Uploading sources %s failed. Status = %d', sources, rc)
            logger.error(output)
            raise ApplicationConfigurationError(None, 'Uploading archive failed')
    finally:
        os.chdir(cwd)

    # Use Panda's brokerage
    ## if job.inputdata and len(app.sites)>0:
    ##     # update cloud, use inputdata's
    ##     from dq2.info.TiersOfATLAS import whichCloud,ToACache
    ##     inclouds=[]
    ##     for site in app.sites:
    ##         cloudSite=whichCloud(app.sites[0])
    ##         if cloudSite not in inclouds:
    ##             inclouds.append(cloudSite)
    ##     # now converting inclouds content into proper brokering stuff.
    ##     outclouds=[]
    ##     for cloudSite in inclouds:
    ##         for cloudID, eachCloud in ToACache.dbcloud.iteritems():
    ##             if cloudSite==eachCloud:
    ##                 cloud=cloudID
    ##                 outclouds.append(cloud)
    ##                 break
    ##     print outclouds
    ##     # finally, matching with user's wishes
    ##     if len(outclouds)>0:
    ##         if not job.backend.requirements.cloud: # no user wish, update
    ##             job.backend.requirements.cloud=outclouds[0]
    ##         else:
    ##             try:
    ##                 assert job.backend.requirements.cloud in outclouds
    ##             except:
    ##                 raise ApplicationConfigurationError(None,'Input dataset not available in target cloud %s. Please try any of the following %s' % (job.backend.requirements.cloud, str(outclouds)))

    from GangaPanda.Lib.Panda.Panda import runPandaBrokerage
    runPandaBrokerage(job)
    if job.backend.site == 'AUTO':
        raise ApplicationConfigurationError(None, 'site is still AUTO after brokerage!')

    # output dataset preparation and registration
    try:
        outDsLocation = Client.PandaSites[job.backend.site]['ddm']
    except:
        raise ApplicationConfigurationError(None, "Could not extract output dataset location from job.backend.site value: %s. Aborting" % job.backend.site)

    if not app.dryrun:
        for outtype in app.outputpaths.keys():
            dset = string.replace(app.outputpaths[outtype], "/", ".")
            dset = dset[1:]
            # dataset registration must be done only once.
            print "registering output dataset %s at %s" % (dset, outDsLocation)
            try:
                Client.addDataset(dset, False, location=outDsLocation)
                dq2_set_dataset_lifetime(dset, location=outDsLocation)
            except:
                raise ApplicationConfigurationError(None, "Fail to create output dataset %s. Aborting" % dset)
        # extend registration to build job lib dataset:
        print "registering output dataset %s at %s" % (self.libDataset, outDsLocation)
        try:
            Client.addDataset(self.libDataset, False, location=outDsLocation)
            dq2_set_dataset_lifetime(self.libDataset, outDsLocation)
        except:
            raise ApplicationConfigurationError(None, "Fail to create output dataset %s. Aborting" % self.libDataset)

    ###
    cacheVer = "-AtlasProduction_" + str(app.prod_release)

    logger.debug("master job submit?")
    self.outsite = job.backend.site
    if app.se_name and app.se_name != "none" and not self.outsite:
        self.outsite = app.se_name

    # create build job
    jspec = JobSpec()
    jspec.jobDefinitionID = job.id
    jspec.jobName = commands.getoutput('uuidgen 2> /dev/null')
    jspec.AtlasRelease = 'Atlas-%s' % app.atlas_rel
    jspec.homepackage = 'AnalysisTransforms' + cacheVer  #+nightVer
    # common base to Athena and AthenaMC jobs: buildJob is a pilot job which
    # takes care of all inputs for the real jobs (in prepare())
    jspec.transformation = '%s/buildJob-00-00-03' % Client.baseURLSUB
    jspec.destinationDBlock = self.libDataset
    jspec.destinationSE = job.backend.site
    jspec.prodSourceLabel = 'panda'
    jspec.assignedPriority = 2000
    jspec.computingSite = job.backend.site
    jspec.cloud = job.backend.requirements.cloud
    #jspec.jobParameters = self.args not known yet
    jspec.jobParameters = '-o %s' % (self.library)
    if app.userarea:
        print app.userarea
        jspec.jobParameters += ' -i %s' % (os.path.basename(app.userarea))
    else:
        jspec.jobParameters += ' -i %s' % (sources)
    jspec.cmtConfig = AthenaUtils.getCmtConfig(athenaVer=app.atlas_rel)

    matchURL = re.search('(http.*://[^/]+)/', Client.baseURLSSL)
    if matchURL:
        jspec.jobParameters += ' --sourceURL %s' % matchURL.group(1)

    fout = FileSpec()
    fout.lfn = self.library
    fout.type = 'output'
    fout.dataset = self.libDataset
    fout.destinationDBlock = self.libDataset
    jspec.addFile(fout)

    flog = FileSpec()
    flog.lfn = '%s.log.tgz' % self.libDataset
    flog.type = 'log'
    flog.dataset = self.libDataset
    flog.destinationDBlock = self.libDataset
    jspec.addFile(flog)

    #print "MASTER JOB DETAILS:",jspec.jobParameters

    return jspec
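# Illustration of the names built in master_prepare above, with hypothetical
# nickname/host/time/job-id values and assuming usertag='user09':
#   libDataset : user09.jdoe.ganga.lxplus123_1712345678.lib._000042
#   sources    : sources.<uuidgen output>.tar.gz
#   library    : user09.jdoe.ganga.lxplus123_1712345678.lib._000042.lib.tgz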