def check(self, dataset, run_range, verbose=False): fromCache = createDataset("CMS", dataset, ".*root", readcache=True, run_range=run_range) fromDAS = createDataset("CMS", dataset, ".*root", readcache=False, run_range=run_range) filesCache = sorted(list(set(fromCache.listOfGoodFiles()))) filesDAS = sorted(list(set(fromDAS.listOfGoodFiles()))) if verbose: print "%s %r: files cache %d, dbs %d, match %s" % ( dataset, run_range, len(filesCache), len(filesDAS), "OK" if (filesCache == filesDAS) else "FAIL") if not (filesCache == filesDAS): print 'Files in old cache not anymore on DAS:', [ x for x in filesCache if x not in filesDAS ] print 'Files in DAS not in the old cache:', [ x for x in filesDAS if x not in filesCache ] return filesCache == filesDAS
def getFiles(self, dataset, user, pattern, useAAA=False, run_range=None, json=None):
    """Return the good files of *dataset* as full xrootd URLs.

    Files are read from the cache; the EOS redirector is used by default,
    the global AAA redirector when *useAAA* is set.
    """
    ds = createDataset(user, dataset, pattern, readcache=True,
                       run_range=run_range, json=json)
    if useAAA:
        prefix = 'root://cms-xrd-global.cern.ch/%s'
    else:
        prefix = 'root://eoscms.cern.ch//eos/cms%s'
    return [prefix % lfn for lfn in ds.listOfGoodFiles()]
def check(self, dataset, run_range, verbose=False): fromCache = createDataset("CMS", dataset, ".*root", readcache=True, run_range=run_range) fromDAS = createDataset("CMS", dataset, ".*root", readcache=False, run_range=run_range) filesCache = sorted(list(set(fromCache.listOfGoodFiles()))) filesDAS = sorted(list(set(fromDAS.listOfGoodFiles()))) if verbose: print "%s %r: files cache %d, dbs %d, match %s" % ( dataset, run_range, len(filesCache), len(filesDAS), "OK" if (filesCache == filesDAS) else "FAIL") return filesCache == filesDAS
def getPrimaryDatasetEntries(self, dataset, user, pattern, useAAA=False, run_range=None):
    """Return the number of primary-dataset entries for *dataset*.

    The dataset is read from the cache (positional ``True`` is the
    readcache flag of ``createDataset``).
    """
    return createDataset(user, dataset, pattern, True,
                         run_range=run_range).primaryDatasetEntries
def getFiles(self, dataset, user, pattern, dbsInstance, useAAA=False, run_range=None, json=None, unsafe = False):
    """Return the good files of *dataset* as plain-str xrootd URLs.

    Redirector choice, in increasing precedence: EOS by default, the
    global AAA redirector when *useAAA* is set, the Lyon DPM endpoint
    when ``self.useLyonAAA`` is set.
    """
    ds = createDataset(user, dataset, pattern, readcache=True,
                       run_range=run_range, json=json,
                       unsafe=unsafe, dbsInstance=dbsInstance)
    prefix = 'root://eoscms.cern.ch//eos/cms%s'
    if useAAA:
        prefix = 'root://cms-xrd-global.cern.ch/%s'
    if self.useLyonAAA:
        prefix = 'root://lyogrid06.in2p3.fr//dpm/in2p3.fr/home/cms/data%s'
    # str() keeps the return type plain str rather than e.g. unicode.
    return [str(prefix % lfn) for lfn in ds.listOfGoodFiles()]
def datasetToSource(user, dataset, pattern='.*root', readCache=False):
    """Build a CMSSW ``PoolSource`` fed with the dataset's good files.

    Event sorting is disabled and duplicate checking turned off; the
    file list comes from ``createDataset(...).listOfGoodFiles()``.
    """
    ds = createDataset(user, dataset, pattern, readCache)
    src = cms.Source(
        "PoolSource",
        noEventSort=cms.untracked.bool(True),
        duplicateCheckMode=cms.untracked.string("noDuplicateCheck"),
        fileNames=cms.untracked.vstring())
    src.fileNames.extend(ds.listOfGoodFiles())
    return src
def getFilesLFN(self, dataset, user, pattern, useAAA=True, run_range=None, json=None):
    """Return the raw LFNs of the dataset's good files (no xrootd prefix).

    NOTE(review): *useAAA* is accepted for signature symmetry with
    getFiles() but is not used here -- confirm before relying on it.
    """
    ds = createDataset(user, dataset, pattern, readcache=True,
                       run_range=run_range, json=json)
    return ds.listOfGoodFiles()
def datasetToSource( user, dataset, pattern='.*root', readCache=False):
    """Create a ``PoolSource`` whose fileNames are the dataset's good files.

    Disables event sorting and duplicate checks, as appropriate for a
    simple file-based source.
    """
    goodFiles = createDataset(user, dataset, pattern, readCache).listOfGoodFiles()
    source = cms.Source(
        "PoolSource",
        noEventSort=cms.untracked.bool(True),
        duplicateCheckMode=cms.untracked.string("noDuplicateCheck"),
        fileNames=cms.untracked.vstring())
    source.fileNames.extend(goodFiles)
    return source
def getFilesFromDESY(self, dataset, user, pattern, run_range=None, json=None):
    """Return the dataset's good files as dCache URLs on the DESY Tier-2."""
    prefix = 'dcap://dcache-cms-dcap.desy.de/pnfs/desy.de/cms/tier2/%s'
    ds = createDataset(user, dataset, pattern, readcache=True,
                       run_range=run_range, json=json)
    return [prefix % lfn for lfn in ds.listOfGoodFiles()]
def getPrimaryDatasetEntries(self, dataset, user, pattern, useAAA=False):
    """Return the number of primary-dataset entries for *dataset*.

    Reads from the cache (positional ``True`` is the readcache flag).
    """
    return createDataset(user, dataset, pattern, True).primaryDatasetEntries
def getFilesFromDESY(self, dataset, user, pattern, run_range=None):
    """Return the dataset's good files as dCache URLs on the DESY Tier-2."""
    ds = createDataset(user, dataset, pattern, readcache=True,
                       run_range=run_range)
    prefix = 'dcap://dcache-cms-dcap.desy.de/pnfs/desy.de/cms/tier2/%s'
    return [prefix % lfn for lfn in ds.listOfGoodFiles()]
default=None, help='Lists of bad jobs, as [1,5];[2,5,7]') (options,args) = parser.parse_args() if len(args)!=2: print 'please provide the dataset name and the job directory in argument' sys.exit(1) dataset = args[0] allJobsDir = args[1] user = options.user pattern = fnmatch.translate( options.wildcard ) data = createDataset(user, dataset, pattern, options.readcache) badJobs = [] if options.badjoblists is None: badJobs = buildBadJobsList( data ) else: # import pdb; pdb.set_trace() bjlsstr = options.badjoblists.split(';') bjlsstr = filter(lambda x: len(x)>0, bjlsstr) bjls = map(eval, bjlsstr) setOfBadJobs = set() for bjl in bjls: setOfBadJobs.update( set(bjl) ) # print setOfBadJobs
default=None, help='Lists of bad jobs, as [1,5];[2,5,7]') (options, args) = parser.parse_args() if len(args) != 2: print 'please provide the dataset name and the job directory in argument' sys.exit(1) dataset = args[0] allJobsDir = args[1] user = options.user pattern = fnmatch.translate(options.wildcard) data = createDataset(user, dataset, pattern, options.readcache) badJobs = [] if options.badjoblists is None: badJobs = buildBadJobsList(data) else: # import pdb; pdb.set_trace() bjlsstr = options.badjoblists.split(';') bjlsstr = filter(lambda x: len(x) > 0, bjlsstr) bjls = map(eval, bjlsstr) setOfBadJobs = set() for bjl in bjls: setOfBadJobs.update(set(bjl)) # print setOfBadJobs # sys.exit(1)
action = 'store_true', default=False, help='Read from the cache.') parser.add_option("--min-run", dest="min_run", default=-1, type=int, help='When querying DBS, require runs >= than this run') parser.add_option("--max-run", dest="max_run", default=-1, type=int, help='When querying DBS, require runs <= than this run') (options,args) = parser.parse_args() if len(args)!=1: parser.print_help() sys.exit(1) user = options.user name = args[0] info = not options.noinfo run_range = (options.min_run,options.max_run) data = createDataset(user, name, fnmatch.translate( options.wildcard ), options.readcache, options.basedir, run_range=run_range) data.printInfo() data.printFiles(abspath = options.abspath, info = info) pprint.pprint( data.filesAndSizes ) if options.report: pprint.pprint( data.report )