def _from_dbs_and_cache(fn, ds):
    """Look up the files of dataset ``ds`` in DBS and cache them in ``fn``.

    Each entry returned by ``files_in_dataset`` is written to ``fn`` (opened
    through ``gzip_open``) followed by a newline, and the result of
    ``files_in_dataset`` is returned unchanged.
    """
    print('hitting DBS %s for %s' % (ds, fn))
    from JMTucker.Tools.DBS import files_in_dataset

    # Dataset names ending in /USER are queried in the 'phys03' instance,
    # everything else in 'global'.
    if ds.endswith('/USER'):
        dbs_instance = 'phys03'
    else:
        dbs_instance = 'global'
    files = files_in_dataset(ds, instance=dbs_instance)

    with gzip_open(fn, 'w') as cache:
        for name in files:
            cache.write(name)
            cache.write('\n')

    return files
import os

# Provide the MinBias GEN-SIM file list as ``files``.
#
# First try the local cache module ``minbias_files``.  If it cannot be
# imported, query DBS for the dataset, pickle the result to
# 'minbias_files.pkl' and write a one-line stub module 'minbias_files.py'
# that loads the pickle, so the next run takes the cached path.
try:
    print('getting minbias file list from cache')
    from minbias_files import files
except ImportError:
    print('nope, getting minbias file list from DBS and caching')
    from JMTucker.Tools.DBS import files_in_dataset
    from JMTucker.Tools.general import to_pickle

    files = files_in_dataset('/MinBias_TuneCUETP8M1_13TeV-pythia8/RunIISummer15GS-MCRUN2_71_V1-v2/GEN-SIM')
    # comp=True presumably gzip-compresses the pickle, since the stub below
    # reads it back through gzip.GzipFile -- confirm against to_pickle.
    to_pickle(files, 'minbias_files.pkl', comp=True)

    # Use a with-block so the stub is flushed and closed deterministically
    # instead of relying on garbage collection of the anonymous file object.
    # NOTE(review): the stub opens 'minbias_files.pkl' by relative path, so
    # the cached import only works from the directory holding the pickle.
    with open('minbias_files.py', 'wt') as stub:
        stub.write("import cPickle, gzip; files = cPickle.load(gzip.GzipFile('minbias_files.pkl', 'rb'))\n")
import os

# Provide the MinBias GEN-SIM file list as ``files``.
#
# First try the local cache module ``minbias_files``.  If it cannot be
# imported, query DBS for the dataset and write 'minbias_files.py'
# containing the repr of the result, so the next run takes the cached path.
try:
    print('getting minbias file list from cache')
    from minbias_files import files
except ImportError:
    print('getting minbias file list from DBS and caching')
    from JMTucker.Tools.DBS import files_in_dataset

    files = files_in_dataset('/MinBias_TuneZ2star_8TeV-pythia6/Summer12-START50_V13-v3/GEN-SIM')

    # Use a with-block so the cache module is flushed and closed
    # deterministically instead of relying on garbage collection of the
    # anonymous file object.
    with open('minbias_files.py', 'wt') as cache:
        cache.write('files = %r\n' % files)
#!/usr/bin/env python
# For every dataset path given on the command line, list its files from DBS
# and report groups of files whose basenames share the same second
# underscore-separated field.

import os
import sys
from collections import defaultdict
from pprint import pprint

from JMTucker.Tools.DBS import files_in_dataset
from JMTucker.Tools.general import typed_from_argv

# Take the DBS instance from an int-typed command-line argument if
# typed_from_argv finds one; otherwise fall back to 'global'.
instance = typed_from_argv(int)
if instance is None:
    instance = 'global'

# Only arguments containing exactly three slashes are treated as dataset paths.
datasets = [arg for arg in sys.argv[1:] if arg.count('/') == 3]

for dataset in datasets:
    files = files_in_dataset(dataset, instance)
    print('%s has %s files' % (dataset, len(files)))

    # Group the files by the field after the first underscore of the basename.
    by_number = defaultdict(list)
    for path in files:
        number = os.path.basename(path).split('_')[1]
        by_number[number].append(path)

    for number, paths in by_number.items():
        if len(paths) > 1:
            print('duplicate(s) for %s:' % number)
            pprint(paths)