def _from_dbs_and_cache(fn, ds):
    # Query DBS for the dataset's file list and cache it to a gzipped
    # text file, one LFN per line, so later runs can skip the query.
    print 'hitting DBS %s for %s' % (ds, fn)
    from gzip import open as gzip_open  # stdlib alias; gzip_open was not imported in the snippet
    from JMTucker.Tools.DBS import files_in_dataset
    # User-produced datasets (/USER) live in the phys03 instance.
    files = files_in_dataset(ds, instance='phys03' if ds.endswith('/USER') else 'global')
    with gzip_open(fn, 'w') as f:
        for file in files:
            f.write(file)
            f.write('\n')
    return files
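The gzipped cache written above is meant to short-circuit later DBS queries. A minimal sketch of the read-or-fetch side, assuming the same one-LFN-per-line format (files_for_dataset is a hypothetical wrapper, not part of the module):

def files_for_dataset(fn, ds):
    # Hypothetical wrapper: use the cached list if present, otherwise
    # fall back to _from_dbs_and_cache to query DBS and (re)write it.
    import os
    from gzip import open as gzip_open
    if os.path.isfile(fn):
        with gzip_open(fn) as f:
            return [line.strip() for line in f if line.strip()]
    return _from_dbs_and_cache(fn, ds)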
Example #3
import os

try:
    print 'getting minbias file list from cache'
    from minbias_files import files
except ImportError:
    print 'nope, getting minbias file list from DBS and caching'
    from JMTucker.Tools.DBS import files_in_dataset
    from JMTucker.Tools.general import to_pickle
    files = files_in_dataset('/MinBias_TuneCUETP8M1_13TeV-pythia8/RunIISummer15GS-MCRUN2_71_V1-v2/GEN-SIM')
    # Cache the list as a gzipped pickle, then write a tiny loader module
    # so the next run's "from minbias_files import files" succeeds.
    to_pickle(files, 'minbias_files.pkl', comp=True)
    open('minbias_files.py', 'wt').write("import cPickle, gzip; files = cPickle.load(gzip.GzipFile('minbias_files.pkl', 'rb'))\n")
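For reference, the loader module written by the except branch is exactly the single line above:

import cPickle, gzip; files = cPickle.load(gzip.GzipFile('minbias_files.pkl', 'rb'))

On the next run the try branch's import succeeds and DBS is never contacted; deleting minbias_files.py and minbias_files.pkl forces a fresh query.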
Example #4
import os

try:
    print 'getting minbias file list from cache'
    from minbias_files import files
except ImportError:
    print 'nope, getting minbias file list from DBS and caching'
    from JMTucker.Tools.DBS import files_in_dataset
    files = files_in_dataset('/MinBias_TuneZ2star_8TeV-pythia6/Summer12-START50_V13-v3/GEN-SIM')
    # Cache by writing the list out as a Python literal for the next import.
    open('minbias_files.py', 'wt').write('files = %r\n' % files)
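This variant skips the pickle entirely: %r renders the file list as a plain Python literal, so the generated minbias_files.py defines files directly and stays human-readable, at the cost of a larger module for big datasets.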
#!/usr/bin/env python

import os, sys
from collections import defaultdict
from pprint import pprint
from JMTucker.Tools.DBS import files_in_dataset
from JMTucker.Tools.general import typed_from_argv

# An integer on the command line selects a non-default DBS instance;
# otherwise stick with 'global'.
instance = typed_from_argv(int)
if instance is None:
    instance = 'global'

# Dataset paths have the form /primary/processed/tier, i.e. exactly three '/'.
datasets = [x for x in sys.argv[1:] if x.count('/') == 3]

for dataset in datasets:
    d = defaultdict(list)
    files = files_in_dataset(dataset, instance)
    print dataset, 'has', len(files), 'files'
    for f in files:
        # Group files by the job number embedded in the filename; more than
        # one file per job number means duplicated output.
        num = os.path.basename(f).split('_')[1]
        d[num].append(f)
    for k,v in d.iteritems():
        if len(v) > 1:
            print 'duplicate(s) for %s:' % k
            pprint(v)
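The grouping key assumes CRAB-style output names of the form <stem>_<job>_<try>_<hash>.root, where the second underscore-separated field is the job number, so two tries of the same job collide on the key. An illustrative check (the paths below are made up):

import os

f1 = '/store/user/someone/ntuple_123_1_aBc.root'
f2 = '/store/user/someone/ntuple_123_2_dEf.root'
print os.path.basename(f1).split('_')[1]  # '123'
print os.path.basename(f2).split('_')[1]  # '123' -- same key, flagged as a duplicate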