def cb(dataset, sample): ljname = sample.name.ljust(mlen+3) try: sites = DBS.sites_for_dataset(sample.dataset, json=True) except RuntimeError: print colors.boldred(ljname + ' DBS problem') else: print ljname, sites.sort(key=lambda x: x['name']) for site in sites: if DBS.site_is_tape(site): continue is_complete = DBS.complete_at_site(site) print (colors.green if is_complete else colors.yellow)(DBS.site_completions_string(site)), ' ', print
def filenames(self): fns = self.datasets[self.curr_dataset].filenames if not fns: try: import JMTucker.Tools.SampleFiles as sfns x = sfns.get(self.name, self.curr_dataset) if x is not None: nfns, fns = x if len(fns) != nfns: raise ValueError('problem with JMTucker.Tools.SampleFiles') except ImportError: pass if not fns: print 'hitting DBS for filenames for', self.name, self.curr_dataset, self.dataset fns = self.datasets[self.curr_dataset].filenames = DBS.files_in_dataset(self.dataset, self.dbs_inst) return fns
dses = ['main'] else: dses = ['miniaod'] for ds in dses: print colors.bold(ds) for sample in Samples.registry.all(): if not sample.has_dataset(ds): continue sample.set_curr_dataset(ds) if '/None/' in sample.dataset or getattr(sample, 'is_private', False): continue try: sites = DBS.sites_for_dataset(sample.dataset, instance=sample.dbs_inst, json=True) except (RuntimeError, ValueError): print colors.yellow('%s %s DBS problem' % (sample.name, sample.dataset)) continue if not sites: continue print sample.name, sites.sort(key=lambda site: DBS.site_completions(site, True)) max_site_completion = DBS.site_completions(sites[-1], True) found = False for site in sites: if DBS.site_is_tape(site): continue
def main(samples_registry):
    """Command-line driver: dispatch on keywords found in sys.argv.

    Supported commands: merge, printmissing, ds, file, nevents, site,
    samplefiles.  Each branch reads its own extra arguments from argv.
    """
    from glob import glob
    from sys import argv
    from pprint import pprint
    from JMTucker.Tools import colors
    # NOTE(review): this function also uses os, sys, typed_from_argv, merge
    # and DBS without importing them locally -- presumably they are imported
    # at module level (outside this view); verify.
    if 'merge' in argv:
        # Merge the per-sample ROOT files named on the command line into one
        # output file, normalizing as requested.
        samples = samples_registry.from_argv(from_root_fns=True, raise_if_none=True)
        # First .root argument that is NOT an existing file is the output name.
        out_fn = [x for x in argv if x.endswith('.root') and not os.path.isfile(x)]
        out_fn = out_fn[0] if out_fn else 'merge.root'
        norm_to = typed_from_argv(float, default_value=1.)
        norm_path = typed_from_argv(str, default_value='', name='norm_path')
        merge(samples, output=out_fn, norm_to=norm_to, norm_path=norm_path)
    elif 'printmissing' in argv:
        # Report samples lacking a batch directory and/or a local ROOT file.
        samples = [s.name for s in samples_registry.from_argv(raise_if_none=True)]
        samples.sort()
        look_for_root_files = 'no_root' not in sys.argv
        no_batch_dir, no_root_file = [], []
        for s in samples:
            if not os.path.isdir('condor_' + s) and not glob('crab_*_' + s):
                no_batch_dir.append(s)
            if not os.path.isfile('%s.root' % s):
                no_root_file.append(s)
        if no_batch_dir:
            print colors.yellow('no batch dir for these:')
            for s in no_batch_dir:
                print s
        if look_for_root_files and no_root_file:
            print colors.yellow('no root file for these:')
            for s in no_root_file:
                print s
    elif 'ds' in argv:
        # Print the full dataset path for exactly one sample+dataset pair;
        # the dataset key is the argv token following the sample name.
        samples = samples_registry.from_argv(raise_if_none=True)
        if len(samples) != 1:
            raise ValueError('must have exactly one sample in argv')
        sample = samples[0]
        dataset = argv[argv.index(sample.name)+1]
        if not sample.has_dataset(dataset):
            raise KeyError('no dataset %s in %s' % (dataset, sample))
        print sample.datasets[dataset].dataset
    elif 'file' in argv:
        # Print the first N (default 5, overridable via an int in argv)
        # filenames of one sample's dataset.
        samples = samples_registry.from_argv(raise_if_none=True)
        if len(samples) != 1:
            raise ValueError('must have exactly one sample in argv')
        sample = samples[0]
        dataset = argv[argv.index(sample.name)+1]
        if not sample.has_dataset(dataset):
            raise KeyError('no dataset %s in %s' % (dataset, sample))
        sample.set_curr_dataset(dataset)
        for x in sample.filenames[:typed_from_argv(int, 5)]:
            print x
    elif 'nevents' in argv:
        # Print the event count DBS reports for one sample's dataset.
        samples = samples_registry.from_argv(raise_if_none=True)
        if len(samples) != 1:
            raise ValueError('must have exactly one sample in argv')
        sample = samples[0]
        dataset = argv[argv.index(sample.name)+1]
        if not sample.has_dataset(dataset):
            raise KeyError('no dataset %s in %s' % (dataset, sample))
        sample.set_curr_dataset(dataset)
        print DBS.numevents_in_dataset(sample.dataset)
    elif 'site' in argv:
        # For each sample, list (non-tape) hosting sites with completion
        # strings, colored green when complete at that site.
        samples = samples_registry.from_argv(raise_if_none=True)
        dataset = samples_registry.datasets_from_argv()
        if len(dataset) > 1:
            raise ValueError('only zero/one dataset allowed')
        dataset = dataset[0] if len(dataset) == 1 else 'main'
        mlen = max(len(s.name) for s in samples)
        for sample in samples:
            sample.set_curr_dataset(dataset)
            try:
                sites = DBS.sites_for_dataset(sample.dataset, json=True)
            except RuntimeError:
                print sample.name, 'PROBLEM'
                continue
            print sample.name.ljust(mlen+5),
            sites.sort(key=lambda x: x['name'])
            for site in sites:
                if DBS.site_is_tape(site):
                    continue
                is_complete = DBS.complete_at_site(site)
                print (colors.green if is_complete else colors.yellow)(DBS.site_completions_string(site)),
            # NOTE(review): no bare `print` here, so the trailing-comma prints
            # of successive samples run together on one line; the newer main()
            # emits a terminating newline per sample -- confirm intended.
    elif 'samplefiles' in argv:
        # Emit SampleFiles registry entries for each sample's dataset.
        # rm a; touch a; for ds in '' miniaod; do for x in qcd ttbar leptonic; do ( samples samplefiles ${x}_samples_2017 $ds >> a ) ; done; done
        # rm a; touch a; for ds in '' miniaod; do for year in 2017 2018; do for x in data auxiliary_data ; do ( samples samplefiles ${x}_samples_${year} $ds >> a ) ; done; done; done
        samples = samples_registry.from_argv(raise_if_none=True)
        dataset = 'main'
        # First 'miniaod' or 'ntuple*' token in argv selects the dataset.
        for arg in argv[1:]:
            if arg == 'miniaod' or arg.startswith('ntuple'):
                dataset = arg
                break
        print 'getting files for dataset %s:' % dataset, ', '.join(s.name for s in samples)
        import SampleFiles as sf
        for s in samples:
            d = {}
            if not s.has_dataset(dataset):
                print colors.yellow('no dataset %s for %s' % (dataset, s.name))
                continue
            s.set_curr_dataset(dataset)
            if sf.has(s.name, dataset):
                # Refuse to clobber an existing registry entry.
                raise KeyError('SampleFiles already has an entry for %s' % s.name)
            else:
                fns = s.filenames
                print 'DBS has %i files for %s' % (len(fns), s.name)
                d[(s.name, dataset)] = (len(fns), fns)
                # Printed in the form SampleFiles expects to be pasted back in.
                print "('%s:%s', '%s')," % (s.name, dataset, sf._enc(d))
def main(samples_registry):
    """Command-line driver: dispatch on keywords found in sys.argv.

    Supported commands: merge, printmissing, name, ds, file, nevents,
    files_for_events, site, samplefiles, sfhas.  Most commands run over the
    cross product of the samples and datasets named on the command line via
    the runem() helper.
    """
    from glob import glob
    from sys import argv
    from pprint import pprint
    from JMTucker.Tools import colors
    from JMTucker.Tools.general import chunks, typed_from_argv
    # NOTE(review): os, sys, merge and DBS are used below without local
    # imports -- presumably imported at module level (outside this view);
    # verify.
    samples = samples_registry.from_argv()
    datasets = samples_registry.datasets_from_argv()
    def prnt(*x):
        # Space-join the stringified arguments onto one output line.
        print ' '.join(str(y) for y in x)
    def runem(cb):
        # Invoke cb(dataset, sample) for every (dataset, sample) pair,
        # warning and skipping pairs where the sample lacks the dataset.
        for dataset in datasets:
            for sample in samples:
                if not sample.has_dataset(dataset):
                    print colors.yellow('no dataset %s for %s' % (dataset, sample.name))
                    continue
                sample.set_curr_dataset(dataset)
                cb(dataset, sample)
    if 'merge' in argv:
        # Merge the per-sample ROOT files named on the command line into one
        # output file, normalizing as requested.
        samples = samples_registry.from_argv(from_root_fns=True, raise_if_none=True)
        # First .root argument that is NOT an existing file is the output name.
        out_fn = [x for x in argv if x.endswith('.root') and not os.path.isfile(x)]
        out_fn = out_fn[0] if out_fn else 'merge.root'
        norm_to = typed_from_argv(float, default_value=1.)
        norm_path = typed_from_argv(str, default_value='', name='norm_path')
        merge(samples, output=out_fn, norm_to=norm_to, norm_path=norm_path)
    elif 'printmissing' in argv:
        # Report samples lacking a batch directory and/or a local ROOT file.
        samples = [s.name for s in samples_registry.from_argv(raise_if_none=True)]
        samples.sort()
        look_for_root_files = 'no_root' not in sys.argv
        no_batch_dir, no_root_file = [], []
        for s in samples:
            if not os.path.isdir('condor_' + s) and not glob('crab_*_' + s):
                no_batch_dir.append(s)
            if not os.path.isfile('%s.root' % s):
                no_root_file.append(s)
        if no_batch_dir:
            print colors.yellow('no batch dir for these:')
            for s in no_batch_dir:
                print s
        if look_for_root_files and no_root_file:
            print colors.yellow('no root file for these:')
            for s in no_root_file:
                print s
    elif 'name' in argv:
        runem(lambda dataset, sample: prnt(sample.name, dataset))
    elif 'ds' in argv:
        runem(lambda dataset, sample: prnt(sample.name, dataset, sample.dataset))
    elif 'file' in argv:
        # Print the first N (default 5, overridable via an int in argv)
        # filenames per sample/dataset pair.
        runem(lambda dataset, sample: [prnt(sample.name, dataset, x) for x in sample.filenames[:typed_from_argv(int, 5)]])
    elif 'nevents' in argv:
        runem(lambda dataset, sample: prnt(sample.name, dataset, DBS.numevents_in_dataset(sample.dataset)))
    elif 'files_for_events' in argv:
        # argv carries a flat run/lumi/event list; regroup it into triples.
        rles = typed_from_argv(int, return_multiple=True)
        if len(rles) % 3 != 0:
            raise ValueError('expect list of ints in argv with length divisible by 3 [run1 lumi1 event1 ...]')
        rles = list(chunks(rles,3))
        runem(lambda dataset, sample: prnt(sample.name, dataset, ' '.join(DBS.files_for_events(rles, sample.dataset))))
    elif 'site' in argv:
        # For each sample, list (non-tape) hosting sites with completion
        # strings, colored green when complete at that site.
        mlen = max(len(s.name) for s in samples)
        def cb(dataset, sample):
            ljname = sample.name.ljust(mlen+3)
            try:
                sites = DBS.sites_for_dataset(sample.dataset, json=True)
            except RuntimeError:
                print colors.boldred(ljname + ' DBS problem')
            else:
                print ljname,
                sites.sort(key=lambda x: x['name'])
                for site in sites:
                    if DBS.site_is_tape(site):
                        continue
                    is_complete = DBS.complete_at_site(site)
                    print (colors.green if is_complete else colors.yellow)(DBS.site_completions_string(site)), ' ',
                print
        runem(cb)
    elif 'samplefiles' in argv:
        # Emit SampleFiles registry entries for each sample/dataset pair,
        # refusing to clobber existing entries.
        import SampleFiles as sf
        def cb(dataset, sample):
            if sf.has(sample.name, dataset):
                raise KeyError('SampleFiles already has an entry for %s' % sample.name)
            fns = sample.filenames
            print 'DBS has %i files for %s' % (len(fns), sample.name)
            d = {(sample.name, dataset): (len(fns), fns)}
            # Printed in the form SampleFiles expects to be pasted back in.
            print "('%s:%s', '%s')," % (sample.name, dataset, sf._enc(d))
        runem(cb)
    elif 'sfhas' in argv:
        # Print samples that have (or, with 'neg', lack) a SampleFiles entry.
        neg = 'neg' in argv
        import SampleFiles as sf
        for dataset in datasets:
            for sample in samples:
                if sf.has(sample.name, dataset) != neg:
                    print sample.name