Ejemplo n.º 1
0
 def cb(dataset, sample):
     ljname = sample.name.ljust(mlen+3)
     try:
         sites = DBS.sites_for_dataset(sample.dataset, json=True)
     except RuntimeError:
         print colors.boldred(ljname + ' DBS problem')
     else:
         print ljname,
         sites.sort(key=lambda x: x['name'])
         for site in sites:
             if DBS.site_is_tape(site):
                 continue
             is_complete = DBS.complete_at_site(site)
             print (colors.green if is_complete else colors.yellow)(DBS.site_completions_string(site)), ' ',
         print
Ejemplo n.º 2
0
    def filenames(self):
        fns = self.datasets[self.curr_dataset].filenames
        if not fns:
            try:
                import JMTucker.Tools.SampleFiles as sfns
                x = sfns.get(self.name, self.curr_dataset)
                if x is not None:
                    nfns, fns = x
                    if len(fns) != nfns:
                        raise ValueError('problem with JMTucker.Tools.SampleFiles')
            except ImportError:
                pass

            if not fns:
                print 'hitting DBS for filenames for', self.name, self.curr_dataset, self.dataset
                fns = self.datasets[self.curr_dataset].filenames = DBS.files_in_dataset(self.dataset, self.dbs_inst)
        return fns
Ejemplo n.º 3
0
    def filenames(self):
        fns = self.datasets[self.curr_dataset].filenames
        if not fns:
            try:
                import JMTucker.Tools.SampleFiles as sfns
                x = sfns.get(self.name, self.curr_dataset)
                if x is not None:
                    nfns, fns = x
                    if len(fns) != nfns:
                        raise ValueError('problem with JMTucker.Tools.SampleFiles')
            except ImportError:
                pass

            if not fns:
                print 'hitting DBS for filenames for', self.name, self.curr_dataset, self.dataset
                fns = self.datasets[self.curr_dataset].filenames = DBS.files_in_dataset(self.dataset, self.dbs_inst)
        return fns
Ejemplo n.º 4
0
    dses = ['main']
else:
    dses = ['miniaod']

for ds in dses:
    print colors.bold(ds)
    for sample in Samples.registry.all():
        if not sample.has_dataset(ds):
            continue

        sample.set_curr_dataset(ds)
        if '/None/' in sample.dataset or getattr(sample, 'is_private', False):
            continue

        try:
            sites = DBS.sites_for_dataset(sample.dataset, instance=sample.dbs_inst, json=True)
        except (RuntimeError, ValueError):
            print colors.yellow('%s %s DBS problem' % (sample.name, sample.dataset))
            continue

        if not sites:
            continue

        print sample.name,
        sites.sort(key=lambda site: DBS.site_completions(site, True))
        max_site_completion = DBS.site_completions(sites[-1], True)
        found = False
        for site in sites:
            if DBS.site_is_tape(site):
                continue
Ejemplo n.º 5
0
def main(samples_registry):
    """Command-line driver: dispatch on a mode keyword found in argv.

    Modes: 'merge', 'printmissing', 'ds', 'file', 'nevents', 'site',
    'samplefiles'.  NOTE(review): os, sys, DBS, merge and typed_from_argv
    are assumed to be provided by the enclosing module — confirm against
    the file's top-level imports.
    """
    from glob import glob
    from sys import argv
    from pprint import pprint
    from JMTucker.Tools import colors

    if 'merge' in argv:
        # Merge the ROOT files named on the command line into one output.
        samples = samples_registry.from_argv(from_root_fns=True, raise_if_none=True)
        # Output name: first .root arg that is not an existing file (so
        # inputs are never clobbered); default 'merge.root'.
        out_fn = [x for x in argv if x.endswith('.root') and not os.path.isfile(x)]
        out_fn = out_fn[0] if out_fn else 'merge.root'
        norm_to = typed_from_argv(float, default_value=1.)
        norm_path = typed_from_argv(str, default_value='', name='norm_path')
        merge(samples, output=out_fn, norm_to=norm_to, norm_path=norm_path)

    elif 'printmissing' in argv:
        # Report samples lacking a batch directory and/or a .root file.
        samples = [s.name for s in samples_registry.from_argv(raise_if_none=True)]
        samples.sort()
        look_for_root_files = 'no_root' not in sys.argv
        no_batch_dir, no_root_file = [], []
        for s in samples:
            # A sample is "batched" if either a condor_ dir or crab_* dir exists.
            if not os.path.isdir('condor_' + s) and not glob('crab_*_' + s):
                no_batch_dir.append(s)
            if not os.path.isfile('%s.root' % s):
                no_root_file.append(s)
        if no_batch_dir:
            print colors.yellow('no batch dir for these:')
            for s in no_batch_dir:
                print s
        if look_for_root_files and no_root_file:
            print colors.yellow('no root file for these:')
            for s in no_root_file:
                print s

    elif 'ds' in argv:
        # Print the dataset path registered under the key that follows the
        # sample name in argv; exactly one sample allowed.
        samples = samples_registry.from_argv(raise_if_none=True)
        if len(samples) != 1:
            raise ValueError('must have exactly one sample in argv')
        sample = samples[0]
        dataset = argv[argv.index(sample.name)+1]
        if not sample.has_dataset(dataset):
            raise KeyError('no dataset %s in %s' % (dataset, sample))
        print sample.datasets[dataset].dataset

    elif 'file' in argv:
        # Print the first few filenames (default 5, overridable by an int
        # in argv) of one sample's chosen dataset.
        samples = samples_registry.from_argv(raise_if_none=True)
        if len(samples) != 1:
            raise ValueError('must have exactly one sample in argv')
        sample = samples[0]
        dataset = argv[argv.index(sample.name)+1]
        if not sample.has_dataset(dataset):
            raise KeyError('no dataset %s in %s' % (dataset, sample))
        sample.set_curr_dataset(dataset)
        for x in sample.filenames[:typed_from_argv(int, 5)]:
            print x

    elif 'nevents' in argv:
        # Query DBS for the event count of one sample's chosen dataset.
        samples = samples_registry.from_argv(raise_if_none=True)
        if len(samples) != 1:
            raise ValueError('must have exactly one sample in argv')
        sample = samples[0]
        dataset = argv[argv.index(sample.name)+1]
        if not sample.has_dataset(dataset):
            raise KeyError('no dataset %s in %s' % (dataset, sample))
        sample.set_curr_dataset(dataset)
        print DBS.numevents_in_dataset(sample.dataset)

    elif 'site' in argv:
        # One line per sample: padded name plus a color-coded completion
        # string per non-tape site (green = complete, yellow = otherwise).
        samples = samples_registry.from_argv(raise_if_none=True)
        dataset = samples_registry.datasets_from_argv()
        if len(dataset) > 1:
            raise ValueError('only zero/one dataset allowed')
        dataset = dataset[0] if len(dataset) == 1 else 'main'
        # Pad names to the longest so site columns line up.
        mlen = max(len(s.name) for s in samples)
        for sample in samples:
            sample.set_curr_dataset(dataset)
            try:
                sites = DBS.sites_for_dataset(sample.dataset, json=True)
            except RuntimeError:
                print sample.name, 'PROBLEM'
                continue
            print sample.name.ljust(mlen+5),
            sites.sort(key=lambda x: x['name'])
            for site in sites:
                if DBS.site_is_tape(site):
                    continue
                is_complete = DBS.complete_at_site(site)
                print (colors.green if is_complete else colors.yellow)(DBS.site_completions_string(site)),

    elif 'samplefiles' in argv:
        # Emit (name:dataset, encoded-filelist) tuples suitable for pasting
        # into SampleFiles; refuses to overwrite an existing entry.
        # rm a; touch a; for ds in '' miniaod; do for x in qcd ttbar leptonic; do ( samples samplefiles ${x}_samples_2017 $ds >> a ) ; done; done
        # rm a; touch a; for ds in '' miniaod; do for year in 2017 2018; do for x in data auxiliary_data ; do ( samples samplefiles ${x}_samples_${year} $ds >> a ) ; done; done; done
        samples = samples_registry.from_argv(raise_if_none=True)
        dataset = 'main'
        for arg in argv[1:]:
            if arg == 'miniaod' or arg.startswith('ntuple'):
                dataset = arg
                break
        print 'getting files for dataset %s:' % dataset, ', '.join(s.name for s in samples)
        import SampleFiles as sf
        for s in samples:
            d = {}
            if not s.has_dataset(dataset):
                print colors.yellow('no dataset %s for %s' % (dataset, s.name))
                continue
            s.set_curr_dataset(dataset)
            if sf.has(s.name, dataset):
                raise KeyError('SampleFiles already has an entry for %s' % s.name)
            else:
                fns = s.filenames
                print 'DBS has %i files for %s' % (len(fns), s.name)
                d[(s.name, dataset)] = (len(fns), fns)
            print "('%s:%s', '%s')," % (s.name, dataset, sf._enc(d))
Ejemplo n.º 6
0
def main(samples_registry):
    """Command-line driver: dispatch on a mode keyword found in argv.

    Uses a generic runem() helper to apply a callback to every
    (dataset, sample) pair selected from argv.  NOTE(review): os, sys,
    DBS and merge are assumed to come from the enclosing module —
    confirm against the file's top-level imports.
    """
    from glob import glob
    from sys import argv
    from pprint import pprint
    from JMTucker.Tools import colors
    from JMTucker.Tools.general import chunks, typed_from_argv

    samples = samples_registry.from_argv()
    datasets = samples_registry.datasets_from_argv()
    def prnt(*x):
        # Print all arguments space-joined on one line.
        print ' '.join(str(y) for y in x)
    def runem(cb):
        # Apply cb(dataset, sample) to every selected pair, warning about
        # and skipping samples that do not define the dataset.
        for dataset in datasets:
            for sample in samples:
                if not sample.has_dataset(dataset):
                    print colors.yellow('no dataset %s for %s' % (dataset, sample.name))
                    continue
                sample.set_curr_dataset(dataset)
                cb(dataset, sample)

    if 'merge' in argv:
        # Merge the ROOT files named on the command line into one output;
        # output name is the first .root arg that is not an existing file.
        samples = samples_registry.from_argv(from_root_fns=True, raise_if_none=True)
        out_fn = [x for x in argv if x.endswith('.root') and not os.path.isfile(x)]
        out_fn = out_fn[0] if out_fn else 'merge.root'
        norm_to = typed_from_argv(float, default_value=1.)
        norm_path = typed_from_argv(str, default_value='', name='norm_path')
        merge(samples, output=out_fn, norm_to=norm_to, norm_path=norm_path)

    elif 'printmissing' in argv:
        # Report samples lacking a batch directory and/or a .root file.
        samples = [s.name for s in samples_registry.from_argv(raise_if_none=True)]
        samples.sort()
        look_for_root_files = 'no_root' not in sys.argv
        no_batch_dir, no_root_file = [], []
        for s in samples:
            if not os.path.isdir('condor_' + s) and not glob('crab_*_' + s):
                no_batch_dir.append(s)
            if not os.path.isfile('%s.root' % s):
                no_root_file.append(s)
        if no_batch_dir:
            print colors.yellow('no batch dir for these:')
            for s in no_batch_dir:
                print s
        if look_for_root_files and no_root_file:
            print colors.yellow('no root file for these:')
            for s in no_root_file:
                print s

    elif 'name' in argv:
        runem(lambda dataset, sample: prnt(sample.name, dataset))

    elif 'ds' in argv:
        runem(lambda dataset, sample: prnt(sample.name, dataset, sample.dataset))

    elif 'file' in argv:
        # First few filenames (default 5, overridable by an int in argv).
        runem(lambda dataset, sample: [prnt(sample.name, dataset, x) for x in sample.filenames[:typed_from_argv(int, 5)]])

    elif 'nevents' in argv:
        runem(lambda dataset, sample: prnt(sample.name, dataset, DBS.numevents_in_dataset(sample.dataset)))

    elif 'files_for_events' in argv:
        # argv carries flat (run, lumi, event) triples; ask DBS which files
        # contain those events.
        rles = typed_from_argv(int, return_multiple=True)
        if len(rles) % 3 != 0:
            raise ValueError('expect list of ints in argv with length divisible by 3 [run1 lumi1 event1 ...]')
        rles = list(chunks(rles,3))
        runem(lambda dataset, sample: prnt(sample.name, dataset, ' '.join(DBS.files_for_events(rles, sample.dataset))))

    elif 'site' in argv:
        # One line per sample: padded name plus a color-coded completion
        # string per non-tape site (green = complete, yellow = otherwise).
        mlen = max(len(s.name) for s in samples)
        def cb(dataset, sample):
            ljname = sample.name.ljust(mlen+3)
            try:
                sites = DBS.sites_for_dataset(sample.dataset, json=True)
            except RuntimeError:
                print colors.boldred(ljname + ' DBS problem')
            else:
                print ljname,
                sites.sort(key=lambda x: x['name'])
                for site in sites:
                    if DBS.site_is_tape(site):
                        continue
                    is_complete = DBS.complete_at_site(site)
                    print (colors.green if is_complete else colors.yellow)(DBS.site_completions_string(site)), ' ',
                print
        runem(cb)

    elif 'samplefiles' in argv:
        # Emit (name:dataset, encoded-filelist) tuples suitable for pasting
        # into SampleFiles; refuses to overwrite an existing entry.
        import SampleFiles as sf
        def cb(dataset, sample):
            if sf.has(sample.name, dataset):
                raise KeyError('SampleFiles already has an entry for %s' % sample.name)
            fns = sample.filenames
            print 'DBS has %i files for %s' % (len(fns), sample.name)
            d = {(sample.name, dataset): (len(fns), fns)}
            print "('%s:%s', '%s')," % (sample.name, dataset, sf._enc(d))
        runem(cb)

    elif 'sfhas' in argv:
        # List samples present in SampleFiles; 'neg' inverts the test to
        # list samples that are absent.
        neg = 'neg' in argv
        import SampleFiles as sf
        for dataset in datasets:
            for sample in samples:
                if sf.has(sample.name, dataset) != neg:
                    print sample.name