def crab_hadd(working_dir, new_name=None, new_dir=None, raise_on_empty=False, chunk_size=900, pattern=None): if working_dir.endswith('/'): working_dir = working_dir[:-1] if new_name is None: new_name = '_'.join(os.path.basename(working_dir).split('_')[2:]) if not new_name.endswith('.root'): new_name += '.root' if new_dir is not None: new_name = os.path.join(new_dir, new_name) expected = crab_get_njobs(working_dir) print '%s: expecting %i files if all jobs succeeded' % (working_dir, expected) res = crab_command('out', '--xrootd', dir=working_dir) if 'No files to retrieve.' in res: files = [] else: files = [x.strip() for x in res.split('\n') if x.strip() and '.root' in x] if pattern: if '/' not in pattern: pattern = '*/' + pattern files = fnmatch.filter(files, pattern) jobs = [int(f.split('_')[-1].split('.root')[0]) for f in files] jobs.sort() expected = range(1, expected+1) if jobs != expected: print '\033[36;7m %i files found %s not what expected \033[m' % (len(jobs), crabify_list(jobs)) missing = sorted(set(expected) - set(jobs)) print '\033[36;7m %i missing: %r \033[m' % (len(missing), ' '.join(str(j) for j in missing)) l = len(files) if l == 0: msg = 'crab_hadd: no files found in %s' % working_dir if raise_on_empty: raise CRABToolsException(msg) else: print '\033[36;7m', msg, '\033[m' elif l == 1: print working_dir, ': just one file found, copying' cmd = 'xrdcp -s %s %s' % (files[0], new_name) os.system(cmd) os.chmod(new_name, 0644) else: hadd(new_name, files, chunk_size) return new_name
def cs_hadd(working_dir, new_name=None, new_dir=None, raise_on_empty=False, chunk_size=900, pattern=None): if working_dir.endswith('/'): working_dir = working_dir[:-1] if new_name is None: new_name = '_'.join(os.path.basename(working_dir).split('_')[1:]) if not new_name.endswith('.root'): new_name += '.root' if new_dir is not None: new_name = os.path.join(new_dir, new_name) expected = cs_njobs(working_dir) print '%s: expecting %i files if all jobs succeeded' % (working_dir, expected) files = glob(os.path.join(working_dir, '*.root')) if pattern: if '/' not in pattern: pattern = '*/' + pattern files = fnmatch.filter(files, pattern) jobs = [int(f.split('_')[-1].split('.root')[0]) for f in files] jobs.sort() expected = range(expected) if jobs != expected: print '\033[36;7m %i files found %s not what expected \033[m' % (len(jobs), jobs) missing = sorted(set(expected) - set(jobs)) print '\033[36;7m %i missing: %r \033[m' % (len(missing), ' '.join(str(j) for j in missing)) l = len(files) if l == 0: msg = 'cs_hadd: no files found in %s' % working_dir if raise_on_empty: raise CSHelpersException(msg) else: print '\033[36;7m', msg, '\033[m' elif l == 1: print working_dir, ': just one file found, copying' cmd = 'cp %s %s' % (files[0], new_name) os.system(cmd) os.chmod(new_name, 0644) else: hadd(new_name, files, chunk_size) return new_name
def cs_hadd(working_dir, new_name=None, new_dir=None, raise_on_empty=False, chunk_size=900, pattern=None, range_filter=None): working_dir, new_name, new_dir = cs_hadd_args(working_dir, new_name, new_dir) expected, files = cs_hadd_files(working_dir, range_filter=range_filter) result = HaddBatchResult('condor', working_dir, new_name, new_dir, expected, files) print '%s: expecting %i files if all jobs succeeded' % (working_dir, expected) if pattern: if '/' not in pattern: pattern = '*/' + pattern files = fnmatch.filter(files, pattern) jobs = [int(f.split('_')[-1].split('.root')[0]) for f in files] jobs.sort() expected = range(expected) if jobs != expected: print '\033[36;7m %i files found %s not what expected \033[m' % ( len(jobs), jobs) missing = sorted(set(expected) - set(jobs)) print '\033[36;7m %i missing: %r \033[m' % (len(missing), ' '.join( str(j) for j in missing)) l = len(files) if l == 0: result.success = False msg = 'cs_hadd: no files found in %s' % working_dir if raise_on_empty: raise CSHelpersException(msg) else: print '\033[36;7m', msg, '\033[m' elif l == 1: print working_dir, ': just one file found, copying' if files[0].startswith('root://'): cmd = 'xrdcp -s %s %s' % (files[0], new_name) else: cmd = 'cp %s %s' % (files[0], new_name) result.success = os.system(cmd) == 0 if result.success and not new_name.startswith('root://'): os.chmod(new_name, 0644) else: result.success = hadd(new_name, files) return result
def cs_hadd(working_dir, new_name=None, new_dir=None, raise_on_empty=False, chunk_size=900, pattern=None, range_filter=None): working_dir, new_name, new_dir = cs_hadd_args(working_dir, new_name, new_dir) expected, files = cs_hadd_files(working_dir, range_filter=range_filter) print '%s: expecting %i files if all jobs succeeded' % (working_dir, expected) if pattern: if '/' not in pattern: pattern = '*/' + pattern files = fnmatch.filter(files, pattern) jobs = [int(f.split('_')[-1].split('.root')[0]) for f in files] jobs.sort() expected = range(expected) if jobs != expected: print '\033[36;7m %i files found %s not what expected \033[m' % (len(jobs), jobs) missing = sorted(set(expected) - set(jobs)) print '\033[36;7m %i missing: %r \033[m' % (len(missing), ' '.join(str(j) for j in missing)) l = len(files) if l == 0: msg = 'cs_hadd: no files found in %s' % working_dir if raise_on_empty: raise CSHelpersException(msg) else: print '\033[36;7m', msg, '\033[m' elif l == 1: print working_dir, ': just one file found, copying' if files[0].startswith('root://'): cmd = 'xrdcp -s %s %s' % (files[0], new_name) else: cmd = 'cp %s %s' % (files[0], new_name) os.system(cmd) os.chmod(new_name, 0644) else: hadd(new_name, files) return new_name
def doit(path, out_fn): x = fromtree(os.path.join(path, 'observed.root')) if len(x) != 1: print 'using observed_byhand for this!' x = fromtree(os.path.join(path, 'observed_byhand.root')) obs = x[0] exp_fn = os.path.join(path, 'expected.root') if not os.path.isfile(exp_fn): exp_fns = glob(exp_fn.replace('.root', '_*.root')) if len(exp_fns) != njobs: raise ValueError('only found %i files, expected %i' % (len(exp_fns), njobs)) if not hadd(exp_fn, exp_fns): raise ValueError('problem hadding %s from %s files' % (exp_fn, len(exp_fns))) exp = fromtree(exp_fn) if len(exp) != njobs * ntoysperjob: raise ValueError( 'unexpected number of points in %s' % exp_fn) # this can't be given the asserts in fromtree right now stats(out_fn, obs, exp)
def crab_hadd(working_dir, new_name=None, new_dir=None, raise_on_empty=False, chunk_size=900, pattern=None, lpc_shortcut=False, range_filter=None): working_dir, new_name, new_dir = crab_hadd_args(working_dir, new_name, new_dir) expected, files = crab_hadd_files(working_dir, lpc_shortcut, range_filter=range_filter) result = HaddBatchResult('crab', working_dir, new_name, new_dir, expected, files) print '%s: expecting %i files if all jobs succeeded' % (working_dir, expected) if pattern: if '/' not in pattern: pattern = '*/' + pattern files = fnmatch.filter(files, pattern) automatic_splitting = False pprinted = False jobs = [] for f in files: jobnum = f.split('_')[-1].split('.root')[0] if crab_global_options.support_automatic_splitting and '-' in jobnum: automatic_splitting = True if not pprinted: pprint(files) pprinted = True it, jobnum = jobnum.split('-') it, jobnum = int(it), int(jobnum) assert it >= 1 # probe jobs "0-*" should not show up jobnum = it * 10000 + jobnum else: jobnum = int(jobnum) jobs.append(jobnum) jobs.sort() expected = range(1, expected + 1) if jobs != expected: print '\033[36;7m %i files found %s not what expected \033[m' % ( len(jobs), crabify_list(jobs)) missing = sorted(set(expected) - set(jobs)) print '\033[36;7m %i missing: %r \033[m' % (len(missing), ' '.join( str(j) for j in missing)) l = len(files) if l == 0: result.success = False msg = 'crab_hadd: no files found in %s' % working_dir if raise_on_empty: raise CRABToolsException(msg) else: print '\033[36;7m', msg, '\033[m' elif l == 1: print working_dir, ': just one file found, copying' cmd = 'xrdcp -s %s %s' % (files[0], new_name) result.success = os.system(cmd) == 0 if result.success and not new_name.startswith('root://'): os.chmod(new_name, 0644) else: result.success = hadd(new_name, files) if automatic_splitting: n = norm_from_file(new_name) sn, s = fn_to_sample(Samples, new_name) if not s: print colors.yellow( "\tnorm_from_file returns %r, couldn't get sample %s" % (n, sn)) else: no1, 
no2 = s.datasets['main'].nevents_orig, s.datasets[ 'miniaod'].nevents_orig if n == no1 or n == no2: print '\tnorm_from_file returns nevents_orig = %i' % n else: print colors.yellow( '\tnorm_from_file returns %r while %s.nevents_orig is %i (main) %i (miniaod' % (n, sn, no1, no2)) return result
# Command-line handling: the output file name is always the first positional
# argument; the inputs are either the remaining positionals or, with
# -l/--list, the non-blank lines of the given list file.
parser.add_argument('positional', nargs='*')
parser.add_argument('-l', '--list', help='File containing list of filenames for input.')
options = parser.parse_args()

output_fn = None
input_files = []

if options.list:
    if len(options.positional) != 1:
        raise ValueError('when doing --list, only one positional argument allowed (the output filename)')
    output_fn = options.positional[0]
    # Use a context manager so the list file is closed as soon as it has
    # been read (the previous file(...) handle was never closed).
    with open(options.list) as list_file:
        for line in list_file:
            line = line.strip()
            if line:
                input_files.append(line)
else:
    # Need the output name plus at least one input.
    if len(options.positional) < 2:
        parser.print_usage()
        sys.exit(1)
    output_fn = options.positional[0]
    input_files = options.positional[1:]

hadd(output_fn, input_files)
%(prog)s output.root file1.root file2.root [file3.root ... fileN.root] -or- %(prog)s -l input_file_list.txt output.root''') parser.add_argument('positional', nargs='*') parser.add_argument('-l', '--list', help='File containing list of filenames for input.') options = parser.parse_args() output_fn = None input_files = [] if options.list: if len(options.positional) != 1: raise ValueError('when doing --list, only one positional argument allowed (the output filename)') output_fn = options.positional[0] for line in file(options.list): line = line.strip() if line: input_files.append(line) else: if len(options.positional) < 2: parser.print_usage() sys.exit(1) output_fn = options.positional[0] input_files = options.positional[1:] hadd(output_fn, input_files)
new_vh_paths[path_mo.group(1)] += 1 vh_fn = os.path.join(dn, 'vertex_histos_%s.root' % jobnum) if verbosity >= 3: print dn, bn, jobnum, vh_fn if not eos.exists(vh_fn): raise IOError('no %r for %r' % vh_fn, fn) vh_fns.append(eos.canon(vh_fn)) new_vh_path = new_vh_paths.most_common(1)[0][0] new_vh_fn = eos.canon(os.path.join(new_vh_path, 'vertex_histos.root')) if verbosity >= 3: print new_vh_fn if eos.exists(new_vh_fn): raise ValueError('exists already: %r' % new_vh_fn) hadds.append((new_vh_fn, vh_fns)) if rm: print 'hadd and rm these:' else: print 'hadd these:' pprint(hadds) if rm and raw_input('ok? ').strip().lower() != 'y': sys.exit('giving up') for new_fn, fns in hadds: if hadd(new_fn, fns) and rm: for fn in fns: eos.rm(fn)
#!/usr/bin/env python
# Thin command-line wrapper: the first argument is the output file name and
# every following argument is an input file; both are handed to hadd().

import sys
from JMTucker.Tools.hadd import hadd

output_fn = sys.argv[1]
input_fns = sys.argv[2:]
hadd(output_fn, input_fns)
def crab_hadd(working_dir, new_name=None, new_dir=None, raise_on_empty=False, chunk_size=900): if working_dir.endswith('/'): working_dir = working_dir[:-1] if new_name is None: new_name = os.path.basename(working_dir).replace('crab_','') if not new_name.endswith('.root'): new_name += '.root' if new_dir is not None: new_name = os.path.join(new_dir, new_name) expected = crab_get_njobs(working_dir) print '%s: expecting %i files if all jobs succeeded' % (working_dir, expected) on_resilient = False on_store = False cfg = crab_cfg_parser(working_dir) try: storage_path = cfg.get('USER', 'storage_path') on_resilient = 'resilient' in storage_path except NoOptionError: pass try: storage_element = cfg.get('USER', 'storage_element') on_store = storage_element == 'T3_US_FNALLPC' except NoOptionError: pass files = [] if on_resilient: pfns = [crab_analysis_file_pfn(path) for path in glob.glob(os.path.join(working_dir, 'res/crab_fjr*xml'))] files = ['dcap://cmsdca3.fnal.gov:24145/pnfs/fnal.gov/usr/cms/WAX/resilient/' + pfn.split('/resilient/')[1] for pfn in pfns] # JMTBAD elif on_store: pfns = [crab_analysis_file_pfn(path) for path in glob.glob(os.path.join(working_dir, 'res/crab_fjr*xml'))] files = ['dcap://cmsdca3.fnal.gov:24145/pnfs/fnal.gov/usr/cms/WAX/11/' + pfn.split('/11/')[1] for pfn in pfns] # JMTBAD else: files = glob.glob(os.path.join(working_dir, 'res/*root')) job_nums = defaultdict(lambda: defaultdict(list)) for f in files: f_ = f.split('_') job, num = int(f_[-3]), int(f_[-2]) job_nums[job][num].append(f) sexpected = set(xrange(1,expected+1)) sjobs = set(job_nums) if sjobs != sexpected: print '\033[36;7m files found %r not what expected \033[m' % sorted(sjobs) missing = sorted(sexpected - sjobs) print '\033[36;7m missing: %r \033[m' % missing to_drop = [] for job, nums_and_fs in job_nums.iteritems(): for num, fs in nums_and_fs.iteritems(): if len(fs) > 1: print '\033[36;7m for job %i, more than one file with resub num %i *** will keep latest by mtime *** \033[m' % 
(job, num) fs.sort(key=lambda f: os.stat(f).st_mtime) good_f = fs[-1] while len(fs) > 1: bad_f = fs.pop(0) to_drop.append((bad_f, good_f)) if len(nums_and_fs) > 1: good_num = max(nums_and_fs) assert len(nums_and_fs[good_num]) == 1 good_f = nums_and_fs[good_num][0] for num, fs in nums_and_fs.iteritems(): if num != good_num: assert len(fs) == 1 to_drop.append((fs[0], good_f)) for f, good_f in to_drop: print '\033[36;7m dropping %s in favor of %s \033[m' % (f, good_f) files.remove(f) l = len(files) if l == 0: msg = 'crab_hadd: no files found in %s' % working_dir if raise_on_empty: raise CrabError(msg) else: print '\033[36;7m', msg, '\033[m' elif l == 1: print working_dir, ': just one file found, copying' cmd = '%scp %s %s' % ('dc' if 'dcap' in files[0] else '', files[0], new_name) os.system(cmd) os.chmod(new_name, 0644) else: hadd(new_name, files, chunk_size) return new_name
def crab_hadd(working_dir, new_name=None, new_dir=None, raise_on_empty=False, chunk_size=900, pattern=None, lpc_shortcut=False, range_filter=None): working_dir, new_name, new_dir = crab_hadd_args(working_dir, new_name, new_dir) expected, files = crab_hadd_files(working_dir, lpc_shortcut, range_filter=range_filter) print '%s: expecting %i files if all jobs succeeded' % (working_dir, expected) if pattern: if '/' not in pattern: pattern = '*/' + pattern files = fnmatch.filter(files, pattern) automatic_splitting = False pprinted = False jobs = [] for f in files: jobnum = f.split('_')[-1].split('.root')[0] if crab_global_options.support_automatic_splitting and '-' in jobnum: automatic_splitting = True if not pprinted: pprint(files) pprinted = True it, jobnum = jobnum.split('-') it, jobnum = int(it), int(jobnum) assert it >= 1 # probe jobs "0-*" should not show up jobnum = it*10000 + jobnum else: jobnum = int(jobnum) jobs.append(jobnum) jobs.sort() expected = range(1, expected+1) if jobs != expected: print '\033[36;7m %i files found %s not what expected \033[m' % (len(jobs), crabify_list(jobs)) missing = sorted(set(expected) - set(jobs)) print '\033[36;7m %i missing: %r \033[m' % (len(missing), ' '.join(str(j) for j in missing)) l = len(files) if l == 0: msg = 'crab_hadd: no files found in %s' % working_dir if raise_on_empty: raise CRABToolsException(msg) else: print '\033[36;7m', msg, '\033[m' elif l == 1: print working_dir, ': just one file found, copying' cmd = 'xrdcp -s %s %s' % (files[0], new_name) os.system(cmd) os.chmod(new_name, 0644) else: hadd(new_name, files) if automatic_splitting: n = norm_from_file(new_name) sn, s = fn_to_sample(Samples, new_name) if not s: print colors.yellow("\tnorm_from_file returns %r, couldn't get sample %s" % (n, sn)) else: no1, no2 = s.datasets['main'].nevents_orig, s.datasets['miniaod'].nevents_orig if n == no1 or n == no2: print '\tnorm_from_file returns nevents_orig = %i' % n else: print colors.yellow('\tnorm_from_file returns %r 
while %s.nevents_orig is %i (main) %i (miniaod' % (n, sn, no1, no2)) return new_name