def get_argdict(comp_data_dir, **args):
    """Build the level3-processing argument strings for each dataset.

    For every dataset in ``args['sim']`` the level2a IceTop CORSIKA files
    in the ``00000-00999`` sub-directory are globbed, and one command-line
    argument string is produced per input file.

    Parameters
    ----------
    comp_data_dir : str
        Base directory; outputs are named under
        ``<comp_data_dir>/IC79_sim/<sim>/``.
    **args
        Keyword options.  ``sim`` (iterable of dataset identifiers) is
        required.  ``test`` is optional (default ``False``); when truthy
        only the first two input files of each dataset are used.

    Returns
    -------
    dict
        Maps each dataset identifier to a list of argument strings of the
        form ``'<infile> --isMC --do-inice --dataset <sim> --det IC79
        --waveform -o <outfile>'``.

    Raises
    ------
    ValueError
        If the run field extracted from an input file name is not an
        integer.
    """
    # The detector configuration is fixed for these datasets.
    config = 'IC79'
    testing = args.get('test', False)

    argdict = dict.fromkeys(args['sim'])
    for sim in args['sim']:
        files = sorted(glob.glob(
            '/data/sim/IceTop/2010/filtered/level2a/CORSIKA-ice-top/{}/00000-00999/Level2a_*.i3.bz2'
            .format(sim)))
        if testing:
            files = files[:2]

        outdir = '{}/{}_sim/{}'.format(comp_data_dir, config, sim)
        # NOTE(review): comp.checkdir presumably creates the directory if it
        # is missing -- confirm against the composition package.
        comp.checkdir(outdir + '/')

        arglist = []
        for f in files:
            # The run number is the third dot-separated field from the end
            # of the file name, i.e. '<prefix>.<run>.i3.bz2'.
            run = os.path.basename(f).split('.')[-3]
            out = '{}/Level3_IC79_{}_Run{:06d}.i3.gz'.format(
                outdir, sim, int(run))
            arglist.append(
                '{} --isMC --do-inice --dataset {} --det IC79 --waveform -o {}'.format(
                    f, sim, out))
        argdict[sim] = arglist

    return argdict
def _run_label(path):
    """Return the text between the first 'Run' and the first '.i3.gz' in *path*.

    No validation is done: the path is assumed to look like
    ``...Run<number>.i3.gz``.
    """
    start = path.find('Run') + 3
    end = path.find('.i3.gz')
    return path[start:end]


def get_argdict(comp_data_dir, **args):
    """Build the hdf5-conversion argument strings for each dataset.

    The level3 files of every dataset in ``args['sim']`` are split into
    batches of ``args['n']`` files, and one argument string is produced per
    batch.  The GCD file is placed first in each batch's file list.

    Parameters
    ----------
    comp_data_dir : str
        Base directory; outputs are named under
        ``<comp_data_dir>/<config>_sim/test-files/``.
    **args
        Keyword options.  ``sim`` (iterable of dataset identifiers) is
        required.  ``n`` (files per batch) is required unless ``test`` is
        truthy.  ``test`` is optional (default ``False``); when truthy the
        batch size is forced to 2 and only the first two batches are kept.

    Returns
    -------
    dict
        Maps each dataset identifier to a list of argument strings of the
        form ``'--files <gcd> <file> ... -s <sim> -o <outfile>'``.
    """
    testing = args.get('test', False)

    argdict = dict.fromkeys(args['sim'])
    for sim in args['sim']:
        # Resolved inside the loop so that an empty ``sim`` list still does
        # not require ``n`` to be supplied.
        batch_size = 2 if testing else args['n']

        config = comp.simfunctions.sim2cfg(sim)
        # NOTE(review): testing=True is always passed here, regardless of
        # args['test'] -- confirm this is intended.
        gcd, files = comp.simfunctions.get_level3_files(sim, testing=True)

        outdir = '{}/{}_sim/test-files'.format(comp_data_dir, config)
        # NOTE(review): comp.checkdir presumably creates the directory if it
        # is missing -- confirm against the composition package.
        comp.checkdir(outdir + '/')

        batches = [files[i:i + batch_size]
                   for i in range(0, len(files), batch_size)]
        if testing:
            batches = batches[:2]

        arglist = []
        for batch in batches:
            # Each label is taken from its own path, so the first and last
            # files of a batch need not share the same path layout.
            start = _run_label(batch[0])
            end = _run_label(batch[-1])
            out = '{}/sim_{}_part{}-{}.hdf5'.format(outdir, sim, start, end)
            # The GCD file has to come first in the file list.
            file_names = ' '.join([gcd] + batch)
            arglist.append(
                '--files {} -s {} -o {}'.format(file_names, sim, out))
        argdict[sim] = arglist

    return argdict
def make_submit_script(executable, jobID, script_path, condor_dir,
                       outs_dir='/data/user/jbourbeau/composition/condor/outs',
                       errors_dir='/data/user/jbourbeau/composition/condor/errors'):
    """Write an HTCondor submit description file for a single job.

    Parameters
    ----------
    executable : str
        Path written to the ``executable`` entry.
    jobID : str
        Job identifier; used as the base name of the log, output and error
        files.
    script_path : str
        File the submit description is written to (overwritten if present).
    condor_dir : str
        Directory whose ``logs/`` sub-directory receives the job log.
    outs_dir : str, optional
        Directory receiving the job's ``<jobID>.out`` file.  The default
        is the location that was previously hard-coded.
    errors_dir : str, optional
        Directory receiving the job's ``<jobID>.error`` file.  The default
        is the location that was previously hard-coded.

    Returns
    -------
    None
    """
    # NOTE(review): comp.checkdir presumably makes sure the directory holding
    # script_path exists -- confirm against the composition package.
    comp.checkdir(script_path)

    lines = [
        "universe = vanilla\n",
        "getenv = true\n",
        "executable = {}\n".format(executable),
        # The actual arguments are supplied at submit time through $(ARGS).
        "arguments = $(ARGS)\n",
        "log = {}/logs/{}.log\n".format(condor_dir, jobID),
        "output = {}/{}.out\n".format(outs_dir, jobID),
        "error = {}/{}.error\n".format(errors_dir, jobID),
        "notification = Never\n",
        "queue \n",
    ]

    with open(script_path, 'w') as f:
        f.writelines(lines)
def get_argdict(comp_data_dir, **args):
    """Build the level3-processing argument strings for each dataset.

    For every dataset in ``args['sim']`` the level2a IceTop CORSIKA files
    in all of the dataset's sub-directories are globbed, and one
    command-line argument string is produced per input file.  Unless
    overwriting is requested, inputs whose output file already exists are
    left out.

    Parameters
    ----------
    comp_data_dir : str
        Base directory; outputs are named under
        ``<comp_data_dir>/IC79_sim/<sim>/``.
    **args
        Keyword options.  ``sim`` (iterable of dataset identifiers) is
        required.  ``test`` is optional (default ``False``); when truthy
        only the first two input files of each dataset are used.
        ``overwrite`` is optional (default ``False``); when falsy, inputs
        whose output file already exists are skipped.

    Returns
    -------
    dict
        Maps each dataset identifier to a list of argument strings of the
        form ``'<infile> --isMC --do-inice --dataset <sim> --det IC79
        --waveform -o <outfile>'``.

    Raises
    ------
    ValueError
        If the run field extracted from an input file name is not an
        integer.
    """
    # The detector configuration is fixed for these datasets.
    config = 'IC79'
    testing = args.get('test', False)
    overwrite = args.get('overwrite', False)

    argdict = dict.fromkeys(args['sim'])
    for sim in args['sim']:
        files = sorted(glob.glob(
            '/data/sim/IceTop/2010/filtered/level2a/CORSIKA-ice-top/{}/*/Level2a_*.i3.bz2'.format(sim)))
        if testing:
            files = files[:2]

        outdir = '{}/{}_sim/{}'.format(comp_data_dir, config, sim)
        # NOTE(review): comp.checkdir presumably creates the directory if it
        # is missing -- confirm against the composition package.
        comp.checkdir(outdir + '/')

        arglist = []
        for f in files:
            # The run number is the third dot-separated field from the end
            # of the file name, i.e. '<prefix>.<run>.i3.bz2'.
            run = os.path.basename(f).split('.')[-3]
            out = '{}/Level3_IC79_{}_Run{:06d}.i3.gz'.format(
                outdir, sim, int(run))

            # When not overwriting, an existing output means this input has
            # already been processed.
            if not overwrite and os.path.exists(out):
                continue

            arglist.append(
                '{} --isMC --do-inice --dataset {} --det IC79 --waveform -o {}'.format(f, sim, out))
        argdict[sim] = arglist

    return argdict
return def getjobID(jobID, condor_dir): jobID += time.strftime('_%Y%m%d') othersubmits = glob.glob( '{}/submit_scripts/{}_??.submit'.format(condor_dir, jobID)) jobID += '_{:02d}'.format(len(othersubmits) + 1) return jobID if __name__ == "__main__": # Setup global path names mypaths = comp.Paths() comp.checkdir(mypaths.comp_data_dir) # Set up condor directory condor_dir = '/scratch/{}/condor_composition'.format(getpass.getuser()) for directory in ['errors', 'logs', 'outs', 'submit_scripts']: comp.checkdir(condor_dir + '/' + directory + '/') simoutput = comp.simfunctions.getSimOutput() default_sim_list = ['7006', '7579', '7241', '7263', '7791', '7242', '7262', '7851', '7007', '7784'] p = argparse.ArgumentParser( description='Runs level3_process.py on cluster en masse', formatter_class=argparse.RawDescriptionHelpFormatter, epilog=simoutput) p.add_argument('-s', '--sim', dest='sim', nargs='*', choices=default_sim_list, default=default_sim_list,
import time import glob import argparse import os from collections import defaultdict from icecube.weighting.weighting import from_simprod from icecube.weighting.fluxes import GaisserH3a, GaisserH4a import composition as comp if __name__ == "__main__": # Setup global path names mypaths = comp.Paths() comp.checkdir(mypaths.comp_data_dir) p = argparse.ArgumentParser( description='Runs extra modules over a given fileList') p.add_argument('-o', '--outfile', dest='outfile', help='Output file') args = p.parse_args() dataframe_dict = defaultdict(list) # Get simulation information t_sim = time.time() print('Loading simulation information...') file_list = sorted(glob.glob(mypaths.comp_data_dir + '/IT73_sim/files/sim_????.hdf5')) value_keys = ['IceTopMaxSignal',