def add_parameter(dagman, param_name, param_range, dtype, validation_ex, merge_ex, **args):
    validation_job = pycondor.Job('{}_job'.format(param_name), validation_ex,
                                  error=error, output=output,
                                  log=log, submit=submit,
                                  extra_lines=['request_cpus = {}'.format(args['n_jobs'])],
                                  verbose=1)
    dagman.add_job(validation_job)
    merge_job = pycondor.Job('merge_{}_job'.format(param_name), merge_ex,
                             error=error, output=output,
                             log=log, submit=submit,
                             verbose=1)
    dagman.add_job(merge_job)
    # Ensure that the merge script only runs after all the unmerged dataframes have been generated
    merge_job.add_parent(validation_job)

    # Add args to job
    base_arg = '--pipeline {} --param_name {} --param_type {} --cv {} --scoring {}'.format(
        args['pipeline'], param_name, dtype, args['cv'], args['scoring'])
    outfiles = []
    for value in param_range:
        outfile = os.path.join(
            args['outdir'],
            'validation-{}-{}-{}-{}-cv{}.csv'.format(
                args['pipeline'], param_name, value, args['scoring'], args['cv']))
        outfiles.append(outfile)
        validation_job.add_arg(base_arg + ' --param_value {} --n_jobs {} --outfile {}'.format(
            value, args['n_jobs'], outfile))

    merge_infiles_str = ' '.join(outfiles)
    merge_outfile = os.path.join(
        args['outdir'],
        'validation-{}-{}-{}-cv{}.csv'.format(
            args['pipeline'], param_name, args['scoring'], args['cv']))
    merge_job.add_arg('--infiles {} --outfile {} --overwrite'.format(
        merge_infiles_str, merge_outfile))

    return dagman
def test_build_executeable_not_found_fail():
    with pytest.raises(IOError) as excinfo:
        ex = '/path/to/executable'
        job = pycondor.Job('jobname', ex)
        job.build(makedirs=False)
    error = 'The path {} does not exist...'.format(ex)
    assert error == str(excinfo.value)
def create_python_script_job(
    python_script: str,
    job_name: str,
    job_args_dict: Dict[str, str],
    logdir: str,
    subdir: str,
    dag: pycondor.Dagman,
    request_memory: Optional[str] = None,
    extra_lines: Optional[List[str]] = [],
):
    """Creates a job-node for a python script.

    :param python_script: python script path (e.g. test.py)
    :param job_name: unique name for this job
    :param job_args_dict: {job_arg: arg_val}
    :param logdir: the dir to store logs
    :param subdir: the dir to store the submit file
    :param dag: the dag this job is attached to
    :param request_memory: memory for the job, e.g. '16 GB'
    :param extra_lines: extra submit-file lines appended after the accounting group
    :return: constructed pycondor.Job
    """
    return pycondor.Job(
        name=job_name,
        executable=sys.executable,
        error=logdir,
        log=logdir,
        output=logdir,
        submit=subdir,
        getenv=True,
        universe="vanilla",
        dag=dag,
        request_memory=request_memory,
        arguments=f"{python_script} {convert_args_dict_to_str(job_args_dict)}",
        extra_lines=[f"accounting_group = {ACCOUNTING_GROUP}"] + extra_lines,
    )
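# Hedged usage sketch (an assumption, not part of the original source): the script
# path, job name, and argument dict below are hypothetical, and the call assumes
# convert_args_dict_to_str and ACCOUNTING_GROUP are defined in this module, with
# sys and pycondor imported as the helper above requires.
if __name__ == "__main__":
    dag = pycondor.Dagman(name="example_dag", submit="condor/submit")
    job = create_python_script_job(
        python_script="analysis.py",             # hypothetical script
        job_name="analysis_0",
        job_args_dict={"--n-samples": "1000"},   # rendered by convert_args_dict_to_str
        logdir="condor/logs",
        subdir="condor/submit",
        dag=dag,
        request_memory="16 GB",
    )
    dag.build()  # or dag.build_submit() to also hand the DAG to HTCondor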
def test_add_parent_fail():
    with pytest.raises(ValueError) as excinfo:
        job = pycondor.Job('jobname', 'jobex')
        job.add_parent('parentjob')
    error = 'add_parent() is expecting a Job or Dagman instance. ' + \
            'Got an object of type {}'.format(type('parentjob'))
    assert error == str(excinfo.value)
def apply_all(
    datadir,
    arrname,
    outdir,
    workspace,
    fold_results=None,
    single_results=None,
    and_submit=False,
):
    """Generate BDT response arrays for all ROOT files in DATADIR."""
    import glob
    import shutil

    import pycondor

    if len(single_results) > 0 and len(fold_results) > 0:
        raise ValueError("Cannot use -f and -s together with apply-single")
    results_flags = None
    if len(fold_results) > 0:
        results_flags = "-f {}".format(" -f ".join(fold_results))
    elif len(single_results) > 0:
        results_flags = "-s {}".format(" -s ".join(single_results))
    else:
        raise ValueError("-f or -s required")

    ws = PosixPath(workspace).resolve()
    outpath = PosixPath(outdir).resolve()
    outpath.mkdir(exist_ok=True)
    datapath = PosixPath(datadir).resolve(strict=True)
    all_files = glob.glob(f"{datapath}/*.root")
    arglist = [f"{f} {arrname} {outpath} {results_flags}" for f in all_files]

    condor_dag = pycondor.Dagman(name="dag_train_scan", submit=str(ws / "sub"))
    condor_job_scan = pycondor.Job(
        name="job_apply_all",
        universe="vanilla",
        getenv=True,
        notification="Error",
        extra_lines=["notify_user = [email protected]"],
        executable=shutil.which("tdub"),
        submit=str(ws / "sub"),
        error=str(ws / "err"),
        output=str(ws / "out"),
        log=str(ws / "log"),
        dag=condor_dag,
    )
    for run in arglist:
        condor_job_scan.add_arg(f"apply single {run}")
    if and_submit:
        condor_dag.build_submit()
    else:
        condor_dag.build()
def create_pycondor_job(self):
    job_name = self.job_name
    self.extra_lines.extend(
        _log_output_error_submit_lines(self.log_directory, job_name))

    if self.inputs.scheduler.lower() == "condor":
        self.add_accounting()

    self.extra_lines.append(f"priority = {self.condor_job_priority}")
    if self.inputs.email is not None:
        self.extra_lines.append(f"notify_user = {self.inputs.email}")

    if self.online_pe:
        self.extra_lines.append("+Online_CBC_PE_Daily = True")
        self.requirements.append("((TARGET.Online_CBC_PE_Daily =?= True))")

    if self.universe != "local" and self.inputs.osg:
        if self.run_node_on_osg:
            _osg_lines, _osg_reqs = self._osg_submit_options(
                self.executable, has_ligo_frames=True)
            self.extra_lines.extend(_osg_lines)
            self.requirements.append(_osg_reqs)
        else:
            osg_local_node_lines = [
                "+flock_local = True",
                '+DESIRED_Sites = "nogrid"',
                "+should_transfer_files = NO",
            ]
            self.extra_lines.extend(osg_local_node_lines)

    self.job = pycondor.Job(
        name=job_name,
        executable=self.executable,
        submit=self.inputs.submit_directory,
        request_memory=self.request_memory,
        request_disk=self.request_disk,
        request_cpus=self.request_cpus,
        getenv=self.getenv,
        universe=self.universe,
        initialdir=self.inputs.initialdir,
        notification=self.notification,
        requirements=" && ".join(self.requirements),
        extra_lines=self.extra_lines,
        dag=self.dag.pycondor_dag,
        arguments=self.arguments.print(),
        retry=self.retry,
        verbose=self.verbose,
    )

    # Hack to allow passing walltime down to slurm
    setattr(self.job, "slurm_walltime", self.slurm_walltime)

    logger.debug(f"Adding job: {job_name}")
def test_submit_equality(tmpdir):
    executable = example_script
    basename = os.path.basename(executable)
    name, _ = os.path.splitext(basename)

    # Create submit file using pycondor.Job
    job = pycondor.Job(name=name,
                       executable=executable,
                       submit=str(tmpdir),
                       log=str(tmpdir),
                       output=str(tmpdir),
                       error=str(tmpdir),
                       request_memory='3GB')
    job.build(fancyname=False)
    with open(job.submit_file, 'r') as f:
        submit_file_1 = f.readlines()

    # Create submit file using pycondor submit
    runner = CliRunner()
    args = [
        '--submit', str(tmpdir),
        '--output', str(tmpdir),
        '--error', str(tmpdir),
        '--log', str(tmpdir),
        '--request_memory', '3GB',
        # Don't want to actually submit job
        '--dryrun',
        executable,
    ]
    result = runner.invoke(submit, args)
    assert result.exit_code == 0
    submit_file_2 = os.path.join(str(tmpdir), '{}.submit'.format(name))
    with open(submit_file_2, 'r') as f:
        submit_file_2 = f.readlines()

    # Make sure the two submit files are the same
    assert submit_file_1 == submit_file_2
import pycondor
from glob import glob

import pandas as pd

import francis.utils as utils
f_path = utils.get_francis_path()

error = '/scratch/apizzuto/fast_response/condor/error'
output = '/scratch/apizzuto/fast_response/condor/output'
log = '/scratch/apizzuto/fast_response/condor/log'
submit = '/scratch/apizzuto/fast_response/condor/submit'

job = pycondor.Job(
    'background_fastresponse_alerts_steady',
    f_path + 'time_integrated_scripts/steady_background_trials.py',
    error=error,
    output=output,
    log=log,
    submit=submit,
    getenv=True,
    universe='vanilla',
    verbose=2,
    request_memory=8000,
    # request_cpus=5,
    extra_lines=[
        'should_transfer_files = YES',
        'when_to_transfer_output = ON_EXIT',
        'Requirements = (Machine != "node128.icecube.wisc.edu")'
    ]
)

sky_files = glob('/data/ana/realtime/alert_catalog_v2/fits_files/Run1*.fits.gz')

for rng in range(5):
    for index in range(len(sky_files)):
        for smear in [' --smear']:
            job.add_arg('--rng={} --i={} --ntrials=200{}'.format(rng, index, smear))

dagman = pycondor.Dagman('alert_event_fra_background_steady',
                         submit=submit, verbose=2)
import pycondor

if __name__ == "__main__":

    # Declare the error, output, log, and submit directories for Condor Job
    error = 'condor/error'
    output = 'condor/output'
    log = 'condor/log'
    submit = 'condor/submit'

    # Setting up first PyCondor Job
    job1 = pycondor.Job('examplejob1', 'savelist.py',
                        error=error, output=output,
                        log=log, submit=submit, verbose=2)
    # Adding arguments to job1
    for i in range(10, 100, 10):
        job1.add_arg('--length {}'.format(i), retry=7)

    # Setting up second PyCondor Job
    job2 = pycondor.Job('examplejob2', 'savelist.py',
                        error=error, output=output,
                        log=log, submit=submit, verbose=2)
import pycondor

# submit signal tests to cluster for stacking analysis
# scan each time-window of analysis, specify a spectral gamma
times = [
    '0.01', '0.03', '0.10', '0.32', '1.00',
    '3.16', '10.00', '31.60', '100.00', '316.00'
]
gamma = 3.0

error = '/home/sfahey/condor/error'
output = '/home/sfahey/condor/output'
log = '/home/sfahey/condor/log'
submit = '/home/sfahey/condor/submit'

job = pycondor.Job('FRB_stacking_sig', 'frb_sig_tsd_stacking_noRepeater.py',
                   error=error, output=output,
                   log=log, submit=submit,
                   verbose=2, request_memory=7000)

for time in times:
    job.add_arg('-t %.2f -g %.2f' % (float(time), gamma))

dagman = pycondor.Dagman('FRB_stacking_sig', submit=submit, verbose=2)
dagman.add_job(job)
dagman.build_submit()
binning = args.binning
# args = sys.argv
# bgParticle = args[1]
# nuType = args[2]
# binning = args[3]

path = "/home/jlazar/condor_logs/energy_delta_theta_hist_bg"
error = "%s/error" % path
output = "%s/output" % path
log = "%s/log" % path
submit = "%s/submit" % path

runNs = [int(i) for i in np.linspace(0, 9, 10)]
mcFile = "/data/user/jlazar/data/solar_WIMP/data/mcRecarray.npy"
outfile = "energy_delta_theta_hist_bg_%s_%s.out" % (bgParticle, nuType)

run = pycondor.Job(
    "energy_delta_theta_hist_bg_%s_%s" % (bgParticle, nuType),
    "/data/user/jlazar/solar_WIMP/get_energy_delta_theta_hist_bg.sh",
    error=error,
    output=output,
    log=log,
    submit=submit,
    universe="vanilla",
    notification="never")

for n in runNs:
    run.add_arg("%s %s %s %s %s" % (n, bgParticle, nuType, binning, mcFile))

run.build()
# run.build_submit()
log = "%s/log" % path submit = "%s/submit" % path xlines = [ "request_memory = (NumJobStarts is undefined) ? 2 * pow(2, 10) : 2048 * pow(2, NumJobStarts + 1)", "periodic_release = (HoldReasonCode =?= 21 && HoldReasonSubCode =?= 1001) || HoldReasonCode =?= 21", "periodic_remove = (JobStatus =?= 5 && (HoldReasonCode =!= 34 && HoldReasonCode =!= 21)) || (RequestMemory > 13192)" ] dagman = pycondor.Dagman("e_d_theta", submit=submit, verbose=2) run = pycondor.Job( "energy_delta_theta_hist_signal", "/data/user/jlazar/solar_WIMP/calc_energy_delta_theta_hist_signal.sh", error=error, output=output, log=log, submit=submit, universe="vanilla", notification="never", dag=dagman, verbose=2, extra_lines=xlines) fluxes = [f"ch{ch}-m{m}" for ch in chs for m in ms] print(fluxes) for mcfile in mcfiles: for flux in fluxes: run.add_arg(f"{mcfile} {flux}") dagman.build()
import pycondor

if __name__ == "__main__":

    # Declare the error, output, log, and submit directories for Condor Job
    error = 'condor/error'
    output = 'condor/output'
    log = 'condor/log'
    submit = 'condor/submit'

    # Setting up first PyCondor Job
    job1 = pycondor.Job('examplejob1', 'savelist.py',
                        error=error, output=output,
                        log=log, submit=submit,
                        use_unique_id=True, verbose=2)
    # Adding arguments to job1
    for i in range(10, 100, 10):
        job1.add_arg('--length {}'.format(i))

    # Setting up second PyCondor Job
    job2 = pycondor.Job('examplejob2', 'savelist.py',
                        error=error, output=output,
                        log=log, submit=submit, verbose=2)
    # Adding arguments to job2
error = '/scratch/apizzuto/novae/condor/error'
output = '/scratch/apizzuto/novae/condor/output'
log = '/scratch/apizzuto/novae/condor/log'
submit = '/scratch/apizzuto/novae/condor/submit'

dagman = pycondor.Dagman('csky_Novae_stacking_jobs', submit=submit, verbose=2)

background_jobs = pycondor.Job(
    'sensitivity_stacking_novae_greco',
    '/home/apizzuto/Nova/scripts/stacking/stacking_background.py',
    error=error,
    output=output,
    log=log,
    submit=submit,
    getenv=True,
    universe='vanilla',
    verbose=2,
    request_memory=8000,
    request_cpus=5,
    extra_lines=[
        'should_transfer_files = YES',
        'when_to_transfer_output = ON_EXIT'
    ],
    dag=dagman)

low_mem_signal = pycondor.Job(
    'sensitivity_signal_novae_greco_low_mem',
    '/home/apizzuto/Nova/scripts/stacking/stacking_signal_trials.py',
    error=error,
    output=output,
    log=log,
    submit=submit,
def test_add_arg_name_float_fail():
    with pytest.raises(ValueError) as excinfo:
        job = pycondor.Job('jobname', 'jobex')
        job.add_arg('arg', name=23.12)
    error = 'name must be a string'
    assert error == str(excinfo.value)
import pycondor
import numpy as np
import argparse

chs = [5, 8, 11]
ms = [300, 500, 800, 1000, 3000, 5000, 8000, 10000]

path = "/home/jlazar/condor_logs/realizations/"
error = "%s/error" % path
output = "%s/output" % path
log = "%s/log" % path
submit = "%s/submit" % path

run = pycondor.Job("realizations",
                   "/data/user/jlazar/solar_WIMP/bash_scripts/data_realizations.sh",
                   error=error,
                   output=output,
                   log=log,
                   submit=submit,
                   universe="vanilla",
                   notification="never"
                   )

for ch in chs:
    for m in ms:
        for opt in ['00', '01']:
            run.add_arg("%s %s %s" % (ch, m, opt))

run.build()
import pycondor
from glob import glob
import numpy as np

# submit background tests to cluster for max-burst analysis
# specify number of jobs, trials per job, and the time-window of analysis
t100 = 1000.00
N_trials = 1000
N_jobs = 100

error = '/home/sfahey/condor/error'
output = '/home/sfahey/condor/output'
log = '/home/sfahey/condor/log'
submit = '/home/sfahey/condor/submit'

job = pycondor.Job('FRB_maxburst_bg', 'frb_bg_tsd_maxburst_noRepeater.py',
                   error=error, output=output,
                   log=log, submit=submit,
                   verbose=2, request_memory=4000
                   )

for seed in range(N_jobs):
    job.add_arg('-t %.2f -r %i -n %i' % (t100, seed, N_trials))

dagman = pycondor.Dagman('FRB_maxburst_bg_dT%.2f' % t100, submit=submit, verbose=2)
dagman.add_job(job)
dagman.build_submit()
args = p.parse_args()

# Define output directories for condor jobs
mypaths = comp.Paths()
error = mypaths.condor_data_dir + '/error'
output = mypaths.condor_data_dir + '/output'
log = mypaths.condor_scratch_dir + '/log'
submit = mypaths.condor_scratch_dir + '/submit'

# Set up pycondor Job
ex = os.getcwd() + '/sequential-feature-selection.py'
job = pycondor.Job('SFS', ex,
                   error=error, output=output,
                   log=log, submit=submit,
                   request_memory='5GB',
                   extra_lines=['request_cpus = {}'.format(args.n_jobs)],
                   verbose=2)

methods = ['forward', 'backward']
floatings = [True, False]
base_arg = '--config {} --pipeline {} --cv {} --scoring {} --n_jobs {}'.format(
    args.config, args.pipeline, args.cv, args.scoring, args.n_jobs)
for method, floating in itertools.product(methods, floatings):
    arg = base_arg + ' --method {}'.format(method)
    if floating:
        arg += ' --floating'
    job.add_arg(arg)
p = argparse.ArgumentParser(description=description)
p.add_argument('-c', '--config', dest='config',
               default='IC86.2012',
               choices=comp.datafunctions.get_data_configs(),
               help='Detector configuration')
p.add_argument('--composition', dest='composition',
               default='total',
               choices=['light', 'heavy', 'total'],
               help='Whether to make individual skymaps for each composition')
args = p.parse_args()

job = pycondor.Job(name='random-sample-anisotropy',
                   executable=JOB_EX,
                   error=PYCONDOR_ERROR,
                   output=PYCONDOR_OUTPUT,
                   log=PYCONDOR_LOG,
                   submit=PYCONDOR_SUBMIT)

num_trials = 10000
num_trials_per_job = 100
num_splits = num_trials // num_trials_per_job
print('num_splits = {}'.format(num_splits))

for random_states in np.array_split(np.arange(num_trials), num_splits):
    random_states_arg = ' '.join(map(str, random_states))
    print('random_states_arg = {}'.format(random_states_arg))
def test_add_arg_int_fail():
    with pytest.raises(ValueError) as excinfo:
        job = pycondor.Job('jobname', 'jobex')
        job.add_arg(50)
    error = 'arg must be a string'
    assert error == str(excinfo.value)
                            'save_pvals.py')

# Create Dagman instance
dag_name = 'anisotropy_kstest_{}'.format(args.config)
if args.test:
    dag_name += '_test'
dagman = pycondor.Dagman(dag_name, submit=submit, verbose=1)

# Create Job for saving ks-test p-values for each trial
save_pvals_name = 'save_pvals_{}'.format(args.config)
if args.low_energy:
    save_pvals_name += '_lowenergy'
save_pvals_job = pycondor.Job(save_pvals_name,
                              save_pvals_ex,
                              error=error, output=output,
                              log=log, submit=submit,
                              verbose=1)

save_pvals_infiles_0 = []
save_pvals_infiles_1 = []

dagman.add_job(save_pvals_job)

outdir = os.path.join(comp.paths.comp_data_dir, args.config + '_data',
                      'anisotropy', 'random_splits')
if args.test:
    outdir = os.path.join(outdir, 'test')
for trial_num in range(args.ks_trials):
    # Create map_maps jobs for this ks_trial
                                         verbose=2)
dagman_long_timescale = pycondor.Dagman('FRA_cascade_long_timescale_sens',
                                        submit=submit, verbose=2)
dagman_fits = pycondor.Dagman('FRA_cascade_fitting_bias',
                              submit=submit, verbose=2)

background_short_time_jobs = pycondor.Job(
    'fra_background_short_time',
    script_base + 'cascade_alert_background.py',
    error=error,
    output=output,
    log=log,
    submit=submit,
    getenv=True,
    universe='vanilla',
    verbose=2,
    request_memory=6000,
    request_cpus=5,
    extra_lines=[
        'should_transfer_files = YES',
        'when_to_transfer_output = ON_EXIT'
    ],
    dag=dagman_short_timescale)

background_long_time_jobs = pycondor.Job(
    'fra_long_short_time',
    script_base + 'cascade_alert_background.py',
    error=error,
    output=output,
    log=log,
    submit=submit,
#!/usr/bin/env python

import pycondor

if __name__ == "__main__":

    # Declare the error, output, log, and submit directories for Condor Job
    error = 'condor/error'
    output = 'condor/output'
    log = 'condor/log'
    submit = 'condor/submit'

    # Setting up a PyCondor Job
    job = pycondor.Job('examplejob', 'savelist.py',
                       error=error, output=output,
                       log=log, submit=submit, verbose=2)

    # Write all necessary submit files and submit job to Condor
    job.build_submit()
def test_add_arg_retry_string_fail():
    with pytest.raises(ValueError) as excinfo:
        job = pycondor.Job('jobname', 'jobex')
        job.add_arg('arg', retry='10')
    error = 'retry must be an int'
    assert error == str(excinfo.value)
path = "/home/jlazar/condor_logs/conventional_flux_interpolation/" error = "%s/error" % path output = "%s/output" % path log = "%s/log" % path submit = "%s/submit" % path xlines = ["request_memory = (NumJobStarts is undefined) ? 2 * pow(2, 10) : 2048 * pow(2, NumJobStarts + 1)", "periodic_release = (HoldReasonCode =?= 21 && HoldReasonSubCode =?= 1001) || HoldReasonCode =?= 21", "periodic_remove = (JobStatus =?= 5 && (HoldReasonCode =!= 34 && HoldReasonCode =!= 21)) || (RequestMemory > 13192)" ] dagman = pycondor.Dagman("conventional_flux_interpolation", submit=submit, verbose=2) run = pycondor.Job("conv_interp", "/data/user/jlazar/solar_WIMP/conventional_flux_interpolation.sh", error=error, output=output, log=log, submit=submit, universe="vanilla", notification="never", dag=dagman, verbose=2, extra_lines=xlines ) for mcf in np.genfromtxt("/data/user/jlazar/solar_WIMP/mc_paths.txt", dtype=str): run.add_arg("%s" % (mcf)) dagman.build()
f_path = utils.get_francis_path()

# Replace these with your scratch dirs
error = '/scratch/apizzuto/fast_response/condor/error'
output = '/scratch/apizzuto/fast_response/condor/output'
log = '/scratch/apizzuto/fast_response/condor/log'
submit = '/scratch/apizzuto/fast_response/condor/submit'

low_mem_job = pycondor.Job(
    'low_mem_calc_ts_dists',
    f_path + 'universe/transient_full_2d_ts_sampling.py',
    error=error,
    output=output,
    log=log,
    submit=submit,
    getenv=True,
    universe='vanilla',
    verbose=2,
    request_memory=4000,
    request_cpus=1,
    extra_lines=[
        'should_transfer_files = YES',
        'when_to_transfer_output = ON_EXIT',
        'Requirements = (Machine != "node128.icecube.wisc.edu")'
    ])

high_mem_job = pycondor.Job(
    'high_mem_calc_ts_dists',
    f_path + 'universe/transient_full_2d_ts_sampling.py',
    error=error,
    output=output,
    log=log,
    submit=submit,
import pycondor
import numpy as np
from sys import argv as args

ch = int(args[1])
m = int(args[2])
nu_types = ["nu", "nuBar"]

path = "/home/jlazar/condor_logs/e_d_theta_signal_rescale"
error = "%s/error" % path
output = "%s/output" % path
log = "%s/log" % path
submit = "%s/submit" % path

run_ns = [int(i) for i in np.linspace(0, 99, 100)]

for nt in nu_types:
    outfile = "ch%s_m%s_%s_energy_delta_theta_hist_signal.out" % (ch, m, nt)
    run = pycondor.Job("ch%s_m%s_%s_energy_delta_theta_hist_signal" % (ch, m, nt),
                       "/data/user/jlazar/solar_WIMP/test.sh",
                       error=error,
                       output=output,
                       log=log,
                       submit=submit,
                       universe="vanilla",
                       notification="never")
    for n in run_ns:
        run.add_arg("%s %s %s %s" % (n, nt, ch, m))
    run.build()
def data_processing_dag(config='IC86.2012', batch_size=1000, test=False):
    base_dir = os.path.join(comp.paths.comp_data_dir, config, 'data',
                            'test' if test else '')
    i3_hdf_outdir = os.path.join(base_dir, 'i3_hdf')
    df_hdf_outdir = os.path.join(base_dir, 'processed_hdf')

    # Create data processing Jobs / Dagman
    dag_name = 'data_processing_{}'.format(config.replace('.', '-'))
    dag = pycondor.Dagman(dag_name, submit=PYCONDOR_SUBMIT, verbose=1)
    process_i3_job = pycondor.Job(name='process_i3',
                                  executable=WRAPPER_EX,
                                  error=PYCONDOR_ERROR,
                                  output=PYCONDOR_OUTPUT,
                                  log=PYCONDOR_LOG,
                                  submit=PYCONDOR_SUBMIT,
                                  getenv=False,
                                  dag=dag)
    save_df_job = pycondor.Job(name='save_dataframe',
                               executable=WRAPPER_EX,
                               error=PYCONDOR_ERROR,
                               output=PYCONDOR_OUTPUT,
                               log=PYCONDOR_LOG,
                               submit=PYCONDOR_SUBMIT,
                               getenv=False,
                               dag=dag)
    # Ensure that save_df_job doesn't begin until process_i3_job completes
    save_df_job.add_parent(process_i3_job)

    run_gen = comp.datafunctions.run_generator(config)
    if test:
        run_gen = islice(run_gen, 2)
        batch_size = 2
        max_batches = 2
    else:
        max_batches = None

    for run in run_gen:
        # Get files associated with this run
        gcd = comp.level3_data_GCD_file(config, run)
        data_file_batches = comp.level3_data_file_batches(config=config,
                                                          run=run,
                                                          size=batch_size,
                                                          max_batches=max_batches)
        # Process run files in batches
        for idx, files in enumerate(data_file_batches):
            # Set up process_i3_job arguments
            outfile_basename = 'run_{}_part_{:02d}.hdf'.format(run, idx)
            process_i3_outfile = os.path.join(i3_hdf_outdir, outfile_basename)
            # Don't forget to insert GCD file at beginning of FileNameList
            files.insert(0, gcd)
            files_str = ' '.join(files)
            process_i3_arg_template = '{ex} --type data --files {i3_files} --outfile {outfile}'
            process_i3_arg = process_i3_arg_template.format(ex=PROCESS_I3_EX,
                                                            i3_files=files_str,
                                                            outfile=process_i3_outfile)
            process_i3_job.add_arg(process_i3_arg, retry=3)
            # Set up save_df_job arguments
            save_df_outfile = os.path.join(df_hdf_outdir, outfile_basename)
            save_df_arg_template = '{ex} --input {input} --output {output} --type data --config {config}'
            save_df_arg = save_df_arg_template.format(ex=SAVE_DF_EX,
                                                      input=process_i3_outfile,
                                                      output=save_df_outfile,
                                                      config=config)
            save_df_job.add_arg(save_df_arg)

    return dag
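# Hedged usage sketch (an assumption, not part of the original script): drives the
# DAG builder above and submits it, assuming the module-level constants
# (WRAPPER_EX, PROCESS_I3_EX, SAVE_DF_EX, PYCONDOR_* paths) and `comp` are
# available, and that `args` comes from an argparse parser as in the other
# snippets here; `args.batch_size` is a hypothetical option.
if __name__ == "__main__":
    dag = data_processing_dag(config=args.config,
                              batch_size=args.batch_size,
                              test=args.test)
    # Write the submit/.dag files and hand the DAG off to HTCondor
    dag.build_submit()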
log = os.path.join(comp.paths.condor_scratch_dir, 'log')
submit = os.path.join(comp.paths.condor_scratch_dir, 'submit')
outdir = os.path.join(os.path.dirname(__file__), 'feature_scan_results')

dag_name = 'feature_scan_{}_num_groups-{}'.format(pipeline, num_groups)
dag = pycondor.Dagman(name=dag_name, submit=submit)
for idx, (features, random_feature) in enumerate(product(scan_features, [True, False])):
    feature_str = '-'.join(features)
    job = pycondor.Job(name='feature_scan_num_groups-{}_{}'.format(num_groups, idx),
                       executable=executable,
                       submit=submit,
                       error=error,
                       output=output,
                       log=log,
                       request_cpus=n_jobs,
                       request_memory='3GB',
                       verbose=1,
                       dag=dag)
    argument = '--features {} '.format(' '.join(features))
    for arg_name in ['config', 'num_groups', 'pipeline', 'n_jobs']:
        argument += '--{} {} '.format(arg_name, getattr(args, arg_name))
    if random_feature:
        argument += '--random_feature '
    outfile = os.path.join(outdir,
                           '{}_{}-groups-{}.pkl'.format(pipeline, num_groups, idx))
    argument += '--outfile {} '.format(outfile)
    job.add_arg(argument)
def simulation_processing_dag(config='IC86.2012', batch_size=1000, test=False,
                              snow_lambda=None, dom_eff=None):
    if all([snow_lambda, dom_eff]):
        raise NotImplementedError('Specifying a value for both snow_lambda '
                                  'and dom_eff is not currently supported.')

    base_dir = os.path.join(comp.paths.comp_data_dir, config, 'sim',
                            'test' if test else '')
    if snow_lambda is not None:
        # snow_lambda_str = str(snow_lambda).replace('.', '-')
        i3_hdf_outdir = os.path.join(base_dir, 'i3_hdf',
                                     'snow_lambda_{}'.format(snow_lambda))
        df_hdf_outdir = os.path.join(base_dir, 'processed_hdf',
                                     'snow_lambda_{}'.format(snow_lambda))
    elif dom_eff is not None:
        i3_hdf_outdir = os.path.join(base_dir, 'i3_hdf',
                                     'dom_eff_{}'.format(dom_eff))
        df_hdf_outdir = os.path.join(base_dir, 'processed_hdf',
                                     'dom_eff_{}'.format(dom_eff))
    else:
        i3_hdf_outdir = os.path.join(base_dir, 'i3_hdf', 'nominal')
        df_hdf_outdir = os.path.join(base_dir, 'processed_hdf', 'nominal')

    # Create data processing Jobs / Dagman
    dag_name = 'sim_processing_{}'.format(config.replace('.', '-'))
    dag = pycondor.Dagman(dag_name, submit=PYCONDOR_SUBMIT, verbose=1)

    sims = comp.simfunctions.config_to_sim(config)
    for sim in sims:
        process_i3_job = pycondor.Job(
            name='process_i3_{}'.format(sim),
            executable=WRAPPER_EX,
            error=PYCONDOR_ERROR,
            output=PYCONDOR_OUTPUT,
            log=PYCONDOR_LOG,
            submit=PYCONDOR_SUBMIT,
            getenv=False,
            request_memory='3GB' if dom_eff else None,
            dag=dag)
        save_df_job = pycondor.Job(name='save_dataframe_{}'.format(sim),
                                   executable=WRAPPER_EX,
                                   error=PYCONDOR_ERROR,
                                   output=PYCONDOR_OUTPUT,
                                   log=PYCONDOR_LOG,
                                   submit=PYCONDOR_SUBMIT,
                                   getenv=False,
                                   dag=dag)
        # Ensure that save_df_job doesn't begin until process_i3_job completes
        save_df_job.add_parent(process_i3_job)

        # Split file list into smaller batches for submission
        if test:
            batch_size = 2
            max_batches = 2
        else:
            max_batches = None
        sim_file_batches = comp.level3_sim_file_batches(sim,
                                                        size=batch_size,
                                                        max_batches=max_batches)
        gcd = comp.level3_sim_GCD_file(sim)
        for idx, files in enumerate(sim_file_batches):
            # Set up process_i3_job arguments
            outfile_basename = 'sim_{}_part_{:02d}.hdf'.format(sim, idx)
            process_i3_outfile = os.path.join(i3_hdf_outdir, outfile_basename)
            # Don't forget to insert GCD file at beginning of FileNameList
            files.insert(0, gcd)
            files_str = ' '.join(files)
            process_i3_arg_template = '{ex} --type sim --files {i3_files} --outfile {outfile}'
            process_i3_arg = process_i3_arg_template.format(ex=PROCESS_I3_EX,
                                                            i3_files=files_str,
                                                            outfile=process_i3_outfile)
            if snow_lambda is not None:
                process_i3_arg += ' --snow_lambda {}'.format(snow_lambda)
            if dom_eff is not None:
                process_i3_arg += ' --dom_eff {}'.format(dom_eff)
            process_i3_job.add_arg(process_i3_arg)
            # Set up save_df_job arguments
            save_df_outfile = os.path.join(df_hdf_outdir, outfile_basename)
            save_df_arg_template = '{ex} --input {input} --output {output} --type sim --sim {sim} --config {config}'
            save_df_arg = save_df_arg_template.format(ex=SAVE_DF_EX,
                                                      input=process_i3_outfile,
                                                      output=save_df_outfile,
                                                      sim=sim,
                                                      config=config)
            save_df_job.add_arg(save_df_arg)

    return dag