def asynchronous_zfp():
    sweep_parameters = [
        p.ParamRunner('simulation', 'nprocs', [64]),
        p.ParamCmdLineArg('simulation', 'settings', 1, ["settings-files.json"]),
        p.ParamADIOS2XML('simulation', 'sim output engine', 'SimulationOutput',
                         'engine', [{'BP4': {}}]),

        p.ParamRunner('pdf_calc', 'nprocs', [8]),
        p.ParamCmdLineArg('pdf_calc', 'infile', 1, ['gs.bp']),
        p.ParamCmdLineArg('pdf_calc', 'outfile', 2, ['pdf.bp']),
        p.ParamCmdLineArg('pdf_calc', 'bins', 3, [100]),
        p.ParamCmdLineArg('pdf_calc', 'write_orig_data', 4, ['YES']),
        p.ParamADIOS2XML('pdf_calc', 'zfp compression', 'PDFAnalysisOutput',
                         'var_operation',
                         [{"U": {"zfp": {'accuracy': 0.001}}},
                          {"U": {"zfp": {'accuracy': 0.0001}}}]),
    ]

    sweep = p.Sweep(parameters=sweep_parameters,
                    rc_dependency={'pdf_calc': 'simulation'},
                    node_layout={'summit': node_layouts.separate_nodes()})
    return sweep
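# The snippets in this section are campaign specs for the CODAR Cheetah
# experiment harness. They assume imports along the following lines (a sketch
# based on Cheetah's published examples; not every snippet needs all of these,
# and 'node_layouts' is a local helper module used by some of them):
import copy
import math
from datetime import timedelta

from codar.cheetah import Campaign
from codar.cheetah import parameters as p
from codar.cheetah.parameters import SymLink
from codar.savanna.machines import SummitNode, DTH2CPUNode

import node_layouts  # local helper defining separate_nodes(), all_sim_nodes(), etc.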
def get_sweep_params(adios_engine):
    # Setup the sweep parameters for a Sweep
    return [
        # ParamRunner 'nprocs' specifies the no. of ranks to be spawned
        p.ParamRunner('simulation', 'nprocs', [1]),

        # Create a ParamCmdLineArg parameter to specify a command line argument
        # to run the application
        p.ParamCmdLineArg('simulation', 'settings', 1, ["settings.json"]),

        # Edit key-value pairs in the json file.
        # Listing multiple values for a key would create a cross-product of
        # experiments; this example uses a single value for each key.
        p.ParamConfig('simulation', 'feed_rate_U', 'settings.json', 'F', [0.01]),
        p.ParamConfig('simulation', 'kill_rate_V', 'settings.json', 'k', [0.048]),

        # Setup an environment variable
        # p.ParamEnvVar('simulation', 'openmp', 'OMP_NUM_THREADS', [4]),

        # Change the engine for the 'SimulationOutput' IO object in the adios
        # xml file for coupling. As both applications use the same xml file,
        # you need to do this just once.
        p.ParamADIOS2XML('simulation', 'sim output engine', 'SimulationOutput',
                         'engine', [{adios_engine: {}}]),

        # Now setup options for the pdf_calc application.
        p.ParamRunner('pdf_calc', 'nprocs', [1]),
        p.ParamCmdLineArg('pdf_calc', 'infile', 1, ['gs.bp']),
        p.ParamCmdLineArg('pdf_calc', 'outfile', 2, ['pdf']),
    ]
class GrayScott(Campaign):
    name = "Gray-Scott-compression"
    codes = [("gray-scott", dict(exe="gray-scott", sleep_after=1)),
             ("compression", dict(exe="compression"))]
    supported_machines = ['local', 'theta']
    scheduler_options = {
        "theta": {
            "queue": "debug-flat-quad",
            "project": "CSC249ADCD01",
        }
    }
    umask = '027'

    sweeps = [
        p.SweepGroup(
            name="gsc",
            walltime=timedelta(minutes=30),
            component_subdirs=True,
            component_inputs={
                'gray-scott': ['settings.json', 'adios2.xml'],
                'compression': ['adios2.xml', 'sz.config', 'zc.config']
            },
            parameter_groups=[
                p.Sweep([
                    p.ParamCmdLineArg("gray-scott", "settings", 1, ["settings.json"]),
                    p.ParamConfig("gray-scott", "L", "settings.json", "L", [32]),
                    p.ParamConfig("gray-scott", "noise", "settings.json", "noise", [0.01]),
                    p.ParamRunner('gray-scott', 'nprocs', [4]),
                    p.ParamCmdLineArg("compression", "input", 1, ["../gray-scott/gs.bp"]),
                    p.ParamCmdLineArg("compression", "output", 2, ["CompressionOutput.bp"]),
                    p.ParamCmdLineArg("compression", "compressor", 3, ["1"]),
                    p.ParamRunner('compression', 'nprocs', [1]),
                ]),
            ]),
    ]
def create_experiment(writer_nprocs, configFile, scalingType, adios_xml_file,
                      writer_decomposition, machine_name, node_layout,
                      post_hoc=False):
    """
    Creates a sweep object that tells Cheetah how to run the adios io test.
    Assumes 1D decomposition.
    """
    params = [
        p.ParamRunner('writer', 'nprocs', [writer_nprocs]),
        p.ParamCmdLineOption('writer', 'appid', '-a', [1]),
        p.ParamCmdLineOption('writer', 'configFile', '-c', [configFile]),
        # scalingType is the option flag itself (e.g. '-w' or '-s')
        p.ParamCmdLineOption('writer', 'scaling', scalingType, [None]),
        p.ParamCmdLineOption('writer', 'adios_xml_file', '-x', [adios_xml_file]),
        p.ParamCmdLineOption('writer', 'decomposition', '-d', [writer_decomposition]),
        p.ParamCmdLineOption('writer', 'timing_info', '-t', [None]),
    ]

    rc_dependency = None
    sweep = p.Sweep(parameters=params, rc_dependency=rc_dependency)
    if node_layout:
        sweep.node_layout = {machine_name: node_layout}
    return sweep
def posthoc_analysis(sim_nprocs, analysis_nprocs):
    sweep_parameters = [
        p.ParamRunner('simulation', 'nprocs', [sim_nprocs]),
        p.ParamCmdLineOption('simulation', 'sim input', '-in', ["in.lj.nordf"]),
        p.ParamADIOS2XML('simulation', 'sim output engine', 'custom',
                         'engine', [{'BP4': {}}]),

        p.ParamRunner('rdf_calc', 'nprocs', [analysis_nprocs]),
        p.ParamCmdLineOption('rdf_calc', 'input', '-in', ["in.lj.rdf.rerun"]),
    ]

    sweep = p.Sweep(parameters=sweep_parameters,
                    rc_dependency={'rdf_calc': 'simulation'},
                    node_layout={'summit': node_layouts.separate_nodes()})
    return sweep
def posthoc_analysis():
    sweep_parameters = [
        p.ParamRunner('simulation', 'nprocs', [4]),
        p.ParamCmdLineArg('simulation', 'settings', 1, ["settings-files.json"]),
        p.ParamADIOS2XML('simulation', 'sim output engine', 'SimulationOutput',
                         'engine', [{'BP4': {}}]),

        p.ParamRunner('pdf_calc', 'nprocs', [4]),
        p.ParamCmdLineArg('pdf_calc', 'infile', 1, ['gs.bp']),
        p.ParamCmdLineArg('pdf_calc', 'outfile', 2, ['pdf.bp']),
    ]

    sweep = p.Sweep(parameters=sweep_parameters,
                    rc_dependency={'pdf_calc': 'simulation'},
                    node_layout={'summit': node_layouts.separate_nodes()})
    return sweep
def insitu_analysis(node_layout):
    sweep_parameters = [
        p.ParamRunner('simulation', 'nprocs', [64]),
        p.ParamCmdLineArg('simulation', 'settings', 1, ["settings-staging.json"]),
        p.ParamADIOS2XML('simulation', 'sim output engine', 'SimulationOutput',
                         'engine', [{'SST': {}}]),

        p.ParamRunner('pdf_calc', 'nprocs', [4, 8, 16, 32]),
        p.ParamCmdLineArg('pdf_calc', 'infile', 1, ['gs.bp']),
        p.ParamCmdLineArg('pdf_calc', 'outfile', 2, ['pdf.bp']),
    ]

    sweep = p.Sweep(parameters=sweep_parameters,
                    rc_dependency=None,
                    node_layout={'summit': node_layout})
    return sweep
def inline_analysis(sim_nprocs):
    sweep_parameters = [
        p.ParamRunner('sim_inline_rdf_calc', 'nprocs', [sim_nprocs]),
        p.ParamCmdLineOption('sim_inline_rdf_calc', 'input', '-in',
                             ["in.lj.rdf.nodump"]),
    ]

    sweep = p.Sweep(parameters=sweep_parameters,
                    rc_dependency=None,
                    node_layout={'summit': node_layouts.all_sim_nodes()})
    return sweep
def insitu_analysis(sim_nprocs, analysis_nprocs, node_layout, adios_engine):
    sweep_parameters = [
        p.ParamRunner('simulation', 'nprocs', [sim_nprocs]),
        p.ParamCmdLineOption('simulation', 'sim input', '-in', ["in.lj.nordf"]),
        p.ParamADIOS2XML('simulation', 'sim output engine', 'custom',
                         'engine', [{adios_engine: {}}]),

        p.ParamRunner('rdf_calc', 'nprocs', [analysis_nprocs]),
        p.ParamCmdLineOption('rdf_calc', 'input', '-in', ["in.lj.rdf.rerun"]),
        p.ParamADIOS2XML('rdf_calc', 'analysis input engine', 'read_dump',
                         'engine', [{adios_engine: {}}]),
    ]

    sweep = p.Sweep(parameters=sweep_parameters,
                    rc_dependency=None,
                    node_layout={'summit': node_layout})
    return sweep
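# The sweep-builder functions above each return a p.Sweep; a campaign spec
# then collects them into SweepGroups. A minimal sketch of that wiring --
# the class name, executables, and walltime here are illustrative, not from
# the original examples:
class LammpsRdf(Campaign):
    name = "lammps-rdf-example"
    codes = [
        ("simulation", dict(exe="lmp_simulation")),  # hypothetical exe names
        ("rdf_calc", dict(exe="lmp_rdf_calc")),
        ("sim_inline_rdf_calc", dict(exe="lmp_inline")),
    ]
    supported_machines = ['local', 'summit']

    sweeps = [
        p.SweepGroup("rdf-experiments",
                     walltime=timedelta(hours=1),
                     parameter_groups=[
                         posthoc_analysis(512, 64),
                         inline_analysis(512),
                     ]),
    ]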
def create_experiment(writer_nprocs, reader_nprocs, config_file, adios_xml_file,
                      engine, writer_decomposition, reader_decomposition,
                      machine_name, node_layout):
    """
    Creates a sweep object that tells Cheetah how to run the adios io test.
    Assumes 1D decomposition.
    """
    # print(adios_xml_file)
    # print(engine)
    params = [
        # ParamRunner 'nprocs' specifies the no. of ranks to be spawned
        p.ParamRunner('writer', 'nprocs', [writer_nprocs]),
        # ParamCmdLineOption sets a named command line option for the application
        p.ParamCmdLineOption('writer', 'app', '-a', [1]),
        p.ParamCmdLineOption('writer', 'app-config', '-c', [config_file]),
        p.ParamCmdLineOption('writer', 'adios-config', '-x', [adios_xml_file]),
        p.ParamCmdLineOption('writer', 'strongscaling', '-w', [None]),
        p.ParamCmdLineOption('writer', 'timing', '-t', [None]),
        p.ParamCmdLineOption('writer', 'decomposition', '-d', [writer_decomposition]),
        # Change the engine for the 'trj_dump_out' IO object in the adios xml file
        p.ParamADIOS2XML('writer', 'dump_trajectory', 'trj_dump_out', 'engine', [engine]),

        p.ParamRunner('reader', 'nprocs', [reader_nprocs]),
        p.ParamCmdLineOption('reader', 'app', '-a', [2]),
        p.ParamCmdLineOption('reader', 'app-config', '-c', [config_file]),
        p.ParamCmdLineOption('reader', 'adios-config', '-x', [adios_xml_file]),
        p.ParamCmdLineOption('reader', 'weakscaling', '-s', [None]),
        p.ParamCmdLineOption('reader', 'timing', '-t', [None]),
        p.ParamCmdLineOption('reader', 'decomposition', '-d', [reader_decomposition]),
        # Change the engine for the 'trj_dump_in' IO object in the adios xml file
        p.ParamADIOS2XML('reader', 'load_trajectory', 'trj_dump_in', 'engine', [engine]),
    ]

    sweep = p.Sweep(parameters=params)
    if node_layout:
        sweep.node_layout = {machine_name: node_layout}
    return sweep
def create_experiment(writer_nprocs, reader_nprocs, trj_engine, sorted_trj_engine,
                      machine_name, node_layout):
    """
    Creates a sweep object that tells Cheetah how to run the adios io test.
    Assumes 1D decomposition.
    """
    params = [
        # ParamRunner 'nprocs' specifies the no. of ranks to be spawned
        p.ParamRunner('writer', 'nprocs', [writer_nprocs]),
        p.ParamCmdLineArg('writer', 'config', 1, ['copro.nw']),
        # Change the engine for the 'trj' IO object in the adios xml file
        p.ParamADIOS2XML('writer', 'trajectory', 'trj', 'engine', [trj_engine]),

        p.ParamRunner('reader', 'nprocs', [reader_nprocs]),
        p.ParamCmdLineArg('reader', 'input_md', 1, ['copro_md']),
        p.ParamCmdLineArg('reader', 'verbose', 2, [1]),
        # Change the engine for the 'SortingOutput' IO object in the adios xml file
        p.ParamADIOS2XML('reader', 'sorted_trj', 'SortingOutput', 'engine',
                         [sorted_trj_engine]),

        p.ParamRunner('analyzer', 'nprocs', [1]),
        p.ParamCmdLineArg('analyzer', 'script', 1, ['pca3d.R']),
        p.ParamCmdLineArg('analyzer', 'window', 2, [100]),
        p.ParamCmdLineArg('analyzer', 'stride', 3, [10]),
        p.ParamCmdLineArg('analyzer', 'k', 4, [5]),
        p.ParamCmdLineArg('analyzer', 'sorted_trj', 5, ['copro_md_trj.bp']),
        p.ParamCmdLineArg('analyzer', 'xml', 6, ['adios2.xml']),
        p.ParamCmdLineArg('analyzer', 'mcCore', 7, [1]),
        p.ParamCmdLineArg('analyzer', 'output', 8, ['pairs.pdf']),
    ]

    sweep = p.Sweep(parameters=params)
    if node_layout:
        sweep.node_layout = {machine_name: node_layout}
    return sweep
class ProducerConsumer(Campaign):
    # A name for the campaign
    name = "coupling-example"

    # WORKFLOW SETUP
    #---------------
    # A list of the codes that will be part of the workflow.
    # If there is an adios xml file associated with the codes, list it here.
    # 'sleep_after' represents the time gap after which the next code is spawned.
    # Use runner_override to run the code without the default launcher
    # (mpirun/aprun/jsrun etc.). This runs the code as a serial application.
    codes = [
        ("producer", dict(exe="program/producer.py",
                          adios_xml_file='adios2.xml', sleep_after=5)),
    ]

    # CAMPAIGN SETTINGS
    #------------------
    # A list of machines that this campaign is supported on
    supported_machines = ['local', 'titan', 'theta', 'summit',
                          'deepthought2_cpu', 'sdg_tm76']

    # Option to kill an experiment (just one experiment, not the full sweep or
    # campaign) if one of the codes fails
    kill_on_partial_failure = True

    # Some pre-processing in the experiment directory.
    # This is performed when the campaign directory is created
    # (before the campaign is launched).
    run_dir_setup_script = None

    # A post-processing script to be run in the experiment directory after the
    # experiment completes. For example, removing some large files after the
    # experiment is done.
    run_post_process_script = None

    # umask applied to your directory in the campaign so that colleagues can view files
    umask = '027'

    # Scheduler information: job queue, account-id etc.
    # Leave it to None if running on a local machine.
    scheduler_options = {
        "cori": {
            "queue": "debug",
            "constraint": "haswell",
            "license": "SCRATCH,project",
        },
        "titan": {
            "queue": "debug",
            "project": "csc242",
        },
        "theta": {
            "queue": "debug-flat-quad",
            "project": "CSC249ADCD01",
        },
        "summit": {
            'project': 'csc299'
        }
    }

    # Setup your environment. Loading modules, setting the LD_LIBRARY_PATH etc.
    # Ensure this script is executable.
    # app_config_scripts = {'local': 'setup.sh', 'summit': 'env_setup.sh'}

    # PARAMETER SWEEPS
    #-----------------
    # Setup how the workflow is run, and what values to 'sweep' over.
    # Use ParamCmdLineArg to setup a command line arg, ParamCmdLineOption to
    # setup a command line option, and so on.
    sweep1_parameters = [
        p.ParamRunner('producer', 'nprocs', [2]),
        p.ParamCmdLineArg('producer', 'array_size_per_pe', 1, [1024 * 1024]),  # 1M
        p.ParamCmdLineArg('producer', 'num_steps', 2, [2]),
        # p.ParamADIOS2XML('producer', 'engine_sst', 'producer', 'engine', [{"BP4": {}}]),
    ]

    node = SummitNode()
    node.cpu[0] = "producer:0"
    node.cpu[1] = "producer:1"
    node_layout = [node]

    # Create a sweep
    # node_layout represents no. of processes per node
    sweep1 = p.Sweep(node_layout={'summit': node_layout},
                     parameters=sweep1_parameters,
                     rc_dependency=None)

    # Create a sweep group from the above sweep. You can place multiple sweeps
    # in the group. Each group is submitted as a separate job.
    sweepGroup1 = p.SweepGroup(
        "sg-1",
        walltime=300,
        per_run_timeout=60,
        parameter_groups=[sweep1],
        launch_mode='default',  # or MPMD
        # optional:
        # nodes=10,
        # tau_profiling=True,
        # tau_tracing=False,
        # run_repetitions=2,       # repeat each experiment this many times
        # component_subdirs=True,  # codes get their own workspace in the experiment dir
        # component_inputs={'simulation': ['some_input_file'],
        #                   'norm_calc': [SymLink('some_large_file')]},  # inputs required by codes
        # max_procs=64,            # max no. of procs to run concurrently; depends on 'nodes'
    )

    # Sweep groups to be activated
    # sweeps = {'summit': [sweepGroup1]}
    sweeps = [sweepGroup1]
class HeatTransfer(Campaign):
    name = "heat-transfer-small-tau-sos"

    codes = [
        ("dataspaces", dict(exe="bin/dataspaces_server", sleep_after=10,
                            linked_with_sosflow=False)),
        ("stage_write", dict(exe="stage_write/stage_write_tau", sleep_after=5,
                             linked_with_sosflow=True)),
        ("heat", dict(exe="heat_transfer_adios2_tau", sleep_after=5,
                      linked_with_sosflow=True, adios_xml_file='heat_transfer.xml')),
    ]

    supported_machines = ['local', 'titan', 'theta', 'cori']
    kill_on_partial_failure = True
    sosd_path = "bin/sosd"

    scheduler_options = {
        "titan": {"project": "CSC249ADCD01", "queue": "batch"},
        "theta": {"project": "CSC249ADCD01", "queue": "debug-flat-quad"},
        "cori": {"project": "m3084", "queue": "regular", "constraint": "haswell"},
    }

    app_config_scripts = {
        'titan': 'titan_config.sh',
        'theta': 'theta_config.sh',
        'cori': 'cori_config.sh',
        'local': None,
    }

    sweeps = [
        p.SweepGroup(
            "sim",
            per_run_timeout=180,
            sosflow_profiling=True,
            component_subdirs=False,
            parameter_groups=[
                p.Sweep(
                    node_layout={
                        "titan": [{"heat": 16}, {"dataspaces_server": 1}],
                        "theta": [{"heat": 16}, {"dataspaces_server": 1}],
                        "cori": [{"heat": 32}, {"dataspaces_server": 1}],
                    },
                    parameters=[
                        p.ParamRunner("heat", "nprocs", [16]),
                        p.ParamCmdLineArg("heat", "output", 1, ["heat_sim_data"]),
                        p.ParamCmdLineArg("heat", "xprocs", 2, [4]),
                        p.ParamCmdLineArg("heat", "yprocs", 3, [4]),
                        p.ParamCmdLineArg("heat", "xsize", 4, [32]),
                        p.ParamCmdLineArg("heat", "ysize", 5, [32]),
                        p.ParamCmdLineArg("heat", "timesteps", 6, [10]),
                        p.ParamCmdLineArg("heat", "checkpoints", 7, [2]),
                        p.ParamAdiosXML(
                            "heat", "transform_T", "adios_transform:heat:T",
                            ["none", "zlib", "bzip2", "sz", "zfp",
                             "mgard:tol=0.00001",
                             "blosc:threshold=4096,shuffle=bit,lvl=1,threads=4,compressor=zstd"]),
                        p.ParamAdiosXML(
                            "heat", "transport_T", "adios_transport:heat",
                            ["MPI_AGGREGATE:num_aggregators=2;num_ost=2;have_metadata_file=0"]),
                        p.ParamAdiosXML(
                            "heat", "transport_T_final", "adios_transport:heat_final",
                            ["MPI_AGGREGATE:num_aggregators=2;num_ost=2;have_metadata_file=0;"]),
                    ]),
            ]),
        p.SweepGroup(
            "staging-dataspaces",
            per_run_timeout=500,
            sosflow_profiling=True,
            parameter_groups=[
                p.Sweep(
                    node_layout={
                        "titan": [{"dataspaces": 1}, {"stage_write": 8}, {"heat": 16}],
                        "theta": [{"dataspaces": 1}, {"stage_write": 8}, {"heat": 64}],
                        "cori": [{"dataspaces": 1}, {"stage_write": 8}, {"heat": 32}],
                    },
                    parameters=[
                        p.ParamRunner("stage_write", "nprocs", [4]),
                        p.ParamCmdLineArg("stage_write", "input", 1, ["heat_sim_data.bp"]),
                        p.ParamCmdLineArg("stage_write", "output", 2, ["staging_output.bp"]),
                        p.ParamCmdLineArg("stage_write", "rmethod", 3, ["DATASPACES"]),
                        p.ParamCmdLineArg("stage_write", "ropt", 4, [""]),
                        p.ParamCmdLineArg("stage_write", "wmethod", 5, ["POSIX"]),
                        p.ParamCmdLineArg("stage_write", "wopt", 6, [""]),
                        p.ParamCmdLineArg("stage_write", "variables", 7, ["T"]),
                        p.ParamCmdLineArg("stage_write", "transform", 8,
                                          ["none", "zfp:accuracy=0.001",
                                           "sz:absolute=0.001", "zlib:9", "bzip2:9"]),
                        p.ParamRunner("heat", "nprocs", [16]),
                        p.ParamCmdLineArg("heat", "output", 1, ["heat_sim_data"]),
                        p.ParamCmdLineArg("heat", "xprocs", 2, [4]),
                        p.ParamCmdLineArg("heat", "yprocs", 3, [4]),
                        p.ParamCmdLineArg("heat", "xsize", 4, [32]),
                        p.ParamCmdLineArg("heat", "ysize", 5, [32]),
                        p.ParamCmdLineArg("heat", "timesteps", 6, [10]),
                        p.ParamCmdLineArg("heat", "checkpoints", 7, [2]),
                        p.ParamAdiosXML("heat", "transform_T",
                                        "adios_transform:heat:T", ["none"]),
                        p.ParamAdiosXML("heat", "transport_T",
                                        "adios_transport:heat", ["DATASPACES"]),
                        p.ParamAdiosXML(
                            "heat", "transport_T_final", "adios_transport:heat_final",
                            ["MPI_AGGREGATE:num_aggregators=2;num_ost=2;have_metadata_file=0;"]),
                    ]),
            ]),
        p.SweepGroup(
            "staging-dataspaces-dimes",
            per_run_timeout=500,
            sosflow_profiling=True,
            parameter_groups=[
                p.Sweep(
                    node_layout={
                        "titan": [{"dataspaces": 1}, {"stage_write": 8}, {"heat": 16}],
                        "theta": [{"dataspaces": 1}, {"stage_write": 8}, {"heat": 64}],
                        "cori": [{"dataspaces": 1}, {"stage_write": 8}, {"heat": 32}],
                    },
                    parameters=[
                        p.ParamRunner("stage_write", "nprocs", [4]),
                        p.ParamCmdLineArg("stage_write", "input", 1, ["heat_sim_data.bp"]),
                        p.ParamCmdLineArg("stage_write", "output", 2, ["staging_output.bp"]),
                        p.ParamCmdLineArg("stage_write", "rmethod", 3, ["DIMES"]),
                        p.ParamCmdLineArg("stage_write", "ropt", 4, [""]),
                        p.ParamCmdLineArg("stage_write", "wmethod", 5, ["POSIX"]),
                        p.ParamCmdLineArg("stage_write", "wopt", 6, [""]),
                        p.ParamCmdLineArg("stage_write", "variables", 7, ["T"]),
                        p.ParamCmdLineArg("stage_write", "transform", 8,
                                          ["none", "zfp:accuracy=0.001",
                                           "sz:absolute=0.001", "zlib:9", "bzip2:9"]),
                        p.ParamRunner("heat", "nprocs", [16]),
                        p.ParamCmdLineArg("heat", "output", 1, ["heat_sim_data"]),
                        p.ParamCmdLineArg("heat", "xprocs", 2, [4]),
                        p.ParamCmdLineArg("heat", "yprocs", 3, [4]),
                        p.ParamCmdLineArg("heat", "xsize", 4, [32]),
                        p.ParamCmdLineArg("heat", "ysize", 5, [32]),
                        p.ParamCmdLineArg("heat", "timesteps", 6, [4]),
                        p.ParamCmdLineArg("heat", "checkpoints", 7, [2]),
                        p.ParamAdiosXML("heat", "transform_T",
                                        "adios_transform:heat:T", ["none"]),
                        p.ParamAdiosXML("heat", "transport_T",
                                        "adios_transport:heat", ["DIMES"]),
                        p.ParamAdiosXML(
                            "heat", "transport_T_final", "adios_transport:heat_final",
                            ["MPI_AGGREGATE:num_aggregators=2;num_ost=2;have_metadata_file=0;"]),
                    ]),
            ]),
        p.SweepGroup(
            "staging-flexpath",
            per_run_timeout=500,
            sosflow_profiling=True,
            parameter_groups=[
                p.Sweep(
                    node_layout={
                        "titan": [{"dataspaces": 1}, {"stage_write": 8}, {"heat": 16}],
                        "theta": [{"dataspaces": 1}, {"stage_write": 8}, {"heat": 64}],
                        "cori": [{"dataspaces": 1}, {"stage_write": 8}, {"heat": 32}],
                    },
                    parameters=[
                        p.ParamRunner("stage_write", "nprocs", [4]),
                        p.ParamCmdLineArg("stage_write", "input", 1, ["heat_sim_data.bp"]),
                        p.ParamCmdLineArg("stage_write", "output", 2, ["staging_output.bp"]),
                        p.ParamCmdLineArg("stage_write", "rmethod", 3, ["FLEXPATH"]),
                        p.ParamCmdLineArg("stage_write", "ropt", 4, [""]),
                        p.ParamCmdLineArg("stage_write", "wmethod", 5, ["POSIX"]),
                        p.ParamCmdLineArg("stage_write", "wopt", 6, [""]),
                        p.ParamCmdLineArg("stage_write", "variables", 7, ["T"]),
                        p.ParamCmdLineArg("stage_write", "transform", 8,
                                          ["none", "zfp:accuracy=0.001",
                                           "sz:absolute=0.001", "zlib:9", "bzip2:9"]),
                        p.ParamRunner("heat", "nprocs", [16]),
                        p.ParamCmdLineArg("heat", "output", 1, ["heat_sim_data"]),
                        p.ParamCmdLineArg("heat", "xprocs", 2, [4]),
                        p.ParamCmdLineArg("heat", "yprocs", 3, [4]),
                        p.ParamCmdLineArg("heat", "xsize", 4, [32]),
                        p.ParamCmdLineArg("heat", "ysize", 5, [32]),
                        p.ParamCmdLineArg("heat", "timesteps", 6, [4]),
                        p.ParamCmdLineArg("heat", "checkpoints", 7, [2]),
                        p.ParamAdiosXML("heat", "transform_T",
                                        "adios_transform:heat:T", ["none"]),
                        p.ParamAdiosXML("heat", "transport_T",
                                        "adios_transport:heat", ["FLEXPATH"]),
                        p.ParamAdiosXML(
                            "heat", "transport_T_final", "adios_transport:heat_final",
                            ["MPI_AGGREGATE:num_aggregators=2;num_ost=2;have_metadata_file=0;"]),
                    ]),
            ]),
    ]
class ProducerConsumer(Campaign):
    # A name for the campaign
    name = "coupling-example"

    # WORKFLOW SETUP
    #---------------
    # A list of the codes that will be part of the workflow.
    # If there is an adios xml file associated with the codes, list it here.
    # 'sleep_after' represents the time gap after which the next code is spawned.
    # Use runner_override to run the code without the default launcher
    # (mpirun/aprun/jsrun etc.). This runs the code as a serial application.
    codes = [
        ("producer", dict(exe="producer.py", adios_xml_file='adios2.xml',
                          sleep_after=5)),
        ("mean_calc", dict(exe="mean_calculator.py", adios_xml_file='adios2.xml',
                           runner_override=False))
    ]

    # CAMPAIGN SETTINGS
    #------------------
    # A list of machines that this campaign is supported on
    supported_machines = ['local', 'titan', 'theta', 'summit', 'rhea',
                          'deepthought2_cpu', 'sdg_tm76']

    # Option to kill an experiment (just one experiment, not the full sweep or
    # campaign) if one of the codes fails
    kill_on_partial_failure = True

    # Some pre-processing in the experiment directory.
    # This is performed when the campaign directory is created
    # (before the campaign is launched).
    run_dir_setup_script = None

    # A post-processing script to be run in the experiment directory after the
    # experiment completes. For example, removing some large files after the
    # experiment is done.
    run_post_process_script = None

    # umask applied to your directory in the campaign so that colleagues can view files
    umask = '027'

    # Scheduler information: job queue, account-id etc.
    # Leave it to None if running on a local machine.
    scheduler_options = {'theta': {'project': '', 'queue': 'batch'},
                         'summit': {'project': 'csc143', 'reservation': 'csc143_m414'},
                         'rhea': {'project': 'csc143'}}

    # Setup your environment. Loading modules, setting the LD_LIBRARY_PATH etc.
    # Ensure this script is executable.
    app_config_scripts = {'local': 'setup.sh', 'summit': 'env_setup.sh'}

    # PARAMETER SWEEPS
    #-----------------
    # Setup how the workflow is run, and what values to 'sweep' over.
    # Use ParamCmdLineArg to setup a command line arg, ParamCmdLineOption to
    # setup a command line option, and so on.
    sweep1_parameters = [
        p.ParamRunner('producer', 'nprocs', [2]),
        p.ParamRunner('mean_calc', 'nprocs', [2]),
        p.ParamCmdLineArg('producer', 'array_size_per_pe', 1, [1024 * 1024]),  # 1M
        p.ParamCmdLineArg('producer', 'num_steps', 2, [10]),
        p.ParamADIOS2XML('producer', 'engine_sst', 'producer', 'engine', [{"SST": {}}]),
        # p.ParamADIOS2XML('producer', 'compression', 'producer', 'var_operation',
        #                  [{"U": {"zfp": {'accuracy': 0.001, 'tolerance': 0.9}}}]),
    ]

    # Summit node layout.
    # Create a shared node layout where the producer and mean_calc share
    # compute nodes.
    shared_node_nc = SummitNode()
    # place producer on the first socket
    for i in range(21):
        shared_node_nc.cpu[i] = 'producer:{}'.format(i)
    # place analysis on the second socket
    for i in range(8):
        shared_node_nc.cpu[22 + i] = 'mean_calc:{}'.format(i)

    # This should be 'obj=machine.VirtualNode()'
    shared_node_dt = DTH2CPUNode()
    for i in range(10):
        shared_node_dt.cpu[i] = 'producer:{}'.format(i)
    shared_node_dt.cpu[11] = 'mean_calc:0'
    shared_node_dt.cpu[12] = 'mean_calc:1'

    # Create a sweep
    # node_layout represents no. of processes per node
    sweep1 = p.Sweep(node_layout={'summit': [shared_node_nc],
                                  'deepthought2_cpu': [shared_node_dt]},
                     parameters=sweep1_parameters,
                     rc_dependency=None)

    # Create a sweep group from the above sweep. You can place multiple sweeps
    # in the group. Each group is submitted as a separate job.
    sweepGroup1 = p.SweepGroup(
        "sg-1",
        walltime=300,
        per_run_timeout=60,
        parameter_groups=[sweep1],
        launch_mode='default',  # or MPMD
        tau_profiling=True,
        tau_tracing=False,
        # optional:
        # nodes=10,
        # run_repetitions=2,       # repeat each experiment this many times
        # component_subdirs=True,  # codes get their own workspace in the experiment dir
        # component_inputs={'simulation': ['some_input_file'],
        #                   'norm_calc': [SymLink('some_large_file')]},  # inputs required by codes
        # max_procs=64,            # max no. of procs to run concurrently; depends on 'nodes'
    )

    # A second group built from the same sweep; each group is submitted as a
    # separate job.
    sweepGroup2 = p.SweepGroup(
        "sg-2",
        walltime=300,
        per_run_timeout=60,
        parameter_groups=[sweep1],
        launch_mode='default',  # or MPMD
        tau_profiling=True,
        tau_tracing=False,
    )

    # Sweep groups to be activated, keyed by machine:
    # sweepGroup1 runs on any machine, sweepGroup2 only on summit.
    sweeps = {'MACHINE_ANY': [sweepGroup1], 'summit': [sweepGroup2]}
class Brusselator(Campaign):
    # A name for the campaign
    name = "Brusselator"

    # WORKFLOW SETUP
    #---------------
    # A list of the codes that will be part of the workflow.
    # If there is an adios xml file associated with the codes, list it here.
    # 'sleep_after' represents the time gap after which the next code is spawned.
    # Use runner_override to run the code without the default launcher
    # (mpirun/aprun/jsrun etc.). This runs the code as a serial application.
    codes = [
        ("simulation", dict(exe="simulation/Brusselator",
                            adios_xml_file='adios2.xml', sleep_after=None)),
        ("norm_calc", dict(exe="analysis/norm_calc",
                           adios_xml_file='adios2.xml', runner_override=False))
    ]

    # CAMPAIGN SETTINGS
    #------------------
    # A list of machines that this campaign is supported on
    supported_machines = ['local', 'titan', 'theta']

    # Option to kill an experiment (just one experiment, not the full sweep or
    # campaign) if one of the codes fails
    kill_on_partial_failure = True

    # Some pre-processing in the experiment directory.
    # This is performed when the campaign directory is created
    # (before the campaign is launched).
    run_dir_setup_script = None

    # A post-processing script to be run in the experiment directory after the
    # experiment completes. For example, removing some large files after the
    # experiment is done.
    run_post_process_script = None

    # umask applied to your directory in the campaign so that colleagues can view files
    umask = '027'

    # Scheduler information: job queue, account-id etc.
    # Leave it to None if running on a local machine.
    scheduler_options = {'titan': {'project': 'CSC249ADCD01', 'queue': 'batch'}}

    # Setup your environment. Loading modules, setting the LD_LIBRARY_PATH etc.
    # Ensure this script is executable.
    app_config_scripts = {'local': 'setup.sh', 'titan': 'env_setup.sh'}

    # PARAMETER SWEEPS
    #-----------------
    # Setup how the workflow is run, and what values to 'sweep' over.
    # Use ParamCmdLineArg to setup a command line arg, ParamCmdLineOption to
    # setup a command line option, and so on.
    sweep1_parameters = [
        # ParamRunner with 'nprocs' sets the number of MPI processes
        p.ParamRunner('simulation', 'nprocs', [4, 8]),  # <-- how to sweep over values
        p.ParamCmdLineArg('simulation', 'output', 1, ['bru.bp']),
        p.ParamCmdLineArg('simulation', 'nx', 2, [32]),
        p.ParamCmdLineArg('simulation', 'ny', 3, [32]),
        p.ParamCmdLineArg('simulation', 'nz', 4, [32]),
        # sweep over these values; creates a cross-product of runs with 'nprocs' above
        p.ParamCmdLineArg('simulation', 'steps', 5, [10, 20, 50]),
        p.ParamCmdLineArg('simulation', 'plotgap', 6, [1]),

        p.ParamRunner('norm_calc', 'nprocs', [1]),
        p.ParamCmdLineArg('norm_calc', 'infile', 1, ['bru.bp']),
        p.ParamCmdLineArg('norm_calc', 'outfile', 2, ['norm_calc.out.bp']),
        p.ParamCmdLineArg('norm_calc', 'write_norms_only', 3, [1]),

        # ParamADIOS2XML can be used to setup a value in the ADIOS xml file for
        # the application. Set the transport to BPFile, as we want the codes to
        # run serially; set the rc_dependency in the Sweep to denote the
        # dependency between the codes. To couple codes for concurrent
        # execution, use a transport method such as SST.
        p.ParamADIOS2XML('simulation', 'SimulationOutput', 'engine', [{"BPFile": {}}]),
        p.ParamADIOS2XML('simulation', 'AnalysisOutput', 'engine', [{"BPFile": {}}]),
        # To sweep over engine parameters instead:
        # p.ParamADIOS2XML('simulation', 'SimulationOutput', 'engine',
        #                  [{"BPFile": {'Threads': 1}},
        #                   {"BPFile": {"ProfileUnits": "Microseconds"}}]),

        # Use ParamCmdLineOption for named arguments
        # p.ParamCmdLineOption('plotting', 'input_stream', '-i', ['bru.bp']),

        # Use ParamKeyValue to set options in a key-value configuration file.
        # Input file can be a json file. File path can be relative to the path
        # specified in '-a' or it can be absolute. File will be copied to the
        # working_dir automatically by Cheetah.
        # p.ParamKeyValue('simulation', 'feed_rate', 'input.conf', 'key', ['value']),

        # Sweep over environment variables
        # p.ParamEnvVar('simulation', 'openmp_stuff', 'OMP_NUM_THREADS', [4, 8]),

        # Pass additional scheduler arguments.
        # p.ParamSchedulerArgs('simulation', [{'-f': 'hosts.txt'}])
    ]

    # Create a sweep.
    # node_layout represents no. of processes per node.
    # rc_dependency denotes dependency between run components. Here, norm_calc
    # will run after simulation has finished.
    sweep1 = p.Sweep(
        node_layout={'titan': [{'simulation': 16}, {'norm_calc': 4}]},  # simulation: 16 ppn, norm_calc: 4 ppn
        parameters=sweep1_parameters,
        rc_dependency={'norm_calc': 'simulation'})

    # Create a sweep group from the above sweep. You can place multiple sweeps
    # in the group. Each group is submitted as a separate job.
    sweepGroup1 = p.SweepGroup(
        "sg-tmp",
        walltime=300,
        per_run_timeout=60,
        parameter_groups=[sweep1],
        launch_mode='default',  # or MPMD
        # optional:
        # nodes=10,
        # run_repetitions=2,       # no. of times each experiment must be repeated (here, total runs = 3)
        # component_subdirs=True,  # codes get their own workspace in the experiment dir
        # component_inputs={'simulation': ['some_input_file'],
        #                   'norm_calc': [SymLink('some_large_file')]},  # inputs required by codes
        # max_procs=64,            # max no. of procs to run concurrently; depends on 'nodes'
    )

    sweepGroup2 = copy.deepcopy(sweepGroup1)
    sweepGroup2.name = 'sg-2'

    # Sweep groups to be activated
    sweeps = [sweepGroup1, sweepGroup2]
class ProducerConsumer(Campaign):
    # A name for the campaign
    name = "coupling-example"

    # WORKFLOW SETUP
    #---------------
    # A list of the codes that will be part of the workflow.
    # If there is an adios xml file associated with the codes, list it here.
    # 'sleep_after' represents the time gap after which the next code is spawned.
    # Use runner_override to run the code without the default launcher
    # (mpirun/aprun/jsrun etc.). This runs the code as a serial application.
    codes = [("producer", dict(exe="producer.py", adios_xml_file='adios2.xml',
                               sleep_after=5)),
             ("mean_calc", dict(exe="mean_calculator.py", adios_xml_file='adios2.xml',
                                runner_override=False))]

    # CAMPAIGN SETTINGS
    #------------------
    # A list of machines that this campaign is supported on
    supported_machines = ['local', 'titan', 'theta', 'summit']

    # Option to kill an experiment (just one experiment, not the full sweep or
    # campaign) if one of the codes fails
    kill_on_partial_failure = True

    # Some pre-processing in the experiment directory.
    # This is performed when the campaign directory is created
    # (before the campaign is launched).
    run_dir_setup_script = None

    # A post-processing script to be run in the experiment directory after the
    # experiment completes. For example, removing some large files after the
    # experiment is done.
    run_post_process_script = None

    # umask applied to your directory in the campaign so that colleagues can view files
    umask = '027'

    # Scheduler information: job queue, account-id etc.
    # Leave it to None if running on a local machine.
    scheduler_options = {
        'theta': {
            'project': '',
            'queue': 'batch'
        },
        'summit': {
            'project': 'CSC299'
        }
    }

    # Setup your environment. Loading modules, setting the LD_LIBRARY_PATH etc.
    # Ensure this script is executable.
    app_config_scripts = {'local': 'setup.sh', 'summit': 'env_setup.sh'}

    # PARAMETER SWEEPS
    #-----------------
    # Setup how the workflow is run, and what values to 'sweep' over.
    # Use ParamCmdLineArg to setup a command line arg, ParamCmdLineOption to
    # setup a command line option, and so on.
    sweep1_parameters = [
        p.ParamRunner('producer', 'nprocs', [128]),
        p.ParamRunner('mean_calc', 'nprocs', [36]),
        p.ParamCmdLineArg('producer', 'array_size_per_pe', 1, [1024 * 1024]),  # 1M
        p.ParamCmdLineArg('producer', 'num_steps', 2, [10]),
        p.ParamADIOS2XML('producer', 'staging', 'producer', 'engine', [{"SST": {}}]),
    ]

    # Layout 1: producer and mean_calc share nodes, 6 cores per producer rank
    shared_node = SummitNode()
    for i in range(18):
        shared_node.cpu[i] = "producer:{}".format(math.floor(i / 6))
        shared_node.cpu[i + 21] = "producer:{}".format(math.floor((i + 18) / 6))
    for i in range(3):
        shared_node.cpu[i + 18] = "mean_calc:0"
        shared_node.cpu[i + 18 + 21] = "mean_calc:0"
    for i in range(6):
        shared_node.gpu[i] = ["producer:{}".format(i)]
    shared_node_layout = [shared_node]

    # Layout 2: shared node, one core per rank
    shared_node_1_per_rank = SummitNode()
    for i in range(18):
        shared_node_1_per_rank.cpu[i] = "producer:{}".format(i)
        shared_node_1_per_rank.cpu[i + 21] = "producer:{}".format(i + 18)
    for i in range(3):
        shared_node_1_per_rank.cpu[i + 18] = "mean_calc:{}".format(i)
        shared_node_1_per_rank.cpu[i + 18 + 21] = "mean_calc:{}".format(i + 3)
    for i in range(6):
        shared_node_1_per_rank.gpu[i] = ["producer:{}".format(i)]
    shared_node_layout_2 = [shared_node_1_per_rank]

    # Layout 3: shared node with ranks sharing GPUs
    shared_node_shared_gpu = SummitNode()
    for i in range(18):
        shared_node_shared_gpu.cpu[i] = "producer:{}".format(math.floor(i / 6))
        shared_node_shared_gpu.cpu[i + 21] = "producer:{}".format(math.floor((i + 18) / 6))
    for i in range(3):
        shared_node_shared_gpu.cpu[i + 18] = "mean_calc:0"
        shared_node_shared_gpu.cpu[i + 18 + 21] = "mean_calc:0"
    shared_node_shared_gpu.gpu[0] = ["producer:0"]
    shared_node_shared_gpu.gpu[1] = ["producer:0", "producer:1"]
    shared_node_shared_gpu.gpu[2] = ["producer:0", "producer:1", "mean_calc:0"]
    shared_node_layout_3 = [shared_node_shared_gpu]

    # Layout 4: producer and mean_calc on separate nodes
    sep_node_producer = SummitNode()
    sep_node_mean_calc = SummitNode()
    for i in range(18):
        sep_node_producer.cpu[i] = "producer:{}".format(math.floor(i / 6))
    for i in range(3):
        sep_node_mean_calc.cpu[i + 18] = "mean_calc:0"
        sep_node_mean_calc.cpu[i + 18 + 21] = "mean_calc:0"
    for i in range(3):
        sep_node_producer.gpu[i] = ["producer:{}".format(i)]
    sep_node_layout = [sep_node_producer, sep_node_mean_calc]

    # Create one sweep per node layout
    sweep1 = p.Sweep(node_layout={'summit': shared_node_layout},
                     parameters=sweep1_parameters, rc_dependency=None)
    sweep2 = p.Sweep(node_layout={'summit': shared_node_layout_2},
                     parameters=sweep1_parameters, rc_dependency=None)
    sweep3 = p.Sweep(node_layout={'summit': shared_node_layout_3},
                     parameters=sweep1_parameters, rc_dependency=None)
    sweep4 = p.Sweep(node_layout={'summit': sep_node_layout},
                     parameters=sweep1_parameters, rc_dependency=None)

    # Create a sweep group from the above sweeps. You can place multiple sweeps
    # in the group. Each group is submitted as a separate job.
    sweepGroup1 = p.SweepGroup(
        "sg-1",
        walltime=300,
        per_run_timeout=60,
        parameter_groups=[sweep1, sweep2, sweep3, sweep4],
        launch_mode='default',  # or MPMD
        # optional:
        # tau_profiling=True,
        # tau_tracing=False,
        # nodes=10,
        # component_subdirs=True,  # codes get their own workspace in the experiment dir
        # component_inputs={'producer': ['some_input_file'],
        #                   'norm_calc': [SymLink('some_large_file')]},  # inputs required by codes
        # max_procs=64,            # max no. of procs to run concurrently; depends on 'nodes'
    )

    # Sweep groups to be activated
    sweeps = {'summit': [sweepGroup1]}
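# A minimal annotated sketch of SummitNode placement, following the API used
# in the layouts above. Per those examples, cpu[] has 42 slots -- two sockets
# of 21 usable cores, the second socket starting at index 21 -- and gpu[] has
# 6 slots, one per GPU. A rank name may occupy several cpu slots (multiple
# cores per rank), and a gpu slot holds a *list* of ranks, which is how the
# shared-GPU layout above works.
example_node = SummitNode()
example_node.cpu[0] = "producer:0"    # rank 0 on socket 0, core 0
example_node.cpu[21] = "producer:1"   # rank 1 on socket 1, core 0
example_node.gpu[0] = ["producer:0", "producer:1"]  # both ranks share GPU 0
example_layout = [example_node]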
class Exaalt(Campaign):
    name = "Exaalt"
    codes = dict(exaalt="pt_producer_global",
                 stage_write="./stage_write/stage_write")
    supported_machines = ['titan_fob']
    project = "CSC143"
    queue = "batch"
    inputs = ["states_list.txt"]

    sweeps = [
        # Staging and compression enabled
        p.SweepGroup(
            nodes=320,
            post_processing="",
            parameter_groups=[
                p.Sweep([
                    p.ParamRunner("exaalt", "nprocs", [4096]),
                    p.ParamCmdLineArg("exaalt", "states_list_file", 1, ["states_list.txt"]),
                    p.ParamCmdLineArg("exaalt", "no_of_states", 2, [2097152]),  # two million states
                    p.ParamCmdLineArg("exaalt", "bp_output_file", 3, ["output.bp"]),
                    p.ParamCmdLineArg("exaalt", "transport_method", 4, ["FLEXPATH"]),
                    p.ParamCmdLineArg("exaalt", "transport_variables", 5, [""]),
                    p.ParamCmdLineArg("exaalt", "transport_options", 6, ["none"]),

                    p.ParamRunner("stage_write", "nprocs", [512, 1024]),
                    p.ParamCmdLineArg("stage_write", "input_bp_file", 1, ["output.bp"]),
                    p.ParamCmdLineArg("stage_write", "output_bp_file", 2, ["staged.bp"]),
                    p.ParamCmdLineArg("stage_write", "adios_read_method", 3, ["FLEXPATH"]),
                    p.ParamCmdLineArg("stage_write", "read_method_params", 4, [""]),
                    p.ParamCmdLineArg("stage_write", "adios_write_method", 5, ["POSIX"]),
                    p.ParamCmdLineArg("stage_write", "write_method_params", 6, [""]),
                    p.ParamCmdLineArg("stage_write", "variables_to_transform", 7,
                                      ["atom_id,atom_type,px,py,pz,imx,imy,imz,atom_vid,vx,vy,vz"]),
                    p.ParamCmdLineArg("stage_write", "transform_params", 8,
                                      ["none", "zlib:9", "bzip2:9"]),
                ]),
            ]),

        # No staging or compression. Simulation writes data to disk.
        # This is the baseline test case.
        p.SweepGroup(
            nodes=256,
            post_processing="",
            parameter_groups=[
                p.Sweep([
                    p.ParamRunner("exaalt", "nprocs", [4096]),
                    p.ParamCmdLineArg("exaalt", "states_list_file", 1, ["states_list.txt"]),
                    p.ParamCmdLineArg("exaalt", "no_of_states", 2, [10485760]),
                    p.ParamCmdLineArg("exaalt", "bp_output_file", 3, ["output.bp"]),
                    p.ParamCmdLineArg("exaalt", "transport_method", 4, ["POSIX"]),
                    p.ParamCmdLineArg("exaalt", "transport_variables", 5, [""]),
                    p.ParamCmdLineArg("exaalt", "transport_options", 6, ["none"]),
                ]),
            ]),
    ]
class GrayScott(Campaign):
    name = "Gray-Scott"
    codes = [("gray-scott", dict(exe="gray-scott"))]
    app_config_scripts = {'summit': 'env_summit.sh'}
    supported_machines = ['local', 'theta', 'summit']
    scheduler_options = {
        "theta": {
            "queue": "debug-flat-quad",
            "project": "CSC249ADCD01",
        },
        "summit": {
            "project": "csc299",
        }
    }
    kill_on_partial_failure = True
    umask = '027'

    # Shared Summit node: one rank per core, one GPU per rank
    nprocs = 6
    shared_node = SummitNode()
    for i in range(nprocs):
        shared_node.cpu[i] = "gray-scott:{}".format(i)
        shared_node.gpu[i] = ["gray-scott:{}".format(i)]
    shared_node_layout = [shared_node]

    L = [256]
    noise = [1.e-5]
    Du = [0.1, 0.2, 0.3]
    Dv = [0.05, 0.1, 0.15]
    F = [0.01, 0.02, 0.03]
    k = [0.048, 0.04, 0.06]

    sweep_parameters = [
        p.ParamCmdLineArg("gray-scott", "settings", 1, ["settings.json"]),
        p.ParamConfig("gray-scott", "L", "settings.json", "L", L),
        p.ParamConfig("gray-scott", "noise", "settings.json", "noise", noise),
        p.ParamConfig("gray-scott", "Du", "settings.json", "Du", Du),
        p.ParamConfig("gray-scott", "Dv", "settings.json", "Dv", Dv),
        p.ParamConfig("gray-scott", "F", "settings.json", "F", F),
        p.ParamConfig("gray-scott", "k", "settings.json", "k", k),
        p.ParamRunner('gray-scott', 'nprocs', [nprocs]),
    ]

    sweep = p.Sweep(parameters=sweep_parameters,
                    node_layout={'summit': shared_node_layout})

    nodes = len(noise) * len(Du) * len(Dv) * len(F) * len(k)

    sweeps = [
        p.SweepGroup(
            name="gs",
            walltime=timedelta(minutes=60),
            nodes=nodes,
            component_subdirs=True,
            component_inputs={
                'gray-scott': ['settings.json', 'adios2.xml'],
            },
            parameter_groups=[sweep]
        )
    ]
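# Cheetah generates one run per element of the cross product of all the
# parameter value lists in a Sweep. A quick, self-contained stdlib check of
# the sweep size above (values copied from the class; 1 * 1 * 3 * 3 * 3 * 3 = 81):
from itertools import product

L_vals = [256]
noise_vals = [1.e-5]
Du_vals = [0.1, 0.2, 0.3]
Dv_vals = [0.05, 0.1, 0.15]
F_vals = [0.01, 0.02, 0.03]
k_vals = [0.048, 0.04, 0.06]
print(len(list(product(L_vals, noise_vals, Du_vals, Dv_vals, F_vals, k_vals))))  # 81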
class PlanarReadqCampaign(Campaign):
    name = "planar"
    codes = [("app1", dict(exe="adios_iotest", adios_xml_file='adios2.xml',
                           sleep_after=5)),
             ("app2", dict(exe="adios_iotest", adios_xml_file='adios2.xml',
                           runner_override=False))]
    run_dir_setup_script = "planar_reads.sh"
    supported_machines = ['local', 'cori', 'titan', 'theta']
    scheduler_options = {
        "cori": {
            "queue": "debug",
            "constraint": "haswell",
            "license": "SCRATCH,project",
        },
        "titan": {
            "queue": "debug",
            "project": "csc242",
        },
        "theta": {
            "queue": "debug-flat-quad",
            "project": "CSC249ADCD01",
        }
    }
    umask = '027'

    sweeps = [
        p.SweepGroup(
            name="all-methods",
            nodes=2,
            walltime=timedelta(minutes=30),
            parameter_groups=[
                p.Sweep([
                    p.ParamRunner("app1", "nprocs", [8]),
                    p.ParamRunner("app2", "nprocs", [8]),
                    p.ParamCmdLineOption("app1", "app ID", "-a", [1]),
                    p.ParamCmdLineOption("app1", "config file", "-c", ["planar_reads.txt"]),
                    p.ParamCmdLineOption("app1", "adios xml", "-x", ["adios2.xml"]),
                    p.ParamCmdLineArg("app1", "weak scaling", 1, ["-w"]),  # Fix this somehow
                    p.ParamCmdLineArg("app1", "rank decomp", 2, ["-d"]),
                    p.ParamCmdLineArg("app1", "decomp x", 3, [2]),
                    p.ParamCmdLineArg("app1", "decomp y", 4, [2]),
                    p.ParamCmdLineArg("app1", "decomp z", 5, [2]),
                    p.ParamCmdLineOption("app2", "app ID", "-a", [2]),
                    p.ParamCmdLineOption("app2", "config file", "-c", ["planar_reads.txt"]),
                    p.ParamCmdLineOption("app2", "adios xml", "-x", ["adios2.xml"]),
                    p.ParamCmdLineArg("app2", "weak scaling", 1, ["-w"]),
                    p.ParamCmdLineArg("app2", "rank decomp", 2, ["-d"]),
                    p.ParamCmdLineArg("app2", "decomp x", 3, [2]),
                    p.ParamCmdLineArg("app2", "decomp y", 4, [2]),
                    p.ParamCmdLineArg("app2", "decomp z", 5, [2]),
                    p.ParamEnvVar("app1", "cube length", "CUBE_LEN", [40]),
                    p.ParamEnvVar("app1", "read pattern", "READ_PATTERN",
                                  ["ij", "ik", "jk", "chunk"])
                ]),
            ]),
    ]
class Exaalt(Campaign):
    name = "Exaalt"
    codes = [("stage_write", dict(exe="stage_write/stage_write")),
             ("exaalt", dict(exe="pt_producer_global"))]

    # Note that titan has 16 processes per node
    supported_machines = ['titan']
    scheduler_options = {
        "titan": {
            "project": "CSC242",
            "queue": "batch"
        }
    }
    kill_on_partial_failure = True

    # Example post process script which saves the contents of the output
    # directory after a run, then deletes it to make room for future
    # runs without worrying about the user's disk quota.
    run_post_process_script = "post-run-rm-staged.py"

    inputs = ["states_list.txt"]

    sweeps = [
        # Test group that can be run separately to test that binaries and
        # post process script are working. Only generates four runs, with
        # the same number of nodes but different compression options for stage.
        p.SweepGroup(
            name="test-32",
            nodes=34,
            walltime=timedelta(hours=1, minutes=5),
            per_run_timeout=timedelta(minutes=15),
            parameter_groups=[
                p.Sweep([
                    # 32 exaalt nodes
                    p.ParamRunner("exaalt", "nprocs", [512]),
                    p.ParamCmdLineArg("exaalt", "states_list_file", 1, ["states_list.txt"]),
                    p.ParamCmdLineArg("exaalt", "no_of_states", 2, [20480]),
                    p.ParamCmdLineArg("exaalt", "bp_output_file", 3, ["output.bp"]),
                    p.ParamCmdLineArg("exaalt", "transport_method", 4, ["FLEXPATH"]),
                    p.ParamCmdLineArg("exaalt", "transport_variables", 5, [""]),
                    p.ParamCmdLineArg("exaalt", "transport_options", 6, ["none"]),
                    # 2 stage nodes
                    p.ParamRunner("stage_write", "nprocs", [32]),
                    p.ParamCmdLineArg("stage_write", "input_bp_file", 1, ["output.bp"]),
                    p.ParamCmdLineArg("stage_write", "output_bp_file", 2, ["staged.bp"]),
                    p.ParamCmdLineArg("stage_write", "adios_read_method", 3, ["FLEXPATH"]),
                    p.ParamCmdLineArg("stage_write", "read_method_params", 4, [""]),
                    p.ParamCmdLineArg("stage_write", "adios_write_method", 5, ["MPI_AGGREGATE"]),
                    p.ParamCmdLineArg("stage_write", "write_method_params", 6,
                                      ["have_metadata_file=0;num_aggregators=4"]),
                    p.ParamCmdLineArg("stage_write", "variables_to_transform", 7,
                                      ["atom_id,atom_type,px,py,pz,imx,imy,imz,atom_vid,vx,vy,vz"]),
                    p.ParamCmdLineArg("stage_write", "transform_params", 8,
                                      ["none", "zlib:9", "bzip2:9", "lz4"]),
                ]),
            ]),

        # Submit everything as a single job, to avoid queuing delay.
        # Titan allows 12 hour walltime for jobs of 313-3,749 nodes in
        # the batch queue.
        p.SweepGroup(
            name="64-128-256",
            nodes=384,
            walltime=timedelta(hours=48),
            per_run_timeout=timedelta(hours=1),
            parameter_groups=[
                p.Sweep([
                    # 256 exaalt nodes
                    p.ParamRunner("exaalt", "nprocs", [4096]),
                    p.ParamCmdLineArg("exaalt", "states_list_file", 1, ["states_list.txt"]),
                    p.ParamCmdLineArg("exaalt", "no_of_states", 2, [1433600]),
                    p.ParamCmdLineArg("exaalt", "bp_output_file", 3, ["output.bp"]),
                    p.ParamCmdLineArg("exaalt", "transport_method", 4, ["FLEXPATH"]),
                    p.ParamCmdLineArg("exaalt", "transport_variables", 5, [""]),
                    p.ParamCmdLineArg("exaalt", "transport_options", 6, ["none"]),
                    # 16, 8, or 4 stage nodes
                    p.ParamRunner("stage_write", "nprocs", [256, 128, 64]),
                    p.ParamCmdLineArg("stage_write", "input_bp_file", 1, ["output.bp"]),
                    p.ParamCmdLineArg("stage_write", "output_bp_file", 2, ["staged.bp"]),
                    p.ParamCmdLineArg("stage_write", "adios_read_method", 3, ["FLEXPATH"]),
                    p.ParamCmdLineArg("stage_write", "read_method_params", 4, [""]),
                    p.ParamCmdLineArg("stage_write", "adios_write_method", 5, ["MPI_AGGREGATE"]),
                    p.ParamCmdLineArg("stage_write", "write_method_params", 6,
                                      ["have_metadata_file=0;num_aggregators=16"]),
                    p.ParamCmdLineArg("stage_write", "variables_to_transform", 7,
                                      ["atom_id,atom_type,px,py,pz,imx,imy,imz,atom_vid,vx,vy,vz"]),
                    p.ParamCmdLineArg("stage_write", "transform_params", 8,
                                      ["none", "zlib:9", "bzip2:9", "lz4"]),
                ]),
                p.Sweep([
                    # 128 exaalt nodes
                    p.ParamRunner("exaalt", "nprocs", [2048]),
                    p.ParamCmdLineArg("exaalt", "states_list_file", 1, ["states_list.txt"]),
                    p.ParamCmdLineArg("exaalt", "no_of_states", 2, [1433600]),
                    p.ParamCmdLineArg("exaalt", "bp_output_file", 3, ["output.bp"]),
                    p.ParamCmdLineArg("exaalt", "transport_method", 4, ["FLEXPATH"]),
                    p.ParamCmdLineArg("exaalt", "transport_variables", 5, [""]),
                    p.ParamCmdLineArg("exaalt", "transport_options", 6, ["none"]),
                    # 8, 4, or 2 stage nodes
                    p.ParamRunner("stage_write", "nprocs", [128, 64, 32]),
                    p.ParamCmdLineArg("stage_write", "input_bp_file", 1, ["output.bp"]),
                    p.ParamCmdLineArg("stage_write", "output_bp_file", 2, ["staged.bp"]),
                    p.ParamCmdLineArg("stage_write", "adios_read_method", 3, ["FLEXPATH"]),
                    p.ParamCmdLineArg("stage_write", "read_method_params", 4, [""]),
                    p.ParamCmdLineArg("stage_write", "adios_write_method", 5, ["MPI_AGGREGATE"]),
                    p.ParamCmdLineArg("stage_write", "write_method_params", 6,
                                      ["have_metadata_file=0;num_aggregators=8"]),
                    p.ParamCmdLineArg("stage_write", "variables_to_transform", 7,
                                      ["atom_id,atom_type,px,py,pz,imx,imy,imz,atom_vid,vx,vy,vz"]),
                    p.ParamCmdLineArg("stage_write", "transform_params", 8,
                                      ["none", "zlib:9", "bzip2:9", "lz4"]),
                ]),
                p.Sweep([
                    # 64 exaalt nodes
                    p.ParamRunner("exaalt", "nprocs", [1024]),
                    p.ParamCmdLineArg("exaalt", "states_list_file", 1, ["states_list.txt"]),
                    p.ParamCmdLineArg("exaalt", "no_of_states", 2, [1433600]),
                    p.ParamCmdLineArg("exaalt", "bp_output_file", 3, ["output.bp"]),
                    p.ParamCmdLineArg("exaalt", "transport_method", 4, ["FLEXPATH"]),
                    p.ParamCmdLineArg("exaalt", "transport_variables", 5, [""]),
                    p.ParamCmdLineArg("exaalt", "transport_options", 6, ["none"]),
                    # 4, 2, or 1 stage nodes
                    p.ParamRunner("stage_write", "nprocs", [64, 32, 16]),
                    p.ParamCmdLineArg("stage_write", "input_bp_file", 1, ["output.bp"]),
                    p.ParamCmdLineArg("stage_write", "output_bp_file", 2, ["staged.bp"]),
                    p.ParamCmdLineArg("stage_write", "adios_read_method", 3, ["FLEXPATH"]),
                    p.ParamCmdLineArg("stage_write", "read_method_params", 4, [""]),
                    p.ParamCmdLineArg("stage_write", "adios_write_method", 5, ["MPI_AGGREGATE"]),
                    p.ParamCmdLineArg("stage_write", "write_method_params", 6,
                                      ["have_metadata_file=0;num_aggregators=4"]),
                    p.ParamCmdLineArg("stage_write", "variables_to_transform", 7,
                                      ["atom_id,atom_type,px,py,pz,imx,imy,imz,atom_vid,vx,vy,vz"]),
                    p.ParamCmdLineArg("stage_write", "transform_params", 8,
                                      ["none", "zlib:9", "bzip2:9", "lz4"]),
                ]),
                # No staging or compression. Simulation writes data to disk.
                # These are the baseline test cases.
                p.Sweep([
                    # 256 exaalt nodes
                    p.ParamRunner("exaalt", "nprocs", [4096]),
                    p.ParamCmdLineArg("exaalt", "states_list_file", 1, ["states_list.txt"]),
                    p.ParamCmdLineArg("exaalt", "no_of_states", 2, [1433600]),
                    p.ParamCmdLineArg("exaalt", "bp_output_file", 3, ["output.bp"]),
                    p.ParamCmdLineArg("exaalt", "transport_method", 4, ["MPI_AGGREGATE"]),
                    p.ParamCmdLineArg("exaalt", "transport_variables", 5,
                                      ["have_metadata_file=0;num_aggregators=256"]),
                    p.ParamCmdLineArg("exaalt", "transport_options", 6, ["none"]),
                ]),
                p.Sweep([
                    # 128 exaalt nodes
                    p.ParamRunner("exaalt", "nprocs", [2048]),
                    p.ParamCmdLineArg("exaalt", "states_list_file", 1, ["states_list.txt"]),
                    p.ParamCmdLineArg("exaalt", "no_of_states", 2, [1433600]),
                    p.ParamCmdLineArg("exaalt", "bp_output_file", 3, ["output.bp"]),
                    p.ParamCmdLineArg("exaalt", "transport_method", 4, ["MPI_AGGREGATE"]),
                    p.ParamCmdLineArg("exaalt", "transport_variables", 5,
                                      ["have_metadata_file=0;num_aggregators=128"]),
                    p.ParamCmdLineArg("exaalt", "transport_options", 6, ["none"]),
                ]),
                p.Sweep([
                    # 64 exaalt nodes
                    p.ParamRunner("exaalt", "nprocs", [1024]),
                    p.ParamCmdLineArg("exaalt", "states_list_file", 1, ["states_list.txt"]),
                    p.ParamCmdLineArg("exaalt", "no_of_states", 2, [1433600]),
                    p.ParamCmdLineArg("exaalt", "bp_output_file", 3, ["output.bp"]),
                    p.ParamCmdLineArg("exaalt", "transport_method", 4, ["MPI_AGGREGATE"]),
                    p.ParamCmdLineArg("exaalt", "transport_variables", 5,
                                      ["have_metadata_file=0;num_aggregators=64"]),
                    p.ParamCmdLineArg("exaalt", "transport_options", 6, ["none"]),
                ]),
            ])
    ]
class GrayScott(Campaign):
    # A name for the campaign
    name = "gray_scott"

    # Define your workflow. Setup the applications that form the workflow.
    # exe may be an absolute path.
    # The adios xml file is automatically copied to the campaign directory.
    # 'runner_override' may be used to launch the code on a login/service node
    # as a serial code without a runner such as aprun/srun/jsrun etc.
    codes = [
        ("simulation", dict(exe="gray-scott", adios_xml_file='adios2.xml')),
        ("pdf_calc", dict(exe="pdf_calc", adios_xml_file='adios2.xml',
                          runner_override=False)),
    ]

    # List of machines on which this code can be run
    supported_machines = ['local', 'titan', 'theta']

    # Kill an experiment right away if any workflow components fail
    # (just the experiment, not the whole group)
    kill_on_partial_failure = True

    # Any setup that you may need to do in an experiment directory before the
    # experiment is run
    run_dir_setup_script = None

    # A post-process script that is run for every experiment after the
    # experiment completes
    run_post_process_script = None

    # Directory permissions for the campaign sub-directories
    umask = '027'

    # Options for the underlying scheduler on the target system.
    # Specify the project ID and job queue here.
    scheduler_options = {'theta': {'project': 'CSC249ADCD01', 'queue': 'default'}}

    # A way to setup your environment before the experiment runs.
    # Export environment variables such as LD_LIBRARY_PATH here.
    app_config_scripts = {'local': 'setup.sh', 'theta': 'env_setup.sh'}

    # Setup the sweep parameters for a Sweep
    sweep1_parameters = [
        # ParamRunner 'nprocs' specifies the no. of ranks to be spawned
        p.ParamRunner('simulation', 'nprocs', [512]),

        # Create a ParamCmdLineArg parameter to specify a command line argument
        # to run the application
        p.ParamCmdLineArg('simulation', 'settings', 1, ["settings.json"]),

        # Edit key-value pairs in the json file.
        # Sweep over two values for the F key in the json file. Along with the
        # 4 values for the nprocs property of the pdf_calc code below, this
        # Sweep will create 2*4 = 8 experiments.
        p.ParamConfig('simulation', 'feed_rate_U', 'settings.json', 'F', [0.01, 0.02]),
        p.ParamConfig('simulation', 'kill_rate_V', 'settings.json', 'k', [0.048]),

        # Setup an environment variable
        # p.ParamEnvVar('simulation', 'openmp', 'OMP_NUM_THREADS', [4]),

        # Change the engine for the 'SimulationOutput' IO object in the adios
        # xml file to SST for coupling. As both the applications use the same
        # xml file, you need to do this just once.
        p.ParamADIOS2XML('simulation', 'SimulationOutput', 'engine', [{'SST': {}}]),

        # Now setup options for the pdf_calc application.
        # Sweep over four values for the nprocs
        p.ParamRunner('pdf_calc', 'nprocs', [32, 64, 128, 256]),
        p.ParamCmdLineArg('pdf_calc', 'infile', 1, ['gs.bp']),
        p.ParamCmdLineArg('pdf_calc', 'outfile', 2, ['pdf']),
    ]

    # Create a Sweep object. This one does not define a node-layout, and thus,
    # all cores of a compute node will be utilized and mapped to application ranks.
    sweep1 = p.Sweep(parameters=sweep1_parameters)

    # Create another Sweep object and set its node-layout to spawn 16 simulation
    # processes per node, and 4 processes of pdf_calc per node. On Theta,
    # different executables reside on separate nodes as node-sharing is not
    # permitted on Theta.
    sweep2_parameters = copy.deepcopy(sweep1_parameters)
    sweep2 = p.Sweep(
        node_layout={'theta': [{'simulation': 16}, {'pdf_calc': 4}]},
        parameters=sweep2_parameters)

    # Create a SweepGroup and add the above Sweeps. Set batch job properties
    # such as the no. of nodes, walltime, etc.
    sweepGroup1 = p.SweepGroup(
        "sg-1",                  # A unique name for the SweepGroup
        walltime=3600,           # Total runtime for the SweepGroup
        per_run_timeout=600,     # Timeout for each experiment
        parameter_groups=[sweep1, sweep2],  # Sweeps to include in this group
        launch_mode='default',   # Launch mode: default, or MPMD if supported
        nodes=128,               # No. of nodes for the batch job
        # rc_dependency={'pdf_calc': 'simulation'},  # dependencies between workflow components
        run_repetitions=2,       # No. of times each experiment in the group must
                                 # be repeated (total no. of runs here will be 3)
    )

    # Activate the SweepGroup
    sweeps = [sweepGroup1]
class NWChem(Campaign): # A name for the campaign name = "nwchem" # Define your workflow. Setup the applications that form the workflow. # exe may be an absolute path. # The adios xml file is automatically copied to the campaign directory. # 'runner_override' may be used to launch the code on a login/service node as a serial code # without a runner such as aprun/srun/jsrun etc. codes = [ ("nwchem_main", dict( exe= "/ccs/proj/e2e/pnorbert/ADIOS/ADIOS2/build.rhea.gcc/install/bin/adios2_iotest", adios_xml_file='copro.xml')), ("sorting", dict( exe= "/ccs/proj/e2e/pnorbert/ADIOS/ADIOS2/build.rhea.gcc/install/bin/adios2_iotest", adios_xml_file='copro.xml', runner_override=False)), ] # List of machines on which this code can be run supported_machines = ['local', 'titan', 'theta', 'rhea'] # Kill an experiment right away if any workflow components fail (just the experiment, not the whole group) kill_on_partial_failure = True # Any setup that you may need to do in an experiment directory before the experiment is run run_dir_setup_script = None # A post-process script that is run for every experiment after the experiment completes run_post_process_script = None # Directory permissions for the campaign sub-directories umask = '027' # Options for the underlying scheduler on the target system. Specify the project ID and job queue here. # scheduler_options = {'theta': {'project':'CSC249ADCD01', 'queue': 'default'}} scheduler_options = {'rhea': {'project': 'csc143'}} # A way to setup your environment before the experiment runs. Export environment variables such as LD_LIBRARY_PATH here. app_config_scripts = { 'local': 'setup.sh', 'theta': 'env_setup.sh', 'rhea': 'setup_nwchem_rhea.sh' } # Setup the sweep parameters for a Sweep sweep1_parameters = [ # ParamRunner 'nprocs' specifies the no. of ranks to be spawned p.ParamRunner('nwchem_main', 'nprocs', [80]), # Create a ParamCmdLineArg parameter to specify a command line argument to run the application p.ParamCmdLineOption('nwchem_main', 'app', '-a', [1]), p.ParamCmdLineOption('nwchem_main', 'app-config', '-c', ['copro-80.txt']), p.ParamCmdLineOption('nwchem_main', 'adios-config', '-x', ['copro.xml']), p.ParamCmdLineOption('nwchem_main', 'strongscaling', '-w', [None]), p.ParamCmdLineOption('nwchem_main', 'timing', '-t', [None]), p.ParamCmdLineOption('nwchem_main', 'decomposition', '-d', [80]), # Change the engine for the 'SimulationOutput' IO object in the adios xml file to SST for coupling. p.ParamADIOS2XML('nwchem_main', 'dump_trajectory', 'trj_dump_out', 'engine', [{ 'BP4': { 'OpenTimeoutSecs': '30.0' } }]), # Now setup options for the pdf_calc application. # Sweep over four values for the nprocs p.ParamRunner('sorting', 'nprocs', [8]), p.ParamCmdLineOption('sorting', 'app', '-a', [1]), p.ParamCmdLineOption('sorting', 'app-config', '-c', ['copro-80.txt']), p.ParamCmdLineOption('sorting', 'adios-config', '-x', ['copro.xml']), p.ParamCmdLineOption('sorting', 'weakscaling', '-s', [None]), p.ParamCmdLineOption('sorting', 'timing', '-t', [None]), p.ParamCmdLineOption('sorting', 'decomposition', '-d', [8]), # Change the engine for the 'SimulationOutput' IO object in the adios xml file to SST for coupling. 
        p.ParamADIOS2XML('sorting', 'load_trajectory', 'trj_dump_in', 'engine',
                         [{'BP4': {'OpenTimeoutSecs': '30.0'}}]),
    ]
    # print(sweep1_parameters)

    # Copy the parameter list and swap out the engines to create variants of the sweep.
    # Indices 7 and 15 are the ParamADIOS2XML entries for nwchem_main and sorting.
    sweep2_parameters = sweep1_parameters.copy()
    sweep2_parameters[7] = p.ParamADIOS2XML('nwchem_main', 'dump_trajectory', 'trj_dump_out', 'engine',
                                            [{'SST': {}}])
    sweep2_parameters[15] = p.ParamADIOS2XML('sorting', 'load_trajectory', 'trj_dump_in', 'engine',
                                             [{'SST': {}}])

    sweep3_parameters = sweep1_parameters.copy()
    sweep3_parameters[7] = p.ParamADIOS2XML('nwchem_main', 'dump_trajectory', 'trj_dump_out', 'engine',
                                            [{'Null': {}}])
    sweep3_parameters[15] = p.ParamADIOS2XML('sorting', 'load_trajectory', 'trj_dump_in', 'engine',
                                             [{'Null': {}}])

    sweep4_parameters = sweep1_parameters.copy()
    sweep4_parameters[7] = p.ParamADIOS2XML('nwchem_main', 'dump_trajectory', 'trj_dump_out', 'engine',
                                            [{'BP4': {'OpenTimeoutSecs': '30.0', 'BurstBufferPath': '/tmp'}}])
    sweep4_parameters[15] = p.ParamADIOS2XML('sorting', 'load_trajectory', 'trj_dump_in', 'engine',
                                             [{'BP4': {'OpenTimeoutSecs': '30.0', 'BurstBufferPath': '/tmp'}}])

    # sweep4_parameters = sweep1_parameters.copy()
    # sweep4_parameters[7] = p.ParamADIOS2XML('nwchem_main', 'dump_trajectory', 'trj_dump_out', 'engine', [{'SSC': {'DataTransport': 'WAN'}}])
    # sweep4_parameters[15] = p.ParamADIOS2XML('sorting', 'load_trajectory', 'trj_dump_in', 'engine', [{'SSC': {'DataTransport': 'WAN'}}])

    # Create Sweep objects. These Sweeps do not define a node layout, so all cores of a
    # compute node will be utilized and mapped to application ranks.
    sweep1 = p.Sweep(parameters=sweep1_parameters)
    sweep2 = p.Sweep(parameters=sweep2_parameters)
    sweep3 = p.Sweep(parameters=sweep3_parameters)
    sweep4 = p.Sweep(parameters=sweep4_parameters)

    # Create a SweepGroup and add the above Sweeps. Set batch job properties such as the no. of nodes.
    sweepGroup1 = p.SweepGroup(
        "nwchem-adios",                      # A unique name for the SweepGroup
        walltime=18060,                      # Total runtime for the SweepGroup
        per_run_timeout=500,                 # Timeout for each experiment
        parameter_groups=[sweep1, sweep2, sweep3, sweep4],  # Sweeps to include in this group
        launch_mode='default',               # Launch mode: default, or MPMD if supported
        nodes=6,                             # No. of nodes for the batch job
        run_repetitions=2,                   # No. of times each experiment in the group must be repeated
                                             # (total no. of runs per experiment here will be 3)
        component_inputs={'nwchem_main': ['copro-80.txt']},
    )

    # Activate the SweepGroup
    sweeps = [sweepGroup1]
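Patching copies of sweep1_parameters by index (elements 7 and 15) works, but it is brittle: inserting a parameter into the list silently shifts the indices. A hypothetical refactoring (not part of the original campaign) that rebuilds the list once per engine configuration instead:

# Hypothetical alternative to index-patching (not from the original campaign):
# build the parameter list per engine configuration, so adding a parameter
# cannot silently break the derived sweeps.
def make_parameters(engine_config):
    """engine_config: e.g. {'SST': {}} or {'BP4': {'OpenTimeoutSecs': '30.0'}}"""
    return [
        p.ParamRunner('nwchem_main', 'nprocs', [80]),
        # ... the shared nwchem_main command line options from sweep1_parameters ...
        p.ParamADIOS2XML('nwchem_main', 'dump_trajectory', 'trj_dump_out',
                         'engine', [engine_config]),
        p.ParamRunner('sorting', 'nprocs', [8]),
        # ... the shared sorting command line options ...
        p.ParamADIOS2XML('sorting', 'load_trajectory', 'trj_dump_in',
                         'engine', [engine_config]),
    ]

engine_configs = [{'BP4': {'OpenTimeoutSecs': '30.0'}}, {'SST': {}}, {'Null': {}}]
engine_sweeps = [p.Sweep(parameters=make_parameters(e)) for e in engine_configs]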
class HeatTransfer(Campaign):
    """Small example to run the heat_transfer application with stage_write,
    using no compression, zfp, or sz. All other options are fixed, so there
    are only three runs."""

    name = "heat-transfer-simple"

    # This application consists of two codes, with nicknames "heat" and
    # "stage", exe locations as specified, and a delay of 5 seconds
    # between starting stage and heat.
    codes = [('stage', dict(exe="stage_write/stage_write", sleep_after=5)),
             ('heat', dict(exe="heat_transfer_adios2", sleep_after=0,
                           adios_xml_file="heat_transfer.xml"))]

    # The application is designed to run on two machines.
    # (These are magic strings known to Cheetah.)
    supported_machines = ['local', 'titan']

    # Inputs are copied to each "run directory" -- the directory created by
    # Cheetah for each run. The adios_xml_file for each code specified
    # above is included automatically, so it does not need to be specified
    # here.
    inputs = []

    # If the heat or stage code fails (nonzero exit code) during a run,
    # kill the other code if it is still running. This is useful for multi-code
    # apps that require all codes to complete for useful results, which
    # is usually the case when using an adios stage code.
    kill_on_partial_failure = True

    # This script will be run for every run directory during campaign
    # creation. It can be used to customize the directory structure
    # needed for the application - this example simply creates an extra
    # directory.
    run_dir_setup_script = "run-dir-setup-mkdir.sh"

    # Options to pass to the scheduler (PBS or slurm). These are set per
    # target machine, since different options will likely be needed for
    # each.
    scheduler_options = {"titan": {"project": "CSC242", "queue": "debug"}}

    sweeps = [
        # Each SweepGroup specifies a set of runs to be performed on a specified
        # number of nodes. Here we have 1 SweepGroup, which will run on 4 nodes
        # on titan. On titan each executable consumes an entire node, even if it
        # doesn't make use of all processes on the node, so this will run
        # the first two instances at the same time across four nodes, and
        # start the last instance as soon as one of those two instances
        # finishes. On a supercomputer without this limitation, with nodes
        # that have >14 processes, all three could be submitted at the same
        # time with one node unused.
        p.SweepGroup(
            "small_scale",

            # Create a separate subdir for each component
            component_subdirs=True,

            # Required. Set the walltime for the scheduler job.
            walltime=3600,

            # Optional. If set, each run in the sweep group will be killed
            # if not complete after this many seconds.
            per_run_timeout=600,

            # Optional. Set the max number of processes to run in parallel.
            # Must fit on the nodes specified for each target machine, and
            # each run in the sweep group must use no more than this number
            # of processes. If not specified, it will be set to the max of
            # any individual run. Can be used to do runs in parallel; setting
            # it to 28 for this experiment allows two runs at a time, since
            # each run uses 14 processes (2 for stage + 4*3=12 for heat)
            # and 28/14=2.
            max_procs=28,

            # Optional. Provide a list of input files per component that
            # will be copied into the working dir of the component.
            # Copying source files just for demonstration.
            component_inputs={
                "stage": ["stage_write/utils.h", "stage_write/decompose.h"]
            },

            # Within a SweepGroup, each parameter_group specifies arguments for
            # each of the parameters required for each code. The number of runs
            # is the product of the number of options specified. Below, it is 3,
            # as only one parameter has more than one value.
            # There are two types of parameters used below: system
            # ("ParamRunner") and positional command line arguments
            # (ParamCmdLineArg). Also supported: command line options
            # (ParamCmdLineOption) and ADIOS XML config file (ParamAdiosXML).
            parameter_groups=[
                p.Sweep([
                    # First, the parameters for the STAGE program.

                    # ParamRunner passes an argument to launch_multi_swift
                    # nprocs: number of processors (aka processes) to use
                    p.ParamRunner("stage", "nprocs", [2]),

                    # ParamCmdLineArg passes a positional argument to the
                    # application. Arguments are:
                    # 1) code name (e.g., "stage"),
                    # 2) logical name for the parameter, used in output,
                    # 3) positional argument number,
                    # 4) list of values to sweep over
                    p.ParamCmdLineArg("stage", "input", 1, ["heat.bp"]),
                    p.ParamCmdLineArg("stage", "output", 2, ["staged.bp"]),
                    p.ParamCmdLineArg("stage", "rmethod", 3, ["FLEXPATH"]),
                    p.ParamCmdLineArg("stage", "ropt", 4, [""]),
                    p.ParamCmdLineArg("stage", "wmethod", 5, ["MPI"]),
                    p.ParamCmdLineArg("stage", "wopt", 6, [""]),
                    p.ParamCmdLineArg("stage", "variables", 7, ["T,dT"]),
                    p.ParamCmdLineArg("stage", "transform", 8,
                                      ["none", "zfp:accuracy=.001", "sz:accuracy=.001"]),
                    p.ParamCmdLineArg("stage", "decomp", 9, [2]),

                    # Second, the parameters for the HEAT program.

                    # Parameters that are derived from other explicit parameters
                    # can be specified as a function taking a dict of the other
                    # parameters as input and returning the value.
                    p.ParamRunner("heat", "nprocs",
                                  lambda d: d["heat"]["xprocs"] * d["heat"]["yprocs"]),
                    p.ParamCmdLineArg("heat", "output", 1, ["heat"]),
                    p.ParamCmdLineArg("heat", "xprocs", 2, [4]),
                    p.ParamCmdLineArg("heat", "yprocs", 3, [3]),
                    p.ParamCmdLineArg("heat", "xsize", 4, [40]),
                    p.ParamCmdLineArg("heat", "ysize", 5, [50]),
                    p.ParamCmdLineArg("heat", "steps", 6, [6]),
                    p.ParamCmdLineArg("heat", "iterations", 7, [5]),
                ]),
            ]),
    ]
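The heat code's nprocs above is derived rather than listed explicitly: the callable receives a dict of the other parameter values, keyed by code name and then by parameter logical name (the dict shape follows directly from the lambda in the example). A minimal standalone illustration of that pattern:

# Minimal sketch of the derived-parameter pattern used above. The callable
# receives a dict keyed by code name and then by parameter logical name, and
# returns the derived value; the dict shape is inferred from the lambda above.
def heat_nprocs(d):
    # 4 x-ranks * 3 y-ranks -> 12 MPI ranks for the heat code
    return d["heat"]["xprocs"] * d["heat"]["yprocs"]

example = {"heat": {"xprocs": 4, "yprocs": 3}}
assert heat_nprocs(example) == 12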
class Adios_iotest(Campaign):
    # A name for the campaign
    name = "ADIOS_IOTEST"

    # A list of the codes that will be part of the workflow.
    # If there is an adios xml file associated with the codes, list it here.
    codes = [("writer", dict(exe="adios_iotest")),
             ("reader", dict(exe="adios_iotest"))]

    # A list of machines that this campaign is supported on
    supported_machines = ['local', 'theta', 'summit']

    # Option to kill an experiment (just one experiment, not the full sweep or campaign)
    # if one of the codes fails
    kill_on_partial_failure = True

    # Some pre-processing in the experiment directory.
    # This is performed when the campaign directory is created (before the campaign is launched).
    run_dir_setup_script = None

    # A post-processing script to be run in the experiment directory after the experiment
    # completes. For example, removing some large files after the experiment is done.
    run_post_process_script = 'cleanup.sh'

    # umask applied to the campaign sub-directories so that colleagues can view files
    umask = '027'

    # Scheduler information: job queue, account-id etc. Leave it as None if running on a local machine.
    scheduler_options = {
        'theta': {'project': 'CSC249ADCD01', 'queue': 'batch'},
        'summit': {'project': 'csc303'}
    }

    # Set up your environment: loading modules, setting LD_LIBRARY_PATH, etc.
    # Ensure this script is executable.
    app_config_scripts = {
        'local': 'env_setup.sh',
        'theta': 'env_setup.sh',
        'summit': 'env_setup.sh'
    }

    input_files = [
        'staging-perf-test-16MB-2to1.txt', 'staging-perf-test-16MB-8to1.txt',
        'staging-perf-test-1MB-2to1.txt', 'staging-perf-test-1MB-8to1.txt',
        'staging-perf-test-512MB-2to1.txt', 'staging-perf-test-512MB-8to1.txt',
        'staging-perf-test-bp4.xml', 'staging-perf-test-insitumpi.xml',
        'staging-perf-test-ssc.xml', 'staging-perf-test-sst-rdma.xml',
        'staging-perf-test-sst-tcp.xml'
    ]

    # Create the sweep parameters for a sweep. Value lists left empty here are
    # filled in by get_sweeps() for each node count.
    params = {'writer': {}, 'reader': {}}

    params['writer']['nprocs'] = p.ParamRunner('writer', 'nprocs', [])
    params['writer']['appid'] = p.ParamCmdLineOption('writer', 'appid', '-a', [1])
    params['writer']['configfile'] = p.ParamCmdLineOption('writer', 'configFile', '-c', [])
    params['writer']['scaling'] = p.ParamCmdLineOption('writer', 'scaling', '-w', [None])
    params['writer']['xmlfile'] = p.ParamCmdLineOption('writer', 'xmlfile', '-x', [])
    params['writer']['decomposition'] = p.ParamCmdLineOption('writer', 'decomposition', '-d', [])

    params['reader']['nprocs'] = p.ParamRunner('reader', 'nprocs', [])
    params['reader']['appid'] = p.ParamCmdLineOption('reader', 'appid', '-a', [2])
    params['reader']['configfile'] = p.ParamCmdLineOption('reader', 'configFile', '-c', [])
    params['reader']['scaling'] = p.ParamCmdLineOption('reader', 'scaling', '-w', [None])
    params['reader']['xmlfile'] = p.ParamCmdLineOption('reader', 'xmlfile', '-x', [])
    params['reader']['decomposition'] = p.ParamCmdLineOption('reader', 'decomposition', '-d', [])

    sweeps = []
    for n in [8]:
        group_sweeps = get_sweeps(params, n * 32)
        s_group = p.SweepGroup(
            "{}-nodes".format(n),
            walltime=7200,
            per_run_timeout=600,
            component_inputs={'writer': input_files},
            # nodes=128,
            parameter_groups=group_sweeps,
        )
        sweeps.append(s_group)
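get_sweeps() is defined elsewhere in the campaign file and is not shown in this excerpt. Purely as an illustration of how the empty value lists above might be filled per node count, here is a hypothetical version; the 8:1 writer:reader split, the chosen config and xml files, and the assumption that Cheetah Param objects expose a mutable .values list are all unverified assumptions.

# Hypothetical sketch of get_sweeps() -- the real helper is not shown in this
# excerpt. Assumes Param objects expose their value list as a mutable
# '.values' attribute (an unverified assumption).
import copy

def get_sweeps(params, total_procs):
    sweeps = []
    for configfile in ['staging-perf-test-16MB-8to1.txt',
                       'staging-perf-test-512MB-8to1.txt']:
        pg = copy.deepcopy(params)            # avoid sharing Param objects across Sweeps
        writer_procs = total_procs * 8 // 9   # illustrative 8:1 writer:reader split
        reader_procs = total_procs - writer_procs
        for code, nprocs in [('writer', writer_procs), ('reader', reader_procs)]:
            pg[code]['nprocs'].values = [nprocs]
            pg[code]['configfile'].values = [configfile]
            pg[code]['xmlfile'].values = ['staging-perf-test-sst-tcp.xml']
            pg[code]['decomposition'].values = [nprocs]
        sweeps.append(p.Sweep(parameters=list(pg['writer'].values())
                                         + list(pg['reader'].values())))
    return sweeps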