Example #1
class GrayScott(Campaign):
    name = "Gray-Scott-compression"
    codes = [("gray-scott", dict(exe="gray-scott", sleep_after=1)), ("compression", dict(exe="compression")) ]
    supported_machines = ['local', 'theta']
    scheduler_options = {
        "theta": {
            "queue": "debug-flat-quad",
            "project": "CSC249ADCD01",
        }
    }
    umask = '027'
    sweeps = [
        p.SweepGroup(
            name="gsc",
            walltime=timedelta(minutes=30),
            component_subdirs=True,
            component_inputs={
                'gray-scott': ['settings.json', 'adios2.xml'],
                'compression': ['adios2.xml', 'sz.config', 'zc.config']
            },
            parameter_groups=[
                p.Sweep([
                    p.ParamCmdLineArg("gray-scott", "settings", 1, ["settings.json"]),
                    p.ParamConfig("gray-scott", "L", "settings.json", "L", [32]),
                    p.ParamConfig("gray-scott", "noise", "settings.json", "noise", [0.01]),
                    p.ParamRunner('gray-scott', 'nprocs', [4]),
                    p.ParamCmdLineArg("compression", "input", 1, ["../gray-scott/gs.bp"]),
                    p.ParamCmdLineArg("compression", "output", 2, ["CompressionOutput.bp"]),
                    p.ParamCmdLineArg("compression", "compressor", 3, ["1"]),
                    p.ParamRunner('compression', 'nprocs', [1])
                ]),
            ],
        ),
    ]
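The campaign specs in these examples omit their import header. A minimal sketch of what it typically looks like is shown below; the module paths are assumptions based on common Cheetah campaign specs and should be verified against your Cheetah installation.

# Assumed import header for the campaign specs in these examples (not part of the original code).
import copy                              # used by specs that deep-copy parameter lists
import math                              # used by the Summit node-layout examples
from datetime import timedelta           # used for SweepGroup walltimes

from codar.cheetah import Campaign       # assumed module path
from codar.cheetah import parameters as p
# Node-layout classes used by the Summit / DeepThought2 examples further down
# (assumed path): from codar.savanna.machines import SummitNode, DTH2CPUNode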
Example #2
def create_experiment(writer_nprocs,
                      configFile,
                      scalingType,
                      adios_xml_file,
                      writer_decomposition,
                      machine_name,
                      node_layout,
                      post_hoc=False):
    """
    Creates a sweep object that tells Cheetah how to run the adios io test.
    Assumes 1D decomposition.
    """

    params = [
        p.ParamRunner('writer', 'nprocs', [writer_nprocs]),
        p.ParamCmdLineOption('writer', 'appid', '-a', [1]),
        p.ParamCmdLineOption('writer', 'configFile', '-c', [configFile]),
        p.ParamCmdLineOption('writer', 'scaling', scalingType, [None]),
        p.ParamCmdLineOption('writer', 'adios_xml_file', '-x',
                             [adios_xml_file]),
        p.ParamCmdLineOption('writer', 'decomposition', '-d',
                             [writer_decomposition]),
        p.ParamCmdLineOption('writer', 'timing_info', '-t', [None]),
    ]

    rc_dependency = None

    sweep = p.Sweep(parameters=params, rc_dependency=rc_dependency)
    if node_layout:
        sweep.node_layout = {machine_name: node_layout}

    return sweep
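A minimal usage sketch for the function above (not part of the original example): the returned Sweep is typically placed in a SweepGroup and activated inside a Campaign subclass. The group name, walltime, and argument values below are illustrative assumptions, and `p` is assumed to be `codar.cheetah.parameters`.

# Hypothetical wiring of the Sweep returned by create_experiment() into a campaign.
sweep = create_experiment(writer_nprocs=32,
                          configFile='io-test.txt',     # illustrative config file name
                          scalingType='-w',             # e.g. '-w' (strong) or '-s' (weak), as in later examples
                          adios_xml_file='adios2.xml',
                          writer_decomposition=32,
                          machine_name='summit',
                          node_layout=None)

sweep_group = p.SweepGroup("io-test",          # illustrative group name
                           walltime=3600,      # illustrative walltime in seconds
                           per_run_timeout=600,
                           parameter_groups=[sweep])

# Inside a Campaign subclass, the group would then be activated, e.g.:
# sweeps = {'MACHINE_ANY': [sweep_group]}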
Example #3
def asynchronous_zfp():
    sweep_parameters = [
        p.ParamRunner('simulation', 'nprocs', [64]),
        p.ParamCmdLineArg('simulation', 'settings', 1,
                          ["settings-files.json"]),
        p.ParamADIOS2XML('simulation', 'sim output engine', 'SimulationOutput',
                         'engine', [{
                             'BP4': {}
                         }]),
        p.ParamRunner('pdf_calc', 'nprocs', [8]),
        p.ParamCmdLineArg('pdf_calc', 'infile', 1, ['gs.bp']),
        p.ParamCmdLineArg('pdf_calc', 'outfile', 2, ['pdf.bp']),
        p.ParamCmdLineArg('pdf_calc', 'bins', 3, [100]),
        p.ParamCmdLineArg('pdf_calc', 'write_orig_data', 4, ['YES']),
        p.ParamADIOS2XML('pdf_calc', 'zfp compression', 'PDFAnalysisOutput',
                         'var_operation', [{
                             "U": {
                                 "zfp": {
                                     'accuracy': 0.001
                                 }
                             }
                         }, {
                             "U": {
                                 "zfp": {
                                     'accuracy': 0.0001
                                 }
                             }
                         }]),
    ]

    sweep = p.Sweep(parameters=sweep_parameters,
                    rc_dependency={'pdf_calc': 'simulation'},
                    node_layout={'summit': node_layouts.separate_nodes()})

    return sweep
Example #4
def inline_analysis(sim_nprocs):
    sweep_parameters = [
        p.ParamRunner('sim_inline_rdf_calc', 'nprocs', [sim_nprocs]),
        p.ParamCmdLineOption('sim_inline_rdf_calc', 'input', '-in',
                             ["in.lj.rdf.nodump"]),
    ]
    sweep = p.Sweep(parameters=sweep_parameters,
                    rc_dependency=None,
                    node_layout={'summit': node_layouts.all_sim_nodes()})
    return sweep
Example #5
def posthoc_analysis(sim_nprocs, analysis_nprocs):
    sweep_parameters = [
        p.ParamRunner('simulation', 'nprocs', [sim_nprocs]),
        p.ParamCmdLineOption('simulation', 'sim input', '-in',
                             ["in.lj.nordf"]),
        p.ParamADIOS2XML('simulation', 'sim output engine', 'custom', 'engine',
                         [{
                             'BP4': {}
                         }]),
        p.ParamRunner('rdf_calc', 'nprocs', [analysis_nprocs]),
        p.ParamCmdLineOption('rdf_calc', 'input', '-in', ["in.lj.rdf.rerun"]),
    ]

    sweep = p.Sweep(parameters=sweep_parameters,
                    rc_dependency={'rdf_calc': 'simulation'},
                    node_layout={'summit': node_layouts.separate_nodes()})

    return sweep
Example #6
def insitu_analysis(node_layout):
    sweep_parameters = [
        p.ParamRunner('simulation', 'nprocs', [64]),
        p.ParamCmdLineArg('simulation', 'settings', 1,
                          ["settings-staging.json"]),
        p.ParamADIOS2XML('simulation', 'sim output engine', 'SimulationOutput',
                         'engine', [{
                             'SST': {}
                         }]),
        p.ParamRunner('pdf_calc', 'nprocs', [4, 8, 16, 32]),
        p.ParamCmdLineArg('pdf_calc', 'infile', 1, ['gs.bp']),
        p.ParamCmdLineArg('pdf_calc', 'outfile', 2, ['pdf.bp']),
    ]

    sweep = p.Sweep(parameters=sweep_parameters,
                    rc_dependency=None,
                    node_layout={'summit': node_layout})

    return sweep
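For reference, the node_layout argument expected by the function above is a list of machine node objects. Below is a minimal sketch, assuming SummitNode is importable from codar.savanna.machines (mirroring the SummitNode usage in the later examples), that places simulation and pdf_calc ranks on the same Summit node; the rank counts per node are illustrative.

# Hypothetical shared-node layout for Summit: 32 simulation ranks and 8 pdf_calc ranks per node.
from codar.savanna.machines import SummitNode   # assumed import path

shared_node = SummitNode()
for i in range(16):
    shared_node.cpu[i] = "simulation:{}".format(i)            # first socket
    shared_node.cpu[i + 21] = "simulation:{}".format(i + 16)  # second socket
for i in range(4):
    shared_node.cpu[i + 16] = "pdf_calc:{}".format(i)
    shared_node.cpu[i + 16 + 21] = "pdf_calc:{}".format(i + 4)

# The sweep above could then be created as:
# sweep = insitu_analysis(node_layout=[shared_node])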
Example #7
def posthoc_analysis():
    sweep_parameters = [
        p.ParamRunner('simulation', 'nprocs', [4]),
        p.ParamCmdLineArg('simulation', 'settings', 1,
                          ["settings-files.json"]),
        p.ParamADIOS2XML('simulation', 'sim output engine', 'SimulationOutput',
                         'engine', [{
                             'BP4': {}
                         }]),
        p.ParamRunner('pdf_calc', 'nprocs', [4]),
        p.ParamCmdLineArg('pdf_calc', 'infile', 1, ['gs.bp']),
        p.ParamCmdLineArg('pdf_calc', 'outfile', 2, ['pdf.bp']),
    ]

    sweep = p.Sweep(parameters=sweep_parameters,
                    rc_dependency={'pdf_calc': 'simulation'},
                    node_layout={'summit': node_layouts.separate_nodes()})

    return sweep
Example #8
def insitu_analysis(sim_nprocs, analysis_nprocs, node_layout, adios_engine):
    sweep_parameters = [
        p.ParamRunner('simulation', 'nprocs', [sim_nprocs]),
        p.ParamCmdLineOption('simulation', 'sim input', '-in',
                             ["in.lj.nordf"]),
        p.ParamADIOS2XML('simulation', 'sim output engine', 'custom', 'engine',
                         [{
                             adios_engine: {}
                         }]),
        p.ParamRunner('rdf_calc', 'nprocs', [analysis_nprocs]),
        p.ParamCmdLineOption('rdf_calc', 'input', '-in', ["in.lj.rdf.rerun"]),
        p.ParamADIOS2XML('rdf_calc', 'analysis input engine', 'read_dump',
                         'engine', [{
                             adios_engine: {}
                         }]),
    ]

    sweep = p.Sweep(parameters=sweep_parameters,
                    rc_dependency=None,
                    node_layout={'summit': node_layout})

    return sweep
Example #9
def create_experiment(writer_nprocs, reader_nprocs, config_file, adios_xml_file, engine, writer_decomposition, reader_decomposition, machine_name, node_layout):
    """
    Creates a sweep object that tells Cheetah how to run the adios io test.
    Assumes 1D decomposition.
    """
    # print(adios_xml_file)
    # print(engine)
    params = [
            # ParamRunner 'nprocs' specifies the no. of ranks to be spawned 
            p.ParamRunner       ('writer', 'nprocs', [writer_nprocs]),
            # Create ParamCmdLineOption parameters to specify command line options for the application
            p.ParamCmdLineOption   ('writer', 'app', '-a', [1]),
            p.ParamCmdLineOption   ('writer', 'app-config', '-c', [config_file]),
            p.ParamCmdLineOption   ('writer', 'adios-config', '-x', [adios_xml_file]),
            p.ParamCmdLineOption   ('writer', 'strongscaling', '-w', [None]),
            p.ParamCmdLineOption   ('writer', 'timing', '-t', [None]),
            p.ParamCmdLineOption   ('writer', 'decomposition', '-d', [writer_decomposition]),
            # Change the engine for the 'trj_dump_out' IO object in the adios xml file
            p.ParamADIOS2XML    ('writer', 'dump_trajectory', 'trj_dump_out', 'engine', [engine]),
            # Set the no. of ranks for the reader
            p.ParamRunner       ('reader', 'nprocs', [reader_nprocs]),
            p.ParamCmdLineOption   ('reader', 'app', '-a', [2]),
            p.ParamCmdLineOption   ('reader', 'app-config', '-c', [config_file]),
            p.ParamCmdLineOption   ('reader', 'adios-config', '-x', [adios_xml_file]),
            p.ParamCmdLineOption   ('reader', 'weakscaling', '-s', [None]),
            p.ParamCmdLineOption   ('reader', 'timing', '-t', [None]),
            p.ParamCmdLineOption   ('reader', 'decomposition', '-d', [reader_decomposition]),
            # Change the engine for the 'trj_dump_in' IO object in the adios xml file
            p.ParamADIOS2XML    ('reader', 'load_trajectory', 'trj_dump_in', 'engine', [engine]),
    ]

    sweep = p.Sweep(parameters=params)
    if node_layout:
        sweep.node_layout = {machine_name: node_layout}

    return sweep
Example #10
def create_experiment(writer_nprocs, reader_nprocs, trj_engine, sorted_trj_engine, machine_name, node_layout):
    """
    Creates a sweep object that tells Cheetah how to run the adios io test.
    Assumes 1D decomposition.
    """
    # print(adios_xml_file)
    # print(engine)
    params = [
            # ParamRunner 'nprocs' specifies the no. of ranks to be spawned 
            p.ParamRunner       ('writer', 'nprocs', [writer_nprocs]),
            # Create a ParamCmdLineArg parameter to specify a command line argument to run the application
            p.ParamCmdLineArg   ('writer', 'config', 1, ['copro.nw']),
            # Change the engine for the 'trj' IO object in the adios xml file
            p.ParamADIOS2XML    ('writer', 'trajectory', 'trj', 'engine', [trj_engine]),
            # Set the no. of ranks for the reader
            p.ParamRunner       ('reader', 'nprocs', [reader_nprocs]),
            p.ParamCmdLineArg   ('reader', 'input_md', 1, ['copro_md']),
            p.ParamCmdLineArg   ('reader', 'verbose', 2, [1]),
            # Change the engine for the 'SortingOutput' IO object in the adios xml file
            p.ParamADIOS2XML    ('reader', 'sorted_trj', 'SortingOutput', 'engine', [sorted_trj_engine]),
            p.ParamRunner       ('analyzer', 'nprocs', [1]),
            p.ParamCmdLineArg   ('analyzer', 'script', 1, ['pca3d.R']),
            p.ParamCmdLineArg   ('analyzer', 'window', 2, [100]),
            p.ParamCmdLineArg   ('analyzer', 'stride', 3, [10]),
            p.ParamCmdLineArg   ('analyzer', 'k', 4, [5]),
            p.ParamCmdLineArg   ('analyzer', 'sorted_trj', 5, ['copro_md_trj.bp']),
            p.ParamCmdLineArg   ('analyzer', 'xml', 6, ['adios2.xml']),
            p.ParamCmdLineArg   ('analyzer', 'mcCore', 7, [1]),
            p.ParamCmdLineArg   ('analyzer', 'output', 8, ['pairs.pdf']),
    ]

    sweep = p.Sweep(parameters=params)
    if node_layout:
        sweep.node_layout = {machine_name: node_layout}

    return sweep
Example #11
class GrayScott(Campaign):
    # A name for the campaign
    name = "gray_scott"

    # Define your workflow. Setup the applications that form the workflow.
    # exe may be an absolute path.
    # The adios xml file is automatically copied to the campaign directory.
    # 'runner_override' may be used to launch the code on a login/service node as a serial code
    #   without a runner such as aprun/srun/jsrun etc.
    codes = [
        ("simulation", dict(exe="gray-scott", adios_xml_file='adios2.xml')),
        ("pdf_calc",
         dict(exe="pdf_calc",
              adios_xml_file='adios2.xml',
              runner_override=False)),
    ]

    # List of machines on which this code can be run
    supported_machines = ['local', 'titan', 'theta']

    # Kill an experiment right away if any workflow component fails (just the experiment, not the whole group)
    kill_on_partial_failure = True

    # Any setup that you may need to do in an experiment directory before the experiment is run
    run_dir_setup_script = None

    # A post-process script that is run for every experiment after the experiment completes
    run_post_process_script = None

    # Directory permissions for the campaign sub-directories
    umask = '027'

    # Options for the underlying scheduler on the target system. Specify the project ID and job queue here.
    scheduler_options = {
        'theta': {
            'project': 'CSC249ADCD01',
            'queue': 'default'
        }
    }

    # A way to setup your environment before the experiment runs. Export environment variables such as LD_LIBRARY_PATH here.
    app_config_scripts = {'local': 'setup.sh', 'theta': 'env_setup.sh'}

    # Create a Sweep object. This one does not define a node-layout, and thus, all cores of a compute node will be
    #   utilized and mapped to application ranks.
    sweep1 = p.Sweep(parameters=get_sweep_params('SST'))

    sweep2 = p.Sweep(
        parameters=get_sweep_params('BP4'),
        rc_dependency={'pdf_calc': 'simulation'},  # Specify dependencies between workflow components
    )

    # Create a SweepGroup and add the above Sweeps. Set batch job properties such as the no. of nodes and the walltime.
    sweepGroup1 = p.SweepGroup(
        "sst",  # A unique name for the SweepGroup
        walltime=3600,  # Total runtime for the SweepGroup
        per_run_timeout=600,  # Timeout for each experiment
        parameter_groups=[sweep1],  # Sweeps to include in this group
        launch_mode='default',  # Launch mode: default, or MPMD if supported
        # nodes=128,  # No. of nodes for the batch job.
        # tau_profiling=True,
        # tau_tracing=False,
        run_repetitions=0,  # No. of times each experiment in the group must be repeated (0 means each experiment runs only once)
    )

    # Create a SweepGroup and add the above Sweeps. Set batch job properties such as the no. of nodes and the walltime.
    sweepGroup2 = p.SweepGroup(
        "posthoc",  # A unique name for the SweepGroup
        walltime=3600,  # Total runtime for the SweepGroup
        per_run_timeout=600,  # Timeout for each experiment
        parameter_groups=[sweep2],  # Sweeps to include in this group
        launch_mode='default',  # Launch mode: default, or MPMD if supported
        # tau_profiling=True,
        # tau_tracing=False,
        # nodes=128,  # No. of nodes for the batch job.
        run_repetitions=0,  # No. of times each experiment in the group must be repeated (0 means each experiment runs only once)
    )

    # Activate the SweepGroup
    sweeps = {'MACHINE_ANY': [sweepGroup1, sweepGroup2]}
Example #12
class ProducerConsumer(Campaign):

    # A name for the campaign
    name = "coupling-example"

    # WORKFLOW SETUP
    #---------------
    # A list of the codes that will be part of the workflow
    # If there is an adios xml file associated with the codes, list it here
    # 'sleep_after' represents the time gap after which the next code is spawned
    # Use runner_override to run the code without the default launcher (mpirun/aprun/jsrun etc.). This runs the
    #   code as a serial application
    codes = [
        ("producer",
         dict(exe="program/producer.py",
              adios_xml_file='adios2.xml',
              sleep_after=5)),
    ]

    # CAMPAIGN SETTINGS
    #------------------
    # A list of machines that this campaign is supported on
    supported_machines = [
        'local', 'titan', 'theta', 'summit', 'deepthought2_cpu', 'sdg_tm76'
    ]

    # Option to kill an experiment (just one experiment, not the full sweep or campaign) if one of the codes fails
    kill_on_partial_failure = True

    # Some pre-processing in the experiment directory
    # This is performed when the campaign directory is created (before the campaign is launched)
    run_dir_setup_script = None

    # A post-processing script to be run in the experiment directory after the experiment completes
    # For example, removing some large files after the experiment is done
    run_post_process_script = None

    # umask applied to your directory in the campaign so that colleagues can view files
    umask = '027'

    # Scheduler information: job queue, account-id etc. Leave it to None if running on a local machine

    scheduler_options = {
        "cori": {
            "queue": "debug",
            "constraint": "haswell",
            "license": "SCRATCH,project",
        },
        "titan": {
            "queue": "debug",
            "project": "csc242",
        },
        "theta": {
            "queue": "debug-flat-quad",
            "project": "CSC249ADCD01",
        },
        "summit": {
            'project': 'csc299'
        }
    }

    # Setup your environment. Loading modules, setting the LD_LIBRARY_PATH etc.
    # Ensure this script is executable
    # app_config_scripts = {'local': 'setup.sh', 'summit': 'env_setup.sh'}

    # PARAMETER SWEEPS
    #-----------------
    # Setup how the workflow is run, and what values to 'sweep' over
    # Use ParamCmdLineArg to setup a command line arg, ParamCmdLineOption to setup a command line option, and so on.
    sweep1_parameters = [
        p.ParamRunner('producer', 'nprocs', [2]),
        p.ParamCmdLineArg('producer', 'array_size_per_pe', 1, [
            1024 * 1024,
        ]),  # 1M, 2M, 10M
        p.ParamCmdLineArg('producer', 'num_steps', 2, [2]),
        #p.ParamADIOS2XML    ('producer', 'engine_sst', 'producer', 'engine', [ {"BP4": {}} ]),
    ]

    node = SummitNode()
    node.cpu[0] = f"producer:0"
    node.cpu[1] = f"producer:1"
    node_layout = [node]

    # Create a sweep
    # node_layout represents no. of processes per node
    sweep1 = p.Sweep(node_layout={'summit': node_layout},
                     parameters=sweep1_parameters,
                     rc_dependency=None)

    # Create a sweep group from the above sweep. You can place multiple sweeps in the group.
    # Each group is submitted as a separate job.
    sweepGroup1 = p.SweepGroup(
        "sg-1",
        walltime=300,
        per_run_timeout=60,
        parameter_groups=[sweep1],
        launch_mode='default'  # or MPMD
        #tau_profiling=False,
        #tau_tracing=False,
        # optional:
        # nodes=10,
        # tau_profiling=True,
        # tau_tracing=False,
        # run_repetitions=2, # <-- repeat each experiment this many times
        # component_subdirs = True, <-- codes have their own separate workspace in the experiment directory
        # component_inputs = {'simulation': ['some_input_file'], 'norm_calc': [SymLink('some_large_file')] } <-- inputs required by codes
        # max_procs = 64 <-- max no. of procs to run concurrently. depends on 'nodes'
    )

    # Sweep groups to be activated
    # sweeps = {'summit': [sweepGroup1]}
    sweeps = [sweepGroup1]
Example #13
class Exaalt(Campaign):
    name = "Exaalt"
    codes = dict(exaalt="pt_producer_global",
                 stage_write="./stage_write/stage_write")

    supported_machines = ['titan_fob']

    project = "CSC143"
    queue = "batch"

    inputs = ["states_list.txt"]
    sweeps = [

        # Staging and compression enabled
        p.SweepGroup(
            nodes=320,
            post_processing="",
            parameter_groups=[
                p.Sweep([
                    p.ParamRunner("exaalt", "nprocs", [4096]),
                    p.ParamCmdLineArg("exaalt", "states_list_file", 1,
                                      ["states_list.txt"]),
                    p.ParamCmdLineArg("exaalt", "no_of_states", 2,
                                      [2097152]),  #two million states
                    p.ParamCmdLineArg("exaalt", "bp_output_file", 3,
                                      ["output.bp"]),
                    p.ParamCmdLineArg("exaalt", "transport_method", 4,
                                      ["FLEXPATH"]),
                    p.ParamCmdLineArg("exaalt", "transport_variables", 5,
                                      [""]),
                    p.ParamCmdLineArg("exaalt", "transport_options", 6,
                                      ["none"]),
                    p.ParamRunner("stage_write", "nprocs", [512, 1024]),
                    p.ParamCmdLineArg("stage_write", "input_bp_file", 1,
                                      ["output.bp"]),
                    p.ParamCmdLineArg("stage_write", "output_bp_file", 2,
                                      ["staged.bp"]),
                    p.ParamCmdLineArg("stage_write", "adios_read_method", 3,
                                      ["FLEXPATH"]),
                    p.ParamCmdLineArg("stage_write", "read_method_params", 4,
                                      [""]),
                    p.ParamCmdLineArg("stage_write", "adios_write_method", 5,
                                      ["POSIX"]),
                    p.ParamCmdLineArg("stage_write", "write_method_params", 6,
                                      [""]),
                    p.ParamCmdLineArg("stage_write", "variables_to_transform", 7, [
                        "atom_id,atom_type,px,py,pz,imx,imy,imz,atom_vid,vx,vy,vz"
                    ]),
                    p.ParamCmdLineArg("stage_write", "transform_params", 8,
                                      ["none", "zlib:9", "bzip2:9"]),
                ]),
            ]),

        # No staging or compression. Simulation writes data to disk. This is the baseline test case.
        p.SweepGroup(nodes=256,
                     post_processing="",
                     parameter_groups=[
                         p.Sweep([
                             p.ParamRunner("exaalt", "nprocs", [4096]),
                             p.ParamCmdLineArg("exaalt", "states_list_file", 1,
                                               ["states_list.txt"]),
                             p.ParamCmdLineArg("exaalt", "no_of_states", 2,
                                               [10485760]),
                             p.ParamCmdLineArg("exaalt", "bp_output_file", 3,
                                               ["output.bp"]),
                             p.ParamCmdLineArg("exaalt", "transport_method", 4,
                                               ["POSIX"]),
                             p.ParamCmdLineArg("exaalt", "transport_variables",
                                               5, [""]),
                             p.ParamCmdLineArg("exaalt", "transport_options",
                                               6, ["none"]),
                         ]),
                     ]),
    ]
Example #14
class ProducerConsumer(Campaign):

    # A name for the campaign
    name = "coupling-example"

    # WORKFLOW SETUP
    #---------------
    # A list of the codes that will be part of the workflow
    # If there is an adios xml file associated with the codes, list it here
    # 'sleep_after' represents the time gap after which the next code is spawned
    # Use runner_override to run the code without the default launcher (mpirun/aprun/jsrun etc.). This runs the
    #   code as a serial application
    codes = [("producer",
              dict(exe="producer.py",
                   adios_xml_file='adios2.xml',
                   sleep_after=5)),
             ("mean_calc",
              dict(exe="mean_calculator.py",
                   adios_xml_file='adios2.xml',
                   runner_override=False))]

    # CAMPAIGN SETTINGS
    #------------------
    # A list of machines that this campaign is supported on
    supported_machines = ['local', 'titan', 'theta', 'summit']

    # Option to kill an experiment (just one experiment, not the full sweep or campaign) if one of the codes fails
    kill_on_partial_failure = True

    # Some pre-processing in the experiment directory
    # This is performed when the campaign directory is created (before the campaign is launched)
    run_dir_setup_script = None

    # A post-processing script to be run in the experiment directory after the experiment completes
    # For example, removing some large files after the experiment is done
    run_post_process_script = None

    # umask applied to your directory in the campaign so that colleagues can view files
    umask = '027'

    # Scheduler information: job queue, account-id etc. Leave it to None if running on a local machine
    scheduler_options = {
        'theta': {
            'project': '',
            'queue': 'batch'
        },
        'summit': {
            'project': 'CSC299'
        }
    }

    # Setup your environment. Loading modules, setting the LD_LIBRARY_PATH etc.
    # Ensure this script is executable
    app_config_scripts = {'local': 'setup.sh', 'summit': 'env_setup.sh'}

    # PARAMETER SWEEPS
    #-----------------
    # Setup how the workflow is run, and what values to 'sweep' over
    # Use ParamCmdLineArg to setup a command line arg, ParamCmdLineOption to setup a command line option, and so on.
    sweep1_parameters = [
        p.ParamRunner('producer', 'nprocs', [128]),
        p.ParamRunner('mean_calc', 'nprocs', [36]),
        p.ParamCmdLineArg('producer', 'array_size_per_pe', 1,
                          [1024 * 1024]),  # 1M, 2M, 10M
        p.ParamCmdLineArg('producer', 'num_steps', 2, [10]),
        p.ParamADIOS2XML('producer', 'staging', 'producer', 'engine', [{
            "SST": {}
        }]),
    ]

    shared_node = SummitNode()
    for i in range(18):
        shared_node.cpu[i] = "producer:{}".format(math.floor(i / 6))
        shared_node.cpu[i + 21] = "producer:{}".format(math.floor(
            (i + 18) / 6))
    for i in range(3):
        shared_node.cpu[i + 18] = "mean_calc:0"
        shared_node.cpu[i + 18 + 21] = "mean_calc:0"
    for i in range(6):
        shared_node.gpu[i] = ["producer:{}".format(i)]
    shared_node_layout = [shared_node]

    shared_node_1_per_rank = SummitNode()
    for i in range(18):
        shared_node_1_per_rank.cpu[i] = "producer:{}".format(i)
        shared_node_1_per_rank.cpu[i + 21] = "producer:{}".format(i + 18)
    for i in range(3):
        shared_node_1_per_rank.cpu[i + 18] = "mean_calc:{}".format(i)
        shared_node_1_per_rank.cpu[i + 18 + 21] = "mean_calc:{}".format(i + 3)
    for i in range(6):
        shared_node_1_per_rank.gpu[i] = ["producer:{}".format(i)]
    shared_node_layout_2 = [shared_node_1_per_rank]

    shared_node_shared_gpu = SummitNode()
    for i in range(18):
        shared_node_shared_gpu.cpu[i] = "producer:{}".format(math.floor(i / 6))
        shared_node_shared_gpu.cpu[i + 21] = "producer:{}".format(
            math.floor((i + 18) / 6))
    for i in range(3):
        shared_node_shared_gpu.cpu[i + 18] = "mean_calc:0"
        shared_node_shared_gpu.cpu[i + 18 + 21] = "mean_calc:0"
    shared_node_shared_gpu.gpu[0] = ["producer:0"]
    shared_node_shared_gpu.gpu[1] = [
        "producer:0",
        "producer:1",
    ]
    shared_node_shared_gpu.gpu[2] = ["producer:0", "producer:1", 'mean_calc:0']
    shared_node_layout_3 = [shared_node_shared_gpu]

    sep_node_producer = SummitNode()
    sep_node_mean_calc = SummitNode()
    for i in range(18):
        sep_node_producer.cpu[i] = "producer:{}".format(math.floor(i / 6))
    for i in range(3):
        sep_node_mean_calc.cpu[i + 18] = "mean_calc:0"
        sep_node_mean_calc.cpu[i + 18 + 21] = "mean_calc:0"
    for i in range(3):
        sep_node_producer.gpu[i] = ["producer:{}".format(i)]
    sep_node_layout = [sep_node_producer, sep_node_mean_calc]

    # Create a sweep
    # node_layout represents no. of processes per node
    sweep1 = p.Sweep(node_layout={'summit': shared_node_layout},
                     parameters=sweep1_parameters,
                     rc_dependency=None)
    sweep2 = p.Sweep(node_layout={'summit': shared_node_layout_2},
                     parameters=sweep1_parameters,
                     rc_dependency=None)
    sweep3 = p.Sweep(node_layout={'summit': shared_node_layout_3},
                     parameters=sweep1_parameters,
                     rc_dependency=None)
    sweep4 = p.Sweep(node_layout={'summit': sep_node_layout},
                     parameters=sweep1_parameters,
                     rc_dependency=None)

    # Create a sweep group from the above sweep. You can place multiple sweeps in the group.
    # Each group is submitted as a separate job.
    sweepGroup1 = p.SweepGroup(
        "sg-1",
        walltime=300,
        per_run_timeout=60,
        parameter_groups=[sweep1, sweep2, sweep3, sweep4],
        launch_mode='default',  # or MPMD
        # optional:
        # tau_profiling=True,
        # tau_tracing=False,
        # nodes=10,
        # component_subdirs = True, <-- codes have their own separate workspace in the experiment directory
        # component_inputs = {'producer': ['some_input_file'], 'norm_calc': [SymLink('some_large_file')] } <-- inputs required by codes
        # max_procs = 64 <-- max no. of procs to run concurrently. depends on 'nodes'
    )

    # Sweep groups to be activated
    sweeps = {'summit': [sweepGroup1]}
Example #15
def get_sweeps(ref_params_d, n_writers):
    params_d = copy.deepcopy(ref_params_d)
    params_d['writer']['nprocs'].values = [n_writers]
    params_d['writer']['decomposition'].values = [n_writers]

    all_dicts = []
    all_sweeps = []

    # Loop over ratio of the no. of reader ranks
    for r in [8]:
        par_r = copy.deepcopy(params_d)
        par_r['reader']['nprocs'].values = [n_writers // r]
        par_r['reader']['decomposition'].values = [n_writers // r]

        # Loop over data size per process
        for d in ['512MB']:
            par_r_d = copy.deepcopy(par_r)
            par_r_d['writer']['configfile'].values = [
                'staging-perf-test-{}-{}to1.txt'.format(d, r)
            ]
            par_r_d['reader']['configfile'].values = [
                'staging-perf-test-{}-{}to1.txt'.format(d, r)
            ]

            # Loop over engines
            for e in ["bp4", "sst-rdma", "sst-tcp", "ssc", "insitumpi"]:
                par_r_d_e = copy.deepcopy(par_r_d)
                par_r_d_e['writer']['xmlfile'].values = [
                    'staging-perf-test-{}.xml'.format(e)
                ]
                par_r_d_e['reader']['xmlfile'].values = [
                    'staging-perf-test-{}.xml'.format(e)
                ]

                all_dicts.append(par_r_d_e)

    for d in all_dicts:
        sweep_params = []
        sweep_params.extend(list(d['writer'].values()))
        sweep_params.extend(list(d['reader'].values()))

        sep_node_layout = get_separate_node_layout(32, 32)
        shared_node_layout = None

        if d['writer']['nprocs'].values[0] // d['reader']['nprocs'].values[
                0] == 8:
            shared_node_layout = get_shared_node_layout(32, 4)
        elif n_writers // 32 < 4096:
            shared_node_layout = get_shared_node_layout(16, 16)

        rc_dependency = None
        if 'bp4' in d['writer']['xmlfile'].values[0]:
            rc_dependency = {'reader': 'writer'}
        sweep_sep = p.Sweep(parameters=sweep_params,
                            node_layout={'summit': sep_node_layout},
                            rc_dependency=rc_dependency)
        if 'insitumpi' in d['writer']['xmlfile'].values[0]:
            sweep_sep.launch_mode = 'mpmd'
        if 'ssc' in d['writer']['xmlfile'].values[0]:
            sweep_sep.launch_mode = 'mpmd'

        sweep_shared = None
        if shared_node_layout:
            sweep_shared = p.Sweep(parameters=sweep_params,
                                   node_layout={'summit': shared_node_layout},
                                   rc_dependency=rc_dependency)

        if n_writers // 32 < 4096:
            all_sweeps.append(sweep_sep)
        if sweep_shared:
            all_sweeps.append(sweep_shared)

    return all_sweeps
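A minimal sketch of how the sweeps returned by get_sweeps() might be used (not part of the original example): collect them into SweepGroups inside a Campaign subclass. Here ref_params_d is assumed to be a nested dict mapping code name to parameter name to parameter object (as implied by the function body), `p` is assumed to be `codar.cheetah.parameters`, and the group names, walltime, and writer counts are illustrative.

# Hypothetical use of get_sweeps() inside a Campaign subclass.
all_groups = []
for n_writers in [512, 1024]:                     # illustrative writer counts
    sg = p.SweepGroup("writers-{}".format(n_writers),
                      walltime=3600,              # illustrative walltime in seconds
                      per_run_timeout=600,
                      parameter_groups=get_sweeps(ref_params_d, n_writers))
    all_groups.append(sg)

# sweeps = {'summit': all_groups}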
Example #16
class Brusselator(Campaign):

    # A name for the campaign
    name = "Brusselator"

    # WORKFLOW SETUP
    #---------------
    # A list of the codes that will be part of the workflow
    # If there is an adios xml file associated with the codes, list it here
    # 'sleep_after' represents the time gap after which the next code is spawned
    # Use runner_override to run the code without the default launcher (mpirun/aprun/jsrun etc.). This runs the 
    #   code as a serial application
    codes = [ ("simulation", dict(exe="simulation/Brusselator", adios_xml_file='adios2.xml', sleep_after=None)),
              ("norm_calc",  dict(exe="analysis/norm_calc",     adios_xml_file='adios2.xml', runner_override=False))
            ]

    # CAMPAIGN SETTINGS
    #------------------
    # A list of machines that this campaign is supported on
    supported_machines = ['local', 'titan', 'theta']

    # Option to kill an experiment (just one experiment, not the full sweep or campaign) if one of the codes fails
    kill_on_partial_failure = True

    # Some pre-processing in the experiment directory
    # This is performed when the campaign directory is created (before the campaign is launched)
    run_dir_setup_script = None

    # A post-processing script to be run in the experiment directory after the experiment completes
    # For example, removing some large files after the experiment is done
    run_post_process_script = None

    # umask applied to your directory in the campaign so that colleagues can view files
    umask = '027'

    # Scheduler information: job queue, account-id etc. Leave it to None if running on a local machine
    scheduler_options = {'titan': {'project':'CSC249ADCD01', 'queue': 'batch'}}

    # Setup your environment. Loading modules, setting the LD_LIBRARY_PATH etc.
    # Ensure this script is executable
    app_config_scripts = {'local': 'setup.sh', 'titan': 'env_setup.sh'}

    # PARAMETER SWEEPS
    #-----------------
    # Setup how the workflow is run, and what values to 'sweep' over
    # Use ParamCmdLineArg to setup a command line arg, ParamCmdLineOption to setup a command line option, and so on.
    sweep1_parameters = [
            # ParamRunner with 'nprocs' sets the number of MPI processes
            p.ParamRunner        ('simulation', 'nprocs', [4,8]), # <-- how to sweep over values
            p.ParamCmdLineArg    ('simulation', 'output', 1, ['bru.bp']),
            p.ParamCmdLineArg    ('simulation', 'nx', 2, [32]),
            p.ParamCmdLineArg    ('simulation', 'ny', 3, [32]),
            p.ParamCmdLineArg    ('simulation', 'nz', 4, [32]),
            p.ParamCmdLineArg    ('simulation', 'steps', 5, [10,20,50]),  # sweep over these values. creates cross-product of runs with 'nprocs' above
            p.ParamCmdLineArg    ('simulation', 'plotgap', 6, [1]),

            p.ParamRunner        ('norm_calc', 'nprocs', [1]),
            p.ParamCmdLineArg    ('norm_calc', 'infile', 1, ['bru.bp']),
            p.ParamCmdLineArg    ('norm_calc', 'outfile', 2, ['norm_calc.out.bp']),
            p.ParamCmdLineArg    ('norm_calc', 'write_norms_only', 3, [1]),
            
            # ParamADIOS2XML can be used to setup a value in the ADIOS xml file for the application
            # Set the transport to BPFile, as we want the codes to run serially. Set the rc_dependency
            #   in the Sweep to denote dependency between the codes
            # To couple codes for concurrent execution, use a transport method such as SST
            p.ParamADIOS2XML     ('simulation', 'SimulationOutput', 'engine', [ {"BPFile": {}} ]),
            p.ParamADIOS2XML     ('simulation', 'AnalysisOutput', 'engine', [ {"BPFile": {}} ]),
            # p.ParamADIOS2XML     ('simulation', 'SimulationOutput', 'engine', [ {"BPFile": {'Threads':1}},
            # p.ParamADIOS2XML     ('simulation', 'SimulationOutput', 'engine', [ {"BPFile": {'Threads':1}}, {"BPFile": {"ProfileUnits": "Microseconds"}} ]),
            
            # Use ParamCmdLineOption for named arguments
            # p.ParamCmdLineOption ('plotting', 'input_stream', '-i', ['bru.bp']),

            # Use ParamKeyValue to set options in a key-value configuration file. Input file can be a json file.
            # File path can be relative to the path specified in '-a' or it can be absolute.
            # File will be copied to the working_dir automatically by Cheetah.
            # p.ParamKeyValue   ('simulation', 'feed_rate', 'input.conf', 'key', ['value']),

            # Sweep over environment variables
            # p.ParamEnvVar     ('simulation', 'openmp_stuff', 'OMP_NUM_THREADS', [4,8]),

            # Pass additional scheduler arguments.
            # p.ParamSchedulerArgs ('simulation', [{'-f':'hosts.txt'}])
    ]

    # Create a sweep
    # node_layout represents no. of processes per node
    # rc_dependency denotes dependency between run components. Here, norm_calc will run after simulation has finished
    sweep1 = p.Sweep (node_layout = {'titan': [{'simulation':16}, {'norm_calc': 4}] },  # simulation: 16 ppn, norm_calc: 4 ppn
                      parameters = sweep1_parameters, rc_dependency={'norm_calc':'simulation'})

    # Create a sweep group from the above sweep. You can place multiple sweeps in the group.
    # Each group is submitted as a separate job.
    sweepGroup1 = p.SweepGroup ("sg-tmp",
                                walltime=300,
                                per_run_timeout=60,
                                parameter_groups=[sweep1],
                                launch_mode='default',  # or MPMD
                                # optional:
                                # nodes=10,
                                # run_repetitions=2, # no. of times each experiment must be repeated (here, total runs = 3)
                                # component_subdirs = True, <-- codes have their own separate workspace in the experiment directory
                                # component_inputs = {'simulation': ['some_input_file'], 'norm_calc': [SymLink('some_large_file')] } <-- inputs required by codes
                                # max_procs = 64 <-- max no. of procs to run concurrently. depends on 'nodes'
                                )

    sweepGroup2 = copy.deepcopy(sweepGroup1)
    sweepGroup2.name = 'sg-2'
    
    # Sweep groups to be activated
    sweeps = [sweepGroup1, sweepGroup2]
Example #17
class PlanarReadqCampaign(Campaign):

    name = "planar"

    codes = [("app1",
              dict(exe="adios_iotest",
                   adios_xml_file='adios2.xml',
                   sleep_after=5)),
             ("app2",
              dict(exe="adios_iotest",
                   adios_xml_file='adios2.xml',
                   runner_override=False))]

    run_dir_setup_script = "planar_reads.sh"

    supported_machines = ['local', 'cori', 'titan', 'theta']

    scheduler_options = {
        "cori": {
            "queue": "debug",
            "constraint": "haswell",
            "license": "SCRATCH,project",
        },
        "titan": {
            "queue": "debug",
            "project": "csc242",
        },
        "theta": {
            "queue": "debug-flat-quad",
            "project": "CSC249ADCD01",
        }
    }

    umask = '027'

    sweeps = [
        p.SweepGroup(
            name="all-methods",
            nodes=2,
            walltime=timedelta(minutes=30),
            parameter_groups=[
                p.Sweep([
                    p.ParamRunner("app1", "nprocs", [8]),
                    p.ParamRunner("app2", "nprocs", [8]),
                    p.ParamCmdLineOption("app1", "app ID", "-a", [1]),
                    p.ParamCmdLineOption("app1", "config file", "-c",
                                         ["planar_reads.txt"]),
                    p.ParamCmdLineOption("app1", "adios xml", "-x",
                                         ["adios2.xml"]),
                    p.ParamCmdLineArg("app1", "weak scaling", 1, ["-w"]),
                    # Fix this somehow
                    p.ParamCmdLineArg("app1", "rank decomp", 2, ["-d"]),
                    p.ParamCmdLineArg("app1", "decomp x", 3, [2]),
                    p.ParamCmdLineArg("app1", "decomp y", 4, [2]),
                    p.ParamCmdLineArg("app1", "decomp z", 5, [2]),
                    p.ParamCmdLineOption("app2", "app ID", "-a", [2]),
                    p.ParamCmdLineOption("app2", "config file", "-c",
                                         ["planar_reads.txt"]),
                    p.ParamCmdLineOption("app2", "adios xml", "-x",
                                         ["adios2.xml"]),
                    p.ParamCmdLineArg("app2", "weak scaling", 1, ["-w"]),
                    p.ParamCmdLineArg("app2", "rank decomp", 2, ["-d"]),
                    p.ParamCmdLineArg("app2", "decomp x", 3, [2]),
                    p.ParamCmdLineArg("app2", "decomp y", 4, [2]),
                    p.ParamCmdLineArg("app2", "decomp z", 5, [2]),
                    p.ParamEnvVar("app1", "cube length", "CUBE_LEN", [40]),
                    p.ParamEnvVar("app1", "read pattern", "READ_PATTERN",
                                  ["ij", "ik", "jk", "chunk"])
                ]),
            ]),
    ]
Example #18
class HeatTransfer(Campaign):
    """Small example to run the heat_transfer application with stage_write,
    using no compression, zfp, or sz. All other options are fixed, so there
    are only three runs."""

    name = "heat-transfer-simple"

    # This application consists of two codes, with nicknames "heat" and
    # "stage", exe locations as specified, and a delay of 5 seconds
    # between starting stage and heat.
    codes = [('stage', dict(exe="stage_write/stage_write", sleep_after=5)),
             ('heat',
              dict(exe="heat_transfer_adios2",
                   sleep_after=0,
                   adios_xml_file="heat_transfer.xml"))]

    # The application is designed to run on two machines.
    # (These are magic strings known to Cheetah.)
    supported_machines = ['local', 'titan']

    # Inputs are copied to each "run directory" -- directory created by
    # Cheetah for each run. The adios_xml_file for each code specified
    # above is included automatically, so does not need to be specified
    # here.
    inputs = []

    # If the heat or stage code fails (nonzero exit code) during a run,
    # kill the other code if still running. This is useful for multi-code
    # apps that require all codes to complete for useful results. This
    # is usually the case when using an adios stage code.
    kill_on_partial_failure = True

    # This script will be run in every run directory during campaign
    # creation. This can be used to customize the directory structure
    # needed for the application - this example simply creates an extra
    # directory.
    run_dir_setup_script = "run-dir-setup-mkdir.sh"

    # Options to pass to the scheduler (PBS or slurm). These are set per
    # target machine, since likely different options will be needed for
    # each.
    scheduler_options = {"titan": {"project": "CSC242", "queue": "debug"}}

    sweeps = [

        # Each SweepGroup specifies a set of runs to be performed on a specified
        # number of nodes. Here we have 1 SweepGroup, which will run on 4 nodes
        # on titan. On titan each executable consumes an entire node, even if it
        # doesn't make use of all processes on the node, so this will run
        # the first two instances at the same time across four nodes, and
        # start the last instance as soon as one of those two instances
        # finishes. On a supercomputer without this limitation, with nodes
        # that have >14 processes, all three could be submitted at the same
        # time with one node unused.
        p.SweepGroup(
            "small_scale",

            # Create separate subdir for each component
            component_subdirs=True,

            # Required. Set walltime for scheduler job.
            walltime=3600,

            # Optional. If set, each run in the sweep
            # group will be killed if not complete
            # after this many seconds.
            per_run_timeout=600,

            # Optional. Set max number of processes to run
            # in parallel. Must fit on the nodes
            # specified for each target machine, and
            # each run in the sweep group must use no
            # more than this number of processes. If
            # not specified, will be set to the max
            # of any individual run. Can be used to
            # do runs in parallel, i.e. setting to 28
            # for this experiment will allow two runs
            # at a time, since 28/14=2.
            max_procs=28,

            # Optional. Provide a list of input files per component
            # that will be copied into the working dir of the
            # component.
            # Copying source file just for demonstration.
            component_inputs={
                "stage": ["stage_write/utils.h", "stage_write/decompose.h"]
            },

            # Within a SweepGroup, each parameter_group specifies arguments for
            # each of the parameters required for each code. Number of runs is the
            # product of the number of options specified. Below, it is 3, as only
            # one parameter has more than one option. There are two types of parameters
            # used below: system ("ParamRunner") and positional command line
            # arguments (ParamCmdLineArg). Also supported: command line options
            # (ParamCmdLineOption), ADIOS XML config file (ParamAdiosXML)
            parameter_groups=[
                p.Sweep([

                    # First, the parameters for the STAGE program

                    # ParamRunner passes an argument to launch_multi_swift
                    # nprocs: Number of processors (aka process) to use
                    p.ParamRunner("stage", "nprocs", [2]),

                    # ParamCmdLineArg passes a positional argument to the application
                    # Arguments are:
                    # 1) Code name (e.g., "stage"),
                    # 2) Logical name for parameter, used in output;
                    # 3) positional argument number;
                    # 4) options
                    p.ParamCmdLineArg("stage", "input", 1, ["heat.bp"]),
                    p.ParamCmdLineArg("stage", "output", 2, ["staged.bp"]),
                    p.ParamCmdLineArg("stage", "rmethod", 3, ["FLEXPATH"]),
                    p.ParamCmdLineArg("stage", "ropt", 4, [""]),
                    p.ParamCmdLineArg("stage", "wmethod", 5, ["MPI"]),
                    p.ParamCmdLineArg("stage", "wopt", 6, [""]),
                    p.ParamCmdLineArg("stage", "variables", 7, ["T,dT"]),
                    p.ParamCmdLineArg(
                        "stage", "transform", 8,
                        ["none", "zfp:accuracy=.001", "sz:accuracy=.001"]),
                    p.ParamCmdLineArg("stage", "decomp", 9, [2]),

                    # Second, the parameters for the HEAT program

                    # Parameters that are derived from other explicit parameters can be
                    # specified as a function taking a dict of the other parameters
                    # as input and returning the value.
                    p.ParamRunner(
                        "heat", "nprocs",
                        lambda d: d["heat"]["xprocs"] * d["heat"]["yprocs"]),
                    p.ParamCmdLineArg("heat", "output", 1, ["heat"]),
                    p.ParamCmdLineArg("heat", "xprocs", 2, [4]),
                    p.ParamCmdLineArg("heat", "yprocs", 3, [3]),
                    p.ParamCmdLineArg("heat", "xsize", 4, [40]),
                    p.ParamCmdLineArg("heat", "ysize", 5, [50]),
                    p.ParamCmdLineArg("heat", "steps", 6, [6]),
                    p.ParamCmdLineArg("heat", "iterations", 7, [5]),
                ]),
            ]),
    ]
Example #19
class GrayScott(Campaign):
    # A name for the campaign
    name = "gray_scott"

    # Define your workflow. Setup the applications that form the workflow.
    # exe may be an absolute path.
    # The adios xml file is automatically copied to the campaign directory.
    # 'runner_override' may be used to launch the code on a login/service node as a serial code
    #   without a runner such as aprun/srun/jsrun etc.
    codes = [
        ("simulation", dict(exe="gray-scott", adios_xml_file='adios2.xml')),
        ("pdf_calc",
         dict(exe="pdf_calc",
              adios_xml_file='adios2.xml',
              runner_override=False)),
    ]

    # List of machines on which this code can be run
    supported_machines = ['local', 'titan', 'theta']

    # Kill an experiment right away if any workflow component fails (just the experiment, not the whole group)
    kill_on_partial_failure = True

    # Any setup that you may need to do in an experiment directory before the experiment is run
    run_dir_setup_script = None

    # A post-process script that is run for every experiment after the experiment completes
    run_post_process_script = None

    # Directory permissions for the campaign sub-directories
    umask = '027'

    # Options for the underlying scheduler on the target system. Specify the project ID and job queue here.
    scheduler_options = {
        'theta': {
            'project': 'CSC249ADCD01',
            'queue': 'default'
        }
    }

    # A way to setup your environment before the experiment runs. Export environment variables such as LD_LIBRARY_PATH here.
    app_config_scripts = {'local': 'setup.sh', 'theta': 'env_setup.sh'}

    # Setup the sweep parameters for a Sweep
    sweep1_parameters = [
        # ParamRunner 'nprocs' specifies the no. of ranks to be spawned
        p.ParamRunner('simulation', 'nprocs', [512]),

        # Create a ParamCmdLineArg parameter to specify a command line argument to run the application
        p.ParamCmdLineArg('simulation', 'settings', 1, ["settings.json"]),

        # Edit key-value pairs in the json file
        # Sweep over two values for the F key in the json file. Along with 4 values for the nprocs property for
        #   the pdf_calc code, this Sweep will create 2*4 = 8 experiments.
        p.ParamConfig('simulation', 'feed_rate_U', 'settings.json', 'F',
                      [0.01, 0.02]),
        p.ParamConfig('simulation', 'kill_rate_V', 'settings.json', 'k',
                      [0.048]),

        # Setup an environment variable
        # p.ParamEnvVar       ('simulation', 'openmp', 'OMP_NUM_THREADS', [4]),

        # Change the engine for the 'SimulationOutput' IO object in the adios xml file to SST for coupling.
        # As both the applications use the same xml file, you need to do this just once.
        p.ParamADIOS2XML('simulation', 'SimulationOutput', 'engine', [{
            'SST': {}
        }]),

        # Now setup options for the pdf_calc application.
        # Sweep over four values for the nprocs
        p.ParamRunner('pdf_calc', 'nprocs', [32, 64, 128, 256]),
        p.ParamCmdLineArg('pdf_calc', 'infile', 1, ['gs.bp']),
        p.ParamCmdLineArg('pdf_calc', 'outfile', 2, ['pdf']),
    ]

    # Create a Sweep object. This one does not define a node-layout, and thus, all cores of a compute node will be
    #   utilized and mapped to application ranks.
    sweep1 = p.Sweep(parameters=sweep1_parameters)

    # Create another Sweep object and set its node-layout to spawn 16 simulation processes per node, and
    #   4 processes of pdf_calc per node. On Theta, different executables reside on separate nodes as node-sharing
    #   is not permitted on Theta.
    sweep2_parameters = copy.deepcopy(sweep1_parameters)
    sweep2 = p.Sweep(
        node_layout={'theta': [{
            'simulation': 16
        }, {
            'pdf_calc': 4
        }]},
        parameters=sweep2_parameters)

    # Create a SweepGroup and add the above Sweeps. Set batch job properties such as the no. of nodes and the walltime.
    sweepGroup1 = p.SweepGroup(
        "sg-1",  # A unique name for the SweepGroup
        walltime=3600,  # Total runtime for the SweepGroup
        per_run_timeout=600,  # Timeout for each experiment
        parameter_groups=[sweep1, sweep2],  # Sweeps to include in this group
        launch_mode='default',  # Launch mode: default, or MPMD if supported
        nodes=128,  # No. of nodes for the batch job.
        # rc_dependency={'pdf_calc':'simulation',}, # Specify dependencies between workflow components
        run_repetitions=2,  # No. of times each experiment in the group must be repeated (Total no. of runs here will be 3)
    )

    # Activate the SweepGroup
    sweeps = [sweepGroup1]
Example #20
class ProducerConsumer(Campaign):

    # A name for the campaign
    name = "coupling-example"

    # WORKFLOW SETUP
    #---------------
    # A list of the codes that will be part of the workflow
    # If there is an adios xml file associated with the codes, list it here
    # 'sleep_after' represents the time gap after which the next code is spawned
    # Use runner_override to run the code without the default launcher (mpirun/aprun/jsrun etc.). This runs the 
    #   code as a serial application
    codes = [ ("producer",  dict(exe="producer.py",         adios_xml_file='adios2.xml', sleep_after=5)),
              ("mean_calc", dict(exe="mean_calculator.py",  adios_xml_file='adios2.xml', runner_override=False))
            ]

    # CAMPAIGN SETTINGS
    #------------------
    # A list of machines that this campaign is supported on
    supported_machines = ['local', 'titan', 'theta', 'summit', 'rhea', 'deepthought2_cpu', 'sdg_tm76']

    # Option to kill an experiment (just one experiment, not the full sweep or campaign) if one of the codes fails
    kill_on_partial_failure = True

    # Some pre-processing in the experiment directory
    # This is performed when the campaign directory is created (before the campaign is launched)
    run_dir_setup_script = None

    # A post-processing script to be run in the experiment directory after the experiment completes
    # For example, removing some large files after the experiment is done
    run_post_process_script = None

    # umask applied to your directory in the campaign so that colleagues can view files
    umask = '027'

    # Scheduler information: job queue, account-id etc. Leave it to None if running on a local machine
    scheduler_options = {'theta': {'project': '', 'queue': 'batch'},
            'summit': {'project':'csc143','reservation':'csc143_m414'}, 'rhea': {'project':'csc143'}}

    # Setup your environment. Loading modules, setting the LD_LIBRARY_PATH etc.
    # Ensure this script is executable
    app_config_scripts = {'local': 'setup.sh', 'summit': 'env_setup.sh'}

    # PARAMETER SWEEPS
    #-----------------
    # Setup how the workflow is run, and what values to 'sweep' over
    # Use ParamCmdLineArg to setup a command line arg, ParamCmdLineOption to setup a command line option, and so on.
    sweep1_parameters = [
            p.ParamRunner       ('producer', 'nprocs', [2]),
            p.ParamRunner       ('mean_calc', 'nprocs', [2]),
            p.ParamCmdLineArg   ('producer', 'array_size_per_pe', 1, [1024*1024,]), # 1M, 2M, 10M
            p.ParamCmdLineArg   ('producer', 'num_steps', 2, [10]),
            p.ParamADIOS2XML    ('producer', 'engine_sst', 'producer', 'engine', [ {"SST": {}} ]),
            # p.ParamADIOS2XML    ('producer', 'compression', 'producer', 'var_operation', [ {"U": {"zfp":{'accuracy':0.001, 'tolerance':0.9}}} ]),
    ]

    # Summit node layout
    # Create a shared node layout where the producer and mean_calc share compute nodes
    shared_node_nc = SummitNode()

    # place producer on the first socket
    for i in range(21):
        shared_node_nc.cpu[i] = 'producer:{}'.format(i)
    # place analysis on the second socket
    for i in range(8):
        shared_node_nc.cpu[22+i] = 'mean_calc:{}'.format(i)


    # DeepThought2 node layout (this should ideally be obtained via 'machine.VirtualNode()')
    shared_node_dt = DTH2CPUNode()
    for i in range(10):
        shared_node_dt.cpu[i] = 'producer:{}'.format(i)
    shared_node_dt.cpu[11] = 'mean_calc:0'
    shared_node_dt.cpu[12] = 'mean_calc:1'


    # Create a sweep
    # node_layout maps a machine name to the shared-node objects created above
    sweep1 = p.Sweep (node_layout = {'summit': [shared_node_nc], 'deepthought2_cpu': [shared_node_dt]},
                      parameters = sweep1_parameters, rc_dependency=None)

    # Create a sweep group from the above sweep. You can place multiple sweeps in the group.
    # Each group is submitted as a separate job.
    sweepGroup1 = p.SweepGroup ("sg-1",
                                walltime=300,
                                per_run_timeout=60,
                                parameter_groups=[sweep1],
                                launch_mode='default',  # or MPMD
                                # optional:
                                tau_profiling=True,
                                tau_tracing=False,
                                # nodes=10,
                                # run_repetitions=2, <-- repeat each experiment this many times
                                # component_subdirs = True, <-- codes have their own separate workspace in the experiment directory
                                # component_inputs = {'producer': ['some_input_file'], 'mean_calc': [SymLink('some_large_file')] } <-- inputs required by codes
                                # max_procs = 64 <-- max no. of procs to run concurrently. depends on 'nodes'
                                )

    # Create a sweep group from the above sweep. You can place multiple sweeps in the group.
    # Each group is submitted as a separate job.
    sweepGroup2 = p.SweepGroup ("sg-2",
                                walltime=300,
                                per_run_timeout=60,
                                parameter_groups=[sweep1],
                                launch_mode='default',  # or MPMD
                                # optional:
                                tau_profiling=True,
                                tau_tracing=False,
                                # nodes=10,
                                # run_repetitions=2, <-- repeat each experiment this many times
                                # component_subdirs = True, <-- codes have their own separate workspace in the experiment directory
                                # component_inputs = {'producer': ['some_input_file'], 'mean_calc': [SymLink('some_large_file')] } <-- inputs required by codes
                                # max_procs = 64 <-- max no. of procs to run concurrently. depends on 'nodes'
                                )

    # Sweep groups to be activated
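    # 'sweeps' may be a plain list, or a dict keyed by machine name as done
    # here; groups under 'MACHINE_ANY' are not tied to a specific machine.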
    sweeps = {'MACHINE_ANY':[sweepGroup1], 'summit':[sweepGroup2]}
Example #21
class PiExperiment(Campaign):
    # Used in job names submitted to scheduler.
    name = "pi-small-one-node"

    # This application has a single executable, which we give the
    # friendly name 'pi' for later reference in parameter specification.
    # The executable path is taken relative to the application directory
    # specified on the cheetah command line.
    codes = [("pi", dict(exe="pi-gmp"))]

    # Document which machines the campaign is designed to run on. An
    # error will be raised if a different machine is specified on the
    # cheetah command line.
    supported_machines = ['local', 'cori', 'titan', 'theta']

    # Per machine scheduler options. Keys are the machine name, values
    # are dicts of name value pairs for the options for that machine.
    # Options must be explicitly supported by Cheetah, this is not
    # currently a generic mechanism.
    scheduler_options = {
        "cori": {
            "queue": "debug",
            "constraint": "haswell",
            "license": "SCRATCH,project",
        },
        "titan": {
            "queue": "debug",
            "project": "csc242",
        },
        "theta": {
            "queue": "debug-flat-quad",
            "project": "CSC249ADCD01",
        }
    }

    # Optionally set umask for campaign directory and all processes spawned by
    # the workflow script when the campaign is run. Note that user rx
    # must be allowed at a minimum.
    # If set must be a string suitable for passing to the umask command.
    umask = '027'

    run_post_process_script = 'pi-post-run-compare-digits.py'

    # set this and uncomment the exit(1) in the script to test
    # triggering failure.
    #run_post_process_stop_group_on_failure = True

    sweeps = [
     p.SweepGroup(name="all-methods-small", nodes=4,
                  walltime=timedelta(minutes=30),
      parameter_groups=
      [p.Sweep([
        p.ParamCmdLineArg("pi", "method", 1, ["mc", "trap"]),
        p.ParamCmdLineArg("pi", "precision", 2, [64, 128, 256, 512, 1024]),
        p.ParamCmdLineArg("pi", "iterations", 3,
                          [10, 100, 1000, 1000000, 10000000]),
        ]),
       p.Sweep([
        p.ParamCmdLineArg("pi", "method", 1, ["atan"]),
        p.ParamCmdLineArg("pi", "precision", 2,
                          [64, 128, 256, 512, 1024, 2048, 4096]),
        p.ParamCmdLineArg("pi", "iterations", 3,
                          [10, 100, 1000, 10000, 100000]),
        ]),
      ]),
    ]
Example #22
class GrayScott(Campaign):
    name = "Gray-Scott"
    codes = [("gray-scott", dict(exe="gray-scott"))]
    app_config_scripts = {'summit': 'env_summit.sh'}
    supported_machines = ['local', 'theta', 'summit']
    scheduler_options = {
        "theta": {
            "queue": "debug-flat-quad",
            "project": "CSC249ADCD01",
        },
        "summit": {
            "project": "csc299",
        }
    }
    kill_on_partial_failure = True

    umask = '027'

    nprocs = 6

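    # Summit nodes have 6 GPUs: give each of the 6 gray-scott ranks its own
    # CPU core and its own GPU so that one node hosts all ranks of a run.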
    shared_node = SummitNode()
    for i in range(nprocs):
        shared_node.cpu[i] = "gray-scott:{}".format(i)
        shared_node.gpu[i] = ["gray-scott:{}".format(i)]
    shared_node_layout = [shared_node]

    L = [256]
    noise = [1.e-5]

    Du = [0.1, 0.2, 0.3]
    Dv = [0.05, 0.1, 0.15]
    F = [0.01, 0.02, 0.03]
    k = [0.048, 0.04, 0.06]

    sweep_parameters = \
    [
        p.ParamCmdLineArg("gray-scott", "settings", 1, ["settings.json"]),
        p.ParamConfig("gray-scott", "L", "settings.json", "L",
                      L),
        p.ParamConfig("gray-scott", "noise", "settings.json", "noise",
                      noise),
        p.ParamConfig("gray-scott", "Du", "settings.json", "Du",
                      Du),
        p.ParamConfig("gray-scott", "Dv", "settings.json", "Dv",
                      Dv),
        p.ParamConfig("gray-scott", "F", "settings.json", "F",
                      F),
        p.ParamConfig("gray-scott", "k", "settings.json", "k",
                      k),
        p.ParamRunner('gray-scott', 'nprocs', [nprocs] ),
    ]

    sweep = p.Sweep(parameters=sweep_parameters,
                    node_layout={'summit': shared_node_layout})
    nodes = len(noise) * len(Du) * len(Dv) * len(F) * len(k)
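    # The sweep is a full cross product of the value lists above
    # (1 x 3 x 3 x 3 x 3 = 81 runs), so one node is requested per run.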

    sweeps = \
    [
        p.SweepGroup(
            name = "gs",
            walltime = timedelta(minutes=60),
            nodes = nodes,
            component_subdirs = True,
            component_inputs = {
                'gray-scott': ['settings.json','adios2.xml'],
            },
            parameter_groups = [sweep]
        )
    ]
Example #23
class param_test(Campaign):
    """
    Fake campaign designed to exercise all param types. Instead of real codes,
    bash scripts that echo args and cat files are used.

    NOTE: ParamAdiosXML is exercised by heat_transfer_*.py and is not used
    here; all other types should be included.
    """
    # Used in job names submitted to scheduler.
    name = "param_test"

    # Scripts that just print args and exit. print1.sh also cats its
    # config file.
    codes = [("print1", dict(exe="print1.sh")),
             ("print2", dict(exe="print2.sh")),
             ("print3", dict(exe="print3.sh"))]

    # Files to be copied from app dir to all run component directories,
    # e.g. for use with ParamConfig and ParamAdiosXML.
    inputs = ["all.conf"]

    # Document which machines the campaign is designed to run on. An
    # error will be raised if a different machine is specified on the
    # cheetah command line.
    supported_machines = ['local', 'cori', 'titan', 'theta']

    # Per machine scheduler options. Keys are the machine name, values
    # are dicts of name value pairs for the options for that machine.
    # Options must be explicitly supported by Cheetah, this is not
    # currently a generic mechanism.
    scheduler_options = {
        "cori": {
            "queue": "debug",
            "constraint": "haswell",
            "license": "SCRATCH,project",
        },
        "titan": {
            "queue": "debug",
            "project": "csc242",
        },
        "theta": {
            "queue": "debug-flat-quad",
            "project": "CSC249ADCD01",
        }
    }

    sweeps = [
        p.SweepGroup(
            name="test",
            nodes=4,
            walltime=timedelta(minutes=5),
            component_subdirs=True,
            component_inputs={
                'print1': ['print1.conf'],
                'print2': ['print2.ini'],
                'print3': ['print3.xml'],
            },
            parameter_groups=[
                p.Sweep([
                    p.ParamCmdLineArg("print1", "arg1", 1, ["val1", "val2"]),
                    p.ParamCmdLineArg("print1", "arg2", 2, [64, 128]),
                    p.ParamCmdLineOption("print1", "opt1", "--opt1",
                                         [0.2, 0.3]),
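                    # Derived parameter: the callable receives a dict of the
                    # run's resolved values, keyed by code name and then
                    # parameter name; its return value becomes this option's value.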
                    p.ParamCmdLineOption(
                        "print1", "derived", "--derived",
                        lambda d: d["print1"]["arg2"] * d["print2"]["opt1"]),
                    p.ParamKeyValue("print1", "config1", "print1.conf",
                                    "config1", ["c1", "c2"]),
                    p.ParamCmdLineArg("print2", "arg1", 1, ["1lav"]),
                    p.ParamCmdLineArg("print2", "arg2", 2, [2]),
                    p.ParamCmdLineOption("print2", "opt1", "--opt1", [-100]),
                    p.ParamKeyValue("print2", "kvconfig", "print2.ini",
                                    "mykey", ["cv1", "cv2"]),
                    p.ParamCmdLineArg("print3", "arg1", 1, ["val3.1"]),
                    p.ParamConfig("print3", "xmlconfig", "print3.xml",
                                  "WIDGET_VALUE", ["somevalue"]),
                ]),
            ]),
    ]
Example #24
class Exaalt(Campaign):
    name = "Exaalt"
    codes = [("stage_write", dict(exe="stage_write/stage_write")),
             ("exaalt", dict(exe="pt_producer_global"))]

    # Note that titan has 16 processes per node
    supported_machines = ['titan']

    scheduler_options = {
        "titan": { "project": "CSC242",
                   "queue": "batch" }
    }

    kill_on_partial_failure = True

    # Example post process script which saves the contents of the output
    # directory after a run, then deletes it to make room for future
    # runs without worrying about the user's disk quota.
    run_post_process_script = "post-run-rm-staged.py"

    inputs = ["states_list.txt"]

    sweeps = [
      # Test group that can be run separately to test that binaries and
      # post process script are working. Only generates four runs,
      # with the same number of nodes but different compression options for
      # stage_write.
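      # The four runs come from the four 'transform_params' values given to
      # stage_write below; every other parameter has a single value.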
      p.SweepGroup(name="test-32",
                   nodes=34,
                   walltime=timedelta(hours=1, minutes=5),
                   per_run_timeout=timedelta(minutes=15),
                   parameter_groups=[
        p.Sweep([
          # 32 exaalt nodes
          p.ParamRunner("exaalt", "nprocs", [512]),
          p.ParamCmdLineArg("exaalt", "states_list_file", 1,
                            ["states_list.txt"]),
          p.ParamCmdLineArg("exaalt", "no_of_states", 2, [20480]),
          p.ParamCmdLineArg("exaalt", "bp_output_file", 3, ["output.bp"]),
          p.ParamCmdLineArg("exaalt", "transport_method", 4, ["FLEXPATH"]),
          p.ParamCmdLineArg("exaalt", "transport_variables", 5, [""]),
          p.ParamCmdLineArg("exaalt", "transport_options", 6, ["none"]),

          # 2 stage nodes
          p.ParamRunner("stage_write", "nprocs", [32]),
          p.ParamCmdLineArg("stage_write", "input_bp_file", 1, ["output.bp"]),
          p.ParamCmdLineArg("stage_write", "output_bp_file", 2, ["staged.bp"]),
          p.ParamCmdLineArg("stage_write", "adios_read_method", 3,
                            ["FLEXPATH"]),
          p.ParamCmdLineArg("stage_write", "read_method_params", 4, [""]),
          p.ParamCmdLineArg("stage_write", "adios_write_method", 5,
                            ["MPI_AGGREGATE"]),
          p.ParamCmdLineArg("stage_write", "write_method_params", 6,
                            ["have_metadata_file=0;num_aggregators=4"]),
          p.ParamCmdLineArg("stage_write", "variables_to_transform", 7,
            ["atom_id,atom_type,px,py,pz,imx,imy,imz,atom_vid,vx,vy,vz"]),
          p.ParamCmdLineArg("stage_write", "transform_params", 8,
                            ["none","zlib:9", "bzip2:9","lz4"]),
        ]),
      ]),

      # Submit everything as a single job, to avoid queuing delay.
      # Titan allows 12 hour walltime for jobs of 313-3,749 nodes in
      # the batch queue.
      p.SweepGroup(name="64-128-256",
                   nodes=384,
                   walltime=timedelta(hours=48),
                   per_run_timeout=timedelta(hours=1),
                   parameter_groups=[
        p.Sweep([
          # 256 exaalt nodes
          p.ParamRunner("exaalt", "nprocs", [4096]),
          p.ParamCmdLineArg("exaalt", "states_list_file", 1,
                            ["states_list.txt"]),
          p.ParamCmdLineArg("exaalt", "no_of_states", 2, [1433600]),
          p.ParamCmdLineArg("exaalt", "bp_output_file", 3, ["output.bp"]),
          p.ParamCmdLineArg("exaalt", "transport_method", 4, ["FLEXPATH"]),
          p.ParamCmdLineArg("exaalt", "transport_variables", 5, [""]),
          p.ParamCmdLineArg("exaalt", "transport_options", 6, ["none"]),

          # 16, 8, or 4 stage nodes
          p.ParamRunner("stage_write", "nprocs", [256,128,64]),
          p.ParamCmdLineArg("stage_write", "input_bp_file", 1,
                            ["output.bp"]),
          p.ParamCmdLineArg("stage_write", "output_bp_file", 2,
                            ["staged.bp"]),
          p.ParamCmdLineArg("stage_write", "adios_read_method", 3,
                            ["FLEXPATH"]),
          p.ParamCmdLineArg("stage_write", "read_method_params", 4, [""]),
          p.ParamCmdLineArg("stage_write", "adios_write_method", 5,
                            ["MPI_AGGREGATE"]),
          p.ParamCmdLineArg("stage_write", "write_method_params", 6,
                            ["have_metadata_file=0;num_aggregators=16"]),
          p.ParamCmdLineArg("stage_write", "variables_to_transform", 7,
              ["atom_id,atom_type,px,py,pz,imx,imy,imz,atom_vid,vx,vy,vz"]),
          p.ParamCmdLineArg("stage_write", "transform_params", 8,
                            ["none","zlib:9", "bzip2:9","lz4"]),
        ]),
        p.Sweep([
          # 128 exaalt nodes
          p.ParamRunner("exaalt", "nprocs", [2048]),
          p.ParamCmdLineArg("exaalt", "states_list_file", 1,
                            ["states_list.txt"]),
          p.ParamCmdLineArg("exaalt", "no_of_states", 2, [1433600]),
          p.ParamCmdLineArg("exaalt", "bp_output_file", 3, ["output.bp"]),
          p.ParamCmdLineArg("exaalt", "transport_method", 4, ["FLEXPATH"]),
          p.ParamCmdLineArg("exaalt", "transport_variables", 5, [""]),
          p.ParamCmdLineArg("exaalt", "transport_options", 6, ["none"]),

          # 8, 4, or 2 stage nodes
          p.ParamRunner("stage_write", "nprocs", [128,64,32]),
          p.ParamCmdLineArg("stage_write", "input_bp_file", 1, ["output.bp"]),
          p.ParamCmdLineArg("stage_write", "output_bp_file", 2, ["staged.bp"]),
          p.ParamCmdLineArg("stage_write", "adios_read_method", 3,
                            ["FLEXPATH"]),
          p.ParamCmdLineArg("stage_write", "read_method_params", 4, [""]),
          p.ParamCmdLineArg("stage_write", "adios_write_method", 5,
                            ["MPI_AGGREGATE"]),
          p.ParamCmdLineArg("stage_write", "write_method_params", 6,
                            ["have_metadata_file=0;num_aggregators=8"]),
          p.ParamCmdLineArg("stage_write", "variables_to_transform", 7,
              ["atom_id,atom_type,px,py,pz,imx,imy,imz,atom_vid,vx,vy,vz"]),
          p.ParamCmdLineArg("stage_write", "transform_params", 8,
                            ["none","zlib:9", "bzip2:9","lz4"]),
        ]),
        p.Sweep([
          # 64 exaalt nodes
          p.ParamRunner("exaalt", "nprocs", [1024]),
          p.ParamCmdLineArg("exaalt", "states_list_file", 1,
                            ["states_list.txt"]),
          p.ParamCmdLineArg("exaalt", "no_of_states", 2, [1433600]),
          p.ParamCmdLineArg("exaalt", "bp_output_file", 3, ["output.bp"]),
          p.ParamCmdLineArg("exaalt", "transport_method", 4, ["FLEXPATH"]),
          p.ParamCmdLineArg("exaalt", "transport_variables", 5, [""]),
          p.ParamCmdLineArg("exaalt", "transport_options", 6, ["none"]),

          # 4, 2, or 1 stage nodes
          p.ParamRunner("stage_write", "nprocs", [64,32,16]),
          p.ParamCmdLineArg("stage_write", "input_bp_file", 1, ["output.bp"]),
          p.ParamCmdLineArg("stage_write", "output_bp_file", 2, ["staged.bp"]),
          p.ParamCmdLineArg("stage_write", "adios_read_method", 3,
                            ["FLEXPATH"]),
          p.ParamCmdLineArg("stage_write", "read_method_params", 4, [""]),
          p.ParamCmdLineArg("stage_write", "adios_write_method", 5,
                            ["MPI_AGGREGATE"]),
          p.ParamCmdLineArg("stage_write", "write_method_params", 6,
                            ["have_metadata_file=0;num_aggregators=4"]),
          p.ParamCmdLineArg("stage_write", "variables_to_transform", 7,
            ["atom_id,atom_type,px,py,pz,imx,imy,imz,atom_vid,vx,vy,vz"]),
          p.ParamCmdLineArg("stage_write", "transform_params", 8,
                            ["none","zlib:9", "bzip2:9","lz4"]),
        ]),

        # No staging or compression. Simulation writes data to disk.
        # These are the baseline test cases.
        p.Sweep([
          # 256 exaalt nodes
          p.ParamRunner("exaalt", "nprocs", [4096]),
          p.ParamCmdLineArg("exaalt", "states_list_file", 1,
                            ["states_list.txt"]),
          p.ParamCmdLineArg("exaalt", "no_of_states", 2, [1433600]),
          p.ParamCmdLineArg("exaalt", "bp_output_file", 3, ["output.bp"]),
          p.ParamCmdLineArg("exaalt", "transport_method", 4,
                            ["MPI_AGGREGATE"]),
          p.ParamCmdLineArg("exaalt", "transport_variables", 5,
                            ["have_metadata_file=0;num_aggregators=256"]),
          p.ParamCmdLineArg("exaalt", "transport_options", 6, ["none"]),
        ]),
        p.Sweep([
          # 128 exaalt nodes
          p.ParamRunner("exaalt", "nprocs", [2048]),
          p.ParamCmdLineArg("exaalt", "states_list_file", 1,
                            ["states_list.txt"]),
          p.ParamCmdLineArg("exaalt", "no_of_states", 2, [1433600]),
          p.ParamCmdLineArg("exaalt", "bp_output_file", 3, ["output.bp"]),
          p.ParamCmdLineArg("exaalt", "transport_method", 4, ["MPI_AGGREGATE"]),
          p.ParamCmdLineArg("exaalt", "transport_variables", 5,
                            ["have_metadata_file=0;num_aggregators=128"]),
          p.ParamCmdLineArg("exaalt", "transport_options", 6, ["none"]),
        ]),
        p.Sweep([
            # 64 exaalt nodes
            p.ParamRunner("exaalt", "nprocs", [1024]),
            p.ParamCmdLineArg("exaalt", "states_list_file", 1,
                              ["states_list.txt"]),
            p.ParamCmdLineArg("exaalt", "no_of_states", 2, [1433600]),
            p.ParamCmdLineArg("exaalt", "bp_output_file", 3, ["output.bp"]),
            p.ParamCmdLineArg("exaalt", "transport_method", 4,
                              ["MPI_AGGREGATE"]),
            p.ParamCmdLineArg("exaalt", "transport_variables", 5,
                              ["have_metadata_file=0;num_aggregators=64"]),
            p.ParamCmdLineArg("exaalt", "transport_options", 6, ["none"]),
        ]),
      ])
    ]
Example #25
class HeatTransfer(Campaign):

    name = "heat-transfer-small-tau-sos"

    codes = [
              ("dataspaces", dict(exe="bin/dataspaces_server",
                                  sleep_after=10,
                                  linked_with_sosflow=False)),
              ("stage_write", dict(exe="stage_write/stage_write_tau",
                                   sleep_after=5,
                                   linked_with_sosflow=True)),
              ("heat", dict(exe="heat_transfer_adios2_tau", sleep_after=5,
                            linked_with_sosflow=True,
                            adios_xml_file='heat_transfer.xml')),
            ]

    supported_machines = ['local','titan','theta', 'cori']
    kill_on_partial_failure=True
    sosd_path = "bin/sosd"

    scheduler_options = {
        "titan": {"project":"CSC249ADCD01",
                  "queue":"batch" },
        "theta": {"project":"CSC249ADCD01",
                  "queue":"debug-flat-quad" },
        "cori": {"project":"m3084",
                  "queue":"regular",
                  "constraint": "haswell" }
    }

    app_config_scripts = {
        'titan': 'titan_config.sh',
        'theta': 'theta_config.sh',
        'cori': 'cori_config.sh',
        'local': None,
    }

    sweeps = [

     p.SweepGroup(
      "sim",
      per_run_timeout=180,
      sosflow_profiling=True,
      component_subdirs=False,

      parameter_groups=
       [p.Sweep(
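        # Each dict in a node_layout list describes one kind of node and maps
        # a code name to the number of its processes placed on such a node.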
        node_layout={ "titan": [{ "heat": 16}, {"dataspaces_server":1 }],
                      "theta": [{ "heat": 16}, {"dataspaces_server":1 }],
                      "cori": [{ "heat": 32}, {"dataspaces_server":1 }] },
        parameters=[
        p.ParamRunner("heat", "nprocs", [16]),
        p.ParamCmdLineArg("heat", "output", 1, ["heat_sim_data"]),
        p.ParamCmdLineArg("heat", "xprocs", 2, [4]),
        p.ParamCmdLineArg("heat", "yprocs", 3, [4]),
        p.ParamCmdLineArg("heat", "xsize", 4, [32]),
        p.ParamCmdLineArg("heat", "ysize", 5, [32]),
        p.ParamCmdLineArg("heat", "timesteps", 6, [10]),
        p.ParamCmdLineArg("heat", "checkpoints", 7, [2]),

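        # ParamAdiosXML edits the (ADIOS1) XML file used by 'heat':
        # 'adios_transform:heat:T' selects the transform applied to variable T,
        # sweeping over the seven compression settings below.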
        p.ParamAdiosXML("heat", "transform_T", "adios_transform:heat:T",
            ["none", "zlib", "bzip2", "sz", "zfp", "mgard:tol=0.00001",
             "blosc:threshold=4096,shuffle=bit,lvl=1,threads=4,compressor=zstd"]),

        p.ParamAdiosXML("heat", "transport_T", "adios_transport:heat",
            ["MPI_AGGREGATE:num_aggregators=2;num_ost=2;have_metadata_file=0"]),
        p.ParamAdiosXML("heat", "transport_T_final",
                        "adios_transport:heat_final",
            ["MPI_AGGREGATE:num_aggregators=2;num_ost=2;have_metadata_file=0;"]),
        ]),
      ]),


     p.SweepGroup(
      "staging-dataspaces",
      per_run_timeout=500,
      sosflow_profiling=True,

      parameter_groups=
      [p.Sweep(
        node_layout={
            "titan": [{ "dataspaces": 1 }, {"stage_write": 8}, {"heat": 16}],
            "theta": [{ "dataspaces": 1 }, {"stage_write": 8}, {"heat": 64}],
            "cori": [{ "dataspaces": 1 }, {"stage_write": 8}, {"heat": 32}]},
        parameters=[

        p.ParamRunner("stage_write", "nprocs", [4]),
        p.ParamCmdLineArg("stage_write", "input", 1, ["heat_sim_data.bp"]),
        p.ParamCmdLineArg("stage_write", "output", 2, ["staging_output.bp"]),
        p.ParamCmdLineArg("stage_write", "rmethod", 3, ["DATASPACES"]),
        p.ParamCmdLineArg("stage_write", "ropt", 4, [""]),
        p.ParamCmdLineArg("stage_write", "wmethod", 5, ["POSIX"]),
        p.ParamCmdLineArg("stage_write", "wopt", 6, [""]),
        p.ParamCmdLineArg("stage_write", "variables", 7, ["T"]),
        p.ParamCmdLineArg("stage_write", "transform", 8,
            ["none", "zfp:accuracy=0.001", "sz:absolute=0.001",
             "zlib:9", "bzip2:9"]),

        p.ParamRunner("heat", "nprocs", [16]),
        p.ParamCmdLineArg("heat", "output", 1, ["heat_sim_data"]),
        p.ParamCmdLineArg("heat", "xprocs", 2, [4]),
        p.ParamCmdLineArg("heat", "yprocs", 3, [4]),
        p.ParamCmdLineArg("heat", "xsize", 4, [32]),
        p.ParamCmdLineArg("heat", "ysize", 5, [32]),
        p.ParamCmdLineArg("heat", "timesteps", 6, [10]),
        p.ParamCmdLineArg("heat", "checkpoints", 7, [2]),

        p.ParamAdiosXML("heat", "transform_T", "adios_transform:heat:T",
            ["none",]),
        p.ParamAdiosXML("heat", "transport_T", "adios_transport:heat",
            ["DATASPACES"]),
        p.ParamAdiosXML("heat", "transport_T_final",
                        "adios_transport:heat_final",
            ["MPI_AGGREGATE:num_aggregators=2;num_ost=2;have_metadata_file=0;"]),
        ]),
      ]),

     p.SweepGroup(
      "staging-dataspaces-dimes",
      per_run_timeout=500,
      sosflow_profiling=True,

      parameter_groups=
      [p.Sweep(
        node_layout={
            "titan": [{ "dataspaces": 1 }, {"stage_write": 8}, {"heat": 16}],
            "theta": [{ "dataspaces": 1 }, {"stage_write": 8}, {"heat": 64}],
            "cori": [{ "dataspaces": 1 }, {"stage_write": 8}, {"heat": 32}]},
        parameters=[
        p.ParamRunner("stage_write", "nprocs", [4]),
        p.ParamCmdLineArg("stage_write", "input", 1, ["heat_sim_data.bp"]),
        p.ParamCmdLineArg("stage_write", "output", 2, ["staging_output.bp"]),
        p.ParamCmdLineArg("stage_write", "rmethod", 3, ["DIMES"]),
        p.ParamCmdLineArg("stage_write", "ropt", 4, [""]),
        p.ParamCmdLineArg("stage_write", "wmethod", 5, ["POSIX"]),
        p.ParamCmdLineArg("stage_write", "wopt", 6, [""]),
        p.ParamCmdLineArg("stage_write", "variables", 7, ["T"]),
        p.ParamCmdLineArg("stage_write", "transform", 8,
              ["none", "zfp:accuracy=0.001", "sz:absolute=0.001",
               "zlib:9", "bzip2:9"]),

        p.ParamRunner("heat", "nprocs", [16]),
        p.ParamCmdLineArg("heat", "output", 1, ["heat_sim_data"]),
        p.ParamCmdLineArg("heat", "xprocs", 2, [4]),
        p.ParamCmdLineArg("heat", "yprocs", 3, [4]),
        p.ParamCmdLineArg("heat", "xsize", 4, [32]),
        p.ParamCmdLineArg("heat", "ysize", 5, [32]),
        p.ParamCmdLineArg("heat", "timesteps", 6, [4]),
        p.ParamCmdLineArg("heat", "checkpoints", 7, [2]),

        p.ParamAdiosXML("heat", "transform_T", "adios_transform:heat:T",
            ["none",]),
        p.ParamAdiosXML("heat", "transport_T", "adios_transport:heat",
            ["DIMES"]),
        p.ParamAdiosXML("heat", "transport_T_final", "adios_transport:heat_final",
            ["MPI_AGGREGATE:num_aggregators=2;num_ost=2;have_metadata_file=0;"]),
        ]),
      ]),

     p.SweepGroup(
      "staging-flexpath",
      per_run_timeout=500,
      sosflow_profiling=True,

      parameter_groups=
      [p.Sweep(
        node_layout={
            "titan": [{ "dataspaces": 1 }, {"stage_write": 8}, {"heat": 16}],
            "theta": [{ "dataspaces": 1 }, {"stage_write": 8}, {"heat": 64}],
            "cori": [{ "dataspaces": 1 }, {"stage_write": 8}, {"heat": 32}]},
        parameters=[


        p.ParamRunner("stage_write", "nprocs", [4]),
        p.ParamCmdLineArg("stage_write", "input", 1, ["heat_sim_data.bp"]),
        p.ParamCmdLineArg("stage_write", "output", 2, ["staging_output.bp"]),
        p.ParamCmdLineArg("stage_write", "rmethod", 3, ["FLEXPATH"]),
        p.ParamCmdLineArg("stage_write", "ropt", 4, [""]),
        p.ParamCmdLineArg("stage_write", "wmethod", 5, ["POSIX"]),
        p.ParamCmdLineArg("stage_write", "wopt", 6, [""]),
        p.ParamCmdLineArg("stage_write", "variables", 7, ["T"]),
        p.ParamCmdLineArg("stage_write", "transform", 8,
          ["none","zfp:accuracy=0.001","sz:absolute=0.001","zlib:9","bzip2:9"]),

        p.ParamRunner("heat", "nprocs", [16]),
        p.ParamCmdLineArg("heat", "output", 1, ["heat_sim_data"]),
        p.ParamCmdLineArg("heat", "xprocs", 2, [4]),
        p.ParamCmdLineArg("heat", "yprocs", 3, [4]),
        p.ParamCmdLineArg("heat", "xsize", 4, [32]),
        p.ParamCmdLineArg("heat", "ysize", 5, [32]),
        p.ParamCmdLineArg("heat", "timesteps", 6, [4]),
        p.ParamCmdLineArg("heat", "checkpoints", 7, [2]),

        p.ParamAdiosXML("heat", "transform_T", "adios_transform:heat:T",
            ["none",]),
        p.ParamAdiosXML("heat", "transport_T", "adios_transport:heat",
            ["FLEXPATH"]),
        p.ParamAdiosXML("heat", "transport_T_final",
                        "adios_transport:heat_final",
            ["MPI_AGGREGATE:num_aggregators=2;num_ost=2;have_metadata_file=0;"]),
        ]),
      ]),

    ]
Example #26
class CalcECampaign(Campaign):
    """Example campaign for calc_e.py, that runs both methods with
    different precision and iteration counts. This could be used to
    explore the convergence rate of each method and the necessary
    decimal precision needed (and the cost of using the Decimal class with
    higher precision)."""

    # Used in job names submitted to scheduler.
    name = "e-small-one-node"

    # This application has a single executable, which we give the
    # friendly name 'pi' for later reference in parameter specification.
    # The executable path is taken relative to the application directory
    # specified on the cheetah command line.
    codes = [("calc_e", dict(exe="program/calc_e.py"))]

    # Document which machines the campaign is designed to run on. An
    # error will be raised if a different machine is specified on the
    # cheetah command line.
    supported_machines = ['local', 'cori', 'titan', 'theta', 'summit']

    # Per machine scheduler options. Keys are the machine name, values
    # are dicts of name value pairs for the options for that machine.
    # Options must be explicitly supported by Cheetah, this is not
    # currently a generic mechanism.
    scheduler_options = {
        "cori": {
            "queue": "debug",
            "constraint": "haswell",
            "license": "SCRATCH,project",
        },
        "titan": {
            "queue": "debug",
            "project": "csc242",
        },
        "theta": {
            "queue": "debug-flat-quad",
            "project": "CSC249ADCD01",
        },
        "summit": {
            'project': 'CSC299'
        }
    }

    # Optionally set umask for campaign directory and all processes spawned by
    # the workflow script when the campaign is run. Note that user rx
    # must be allowed at a minimum.
    # If set must be a string suitable for passing to the umask command.
    umask = '027'

    node = SummitNode()
    node.cpu[0] = f"calc_e:0"
    node_layout = [node]
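    # Only cpu slot 0 is mapped above: calc_e specifies no ParamRunner
    # 'nprocs', so each run uses a single process on one Summit node.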

    # Define the range of command line arguments to pass to the calc_e.py
    # program in each of many runs. Within each Sweep, all possible
    # combinations will be generated and included in the campaign output
    # directory. Because the 'n' parameter has different meaning for the
    # two methods, we must define separate Sweep groups for each method
    # to avoid running 'factorial' with too many iterations.
    sweeps = [
        # Sweep group defines a scheduler job. If different numbers of nodes
        # or node configurations are desired, then multiple SweepGroups can
        # be used. For most simple cases, only one is needed.
        p.SweepGroup(
            name="all-methods-small",
            nodes=1,
            walltime=timedelta(minutes=30),
            parameter_groups=[
                p.Sweep(
                    node_layout={'summit': node_layout},
                    parameters=[
                        p.ParamCmdLineArg("calc_e", "method", 1, ["pow"]),
                        # use higher values of n for this method, since it's doing a single
                        # exponentiation and not iterating like factorial
                        p.ParamCmdLineArg("calc_e", "n", 2,
                                          [10, 100, 1000, 1000000, 10000000]),
                        p.ParamCmdLineArg("calc_e", "precision", 3,
                                          [64, 128, 256, 512, 1024]),
                    ]),
                p.Sweep(
                    node_layout={'summit': node_layout},
                    parameters=[
                        p.ParamCmdLineArg("calc_e", "method", 1,
                                          ["factorial"]),
                        p.ParamCmdLineArg("calc_e", "n", 2, [10, 100, 1000]),
                        # explore higher precision values for this method
                        p.ParamCmdLineArg(
                            "calc_e", "precision", 3,
                            [64, 128, 256, 512, 1024, 2048, 4096]),
                    ]),
            ]),
    ]
class NWChem(Campaign):
    # A name for the campaign
    name = "nwchem"

    # Define your workflow. Setup the applications that form the workflow.
    # exe may be an absolute path.
    # The adios xml file is automatically copied to the campaign directory.
    # 'runner_override' may be used to launch the code on a login/service node as a serial code
    #   without a runner such as aprun/srun/jsrun etc.
    codes = [
        ("nwchem_main",
         dict(
             exe=
             "/ccs/proj/e2e/pnorbert/ADIOS/ADIOS2/build.rhea.gcc/install/bin/adios2_iotest",
             adios_xml_file='copro.xml')),
        ("sorting",
         dict(
             exe=
             "/ccs/proj/e2e/pnorbert/ADIOS/ADIOS2/build.rhea.gcc/install/bin/adios2_iotest",
             adios_xml_file='copro.xml',
             runner_override=False)),
    ]

    # List of machines on which this code can be run
    supported_machines = ['local', 'titan', 'theta', 'rhea']

    # Kill an experiment right away if any workflow components fail (just the experiment, not the whole group)
    kill_on_partial_failure = True

    # Any setup that you may need to do in an experiment directory before the experiment is run
    run_dir_setup_script = None

    # A post-process script that is run for every experiment after the experiment completes
    run_post_process_script = None

    # Directory permissions for the campaign sub-directories
    umask = '027'

    # Options for the underlying scheduler on the target system. Specify the project ID and job queue here.
    # scheduler_options = {'theta': {'project':'CSC249ADCD01', 'queue': 'default'}}
    scheduler_options = {'rhea': {'project': 'csc143'}}

    # A way to setup your environment before the experiment runs. Export environment variables such as LD_LIBRARY_PATH here.
    app_config_scripts = {
        'local': 'setup.sh',
        'theta': 'env_setup.sh',
        'rhea': 'setup_nwchem_rhea.sh'
    }

    # Setup the sweep parameters for a Sweep
    sweep1_parameters = [
        # ParamRunner 'nprocs' specifies the no. of ranks to be spawned
        p.ParamRunner('nwchem_main', 'nprocs', [80]),

        # Create ParamCmdLineOption parameters to specify command line options for running the application
        p.ParamCmdLineOption('nwchem_main', 'app', '-a', [1]),
        p.ParamCmdLineOption('nwchem_main', 'app-config', '-c',
                             ['copro-80.txt']),
        p.ParamCmdLineOption('nwchem_main', 'adios-config', '-x',
                             ['copro.xml']),
        p.ParamCmdLineOption('nwchem_main', 'strongscaling', '-w', [None]),
        p.ParamCmdLineOption('nwchem_main', 'timing', '-t', [None]),
        p.ParamCmdLineOption('nwchem_main', 'decomposition', '-d', [80]),
        # Set the engine for the 'trj_dump_out' IO object in the adios xml file. Sweep 1 uses BP4 with an open timeout.
        p.ParamADIOS2XML('nwchem_main', 'dump_trajectory', 'trj_dump_out',
                         'engine', [{
                             'BP4': {
                                 'OpenTimeoutSecs': '30.0'
                             }
                         }]),
        # Now set up options for the 'sorting' application.
        p.ParamRunner('sorting', 'nprocs', [8]),
        p.ParamCmdLineOption('sorting', 'app', '-a', [1]),
        p.ParamCmdLineOption('sorting', 'app-config', '-c', ['copro-80.txt']),
        p.ParamCmdLineOption('sorting', 'adios-config', '-x', ['copro.xml']),
        p.ParamCmdLineOption('sorting', 'weakscaling', '-s', [None]),
        p.ParamCmdLineOption('sorting', 'timing', '-t', [None]),
        p.ParamCmdLineOption('sorting', 'decomposition', '-d', [8]),
        # Set the engine for the 'trj_dump_in' IO object in the adios xml file. Sweep 1 uses BP4 with an open timeout.
        p.ParamADIOS2XML('sorting', 'load_trajectory', 'trj_dump_in', 'engine',
                         [{
                             'BP4': {
                                 'OpenTimeoutSecs': '30.0'
                             }
                         }]),
    ]
    #print(sweep1_parameters)
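    # The remaining sweeps reuse sweep1_parameters via a shallow copy and swap
    # only the two ParamADIOS2XML entries (indices 7 and 15) to compare
    # engines: BP4 above, then SST, Null, and BP4 with a burst-buffer path.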
    sweep2_parameters = sweep1_parameters.copy()
    sweep2_parameters[7] = p.ParamADIOS2XML('nwchem_main', 'dump_trajectory',
                                            'trj_dump_out', 'engine', [{
                                                'SST': {}
                                            }])
    sweep2_parameters[15] = p.ParamADIOS2XML('sorting', 'load_trajectory',
                                             'trj_dump_in', 'engine', [{
                                                 'SST': {}
                                             }])
    sweep3_parameters = sweep1_parameters.copy()
    sweep3_parameters[7] = p.ParamADIOS2XML('nwchem_main', 'dump_trajectory',
                                            'trj_dump_out', 'engine', [{
                                                "Null": {}
                                            }])
    sweep3_parameters[15] = p.ParamADIOS2XML('sorting', 'load_trajectory',
                                             'trj_dump_in', 'engine', [{
                                                 "Null": {}
                                             }])
    sweep4_parameters = sweep1_parameters.copy()
    sweep4_parameters[7] = p.ParamADIOS2XML('nwchem_main', 'dump_trajectory',
                                            'trj_dump_out', 'engine', [{
                                                'BP4': {
                                                    'OpenTimeoutSecs': '30.0',
                                                    'BurstBufferPath': '/tmp'
                                                }
                                            }])
    sweep4_parameters[15] = p.ParamADIOS2XML('sorting', 'load_trajectory',
                                             'trj_dump_in', 'engine', [{
                                                 'BP4': {
                                                     'OpenTimeoutSecs': '30.0',
                                                     'BurstBufferPath': '/tmp'
                                                 }
                                             }])
    #sweep4_parameters = sweep1_parameters.copy()
    #sweep4_parameters[7] = p.ParamADIOS2XML('nwchem_main', 'dump_trajectory', 'trj_dump_out', 'engine', [ {'SSC':{'DataTransport':'WAN'}} ])
    #sweep4_parameters[15] = p.ParamADIOS2XML('sorting', 'load_trajectory', 'trj_dump_in', 'engine', [ {'SSC':{'DataTransport':'WAN'}} ])

    # Create Sweep objects. This one does not define a node-layout, and thus, all cores of a compute node will be
    #   utilized and mapped to application ranks.
    sweep1 = p.Sweep(parameters=sweep1_parameters)
    sweep2 = p.Sweep(parameters=sweep2_parameters)
    sweep3 = p.Sweep(parameters=sweep3_parameters)
    sweep4 = p.Sweep(parameters=sweep4_parameters)

    # Create a SweepGroup and add the above Sweeps. Set batch job properties such as the no. of nodes,
    sweepGroup1 = p.SweepGroup(
        "nwchem-adios",  # A unique name for the SweepGroup
        walltime=18060,  # Total runtime for the SweepGroup
        per_run_timeout=500,  # Timeout for each experiment
        parameter_groups=[sweep1, sweep2, sweep3,
                          sweep4],  # Sweeps to include in this group
        launch_mode='default',  # Launch mode: default, or MPMD if supported
        nodes=6,  # No. of nodes for the batch job.
        run_repetitions=2,  # No. of times each experiment in the group must be repeated (total no. of runs here will be 3)
        component_inputs={'nwchem_main': ['copro-80.txt']},
    )

    # Activate the SweepGroup
    sweeps = [sweepGroup1]