Example #1
from funcx_endpoint.endpoint.utils.config import Config
from funcx_endpoint.executors import HighThroughputExecutor

from parsl.addresses import address_by_hostname
from parsl.channels import LocalChannel
from parsl.launchers import SrunLauncher
from parsl.providers import SlurmProvider

...

config = Config(executors=[
    HighThroughputExecutor(
        label="fe.cs.uchicago",
        address=address_by_hostname(),
        provider=SlurmProvider(
            channel=LocalChannel(),
            nodes_per_block=NODES_PER_JOB,
            init_blocks=1,
            partition="general",
            launcher=SrunLauncher(
                overrides=(f"hostname; srun --ntasks={TOTAL_WORKERS} "
                           f"--ntasks-per-node={WORKERS_PER_NODE} "
                           f"--gpus-per-task=rtx2080ti:{GPUS_PER_WORKER} "
                           f"--gpu-bind=map_gpu:{GPU_MAP}")),
            walltime="01:00:00",
        ),
    )
], )
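
The "..." above elides the placeholder constants that the launcher overrides reference. A hypothetical set of values for a 2-node job with four single-GPU workers per node (the names come from the snippet, the numbers are purely illustrative) might be:

# Illustrative sizing only; adjust to the target cluster.
NODES_PER_JOB = 2
WORKERS_PER_NODE = 4
GPUS_PER_WORKER = 1
TOTAL_WORKERS = NODES_PER_JOB * WORKERS_PER_NODE
# --gpu-bind=map_gpu takes a comma-separated list of GPU indices per node.
GPU_MAP = ",".join(str(i) for i in range(WORKERS_PER_NODE))  # "0,1,2,3"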
Example #2
import argparse
import os

# setup_logging and EndpointInterchange are assumed to be imported from
# elsewhere in the funcx_endpoint package; their import lines are not shown
# in this snippet.


def cli_run():

    parser = argparse.ArgumentParser()
    parser.add_argument("-c",
                        "--client_address",
                        required=True,
                        help="Client address")
    parser.add_argument(
        "--client_ports",
        required=True,
        help="client ports as a triple of outgoing,incoming,command",
    )
    parser.add_argument("--worker_port_range",
                        help="Worker port range as a tuple")
    parser.add_argument(
        "-l",
        "--logdir",
        default="./parsl_worker_logs",
        help="Parsl worker log directory",
    )
    parser.add_argument(
        "--worker_ports",
        default=None,
        help="OPTIONAL, pair of workers ports to listen on, "
        "e.g. --worker_ports=50001,50005",
    )
    parser.add_argument(
        "--suppress_failure",
        action="store_true",
        help="Enables suppression of failures",
    )
    parser.add_argument(
        "--endpoint_id",
        required=True,
        help="Endpoint ID, used to identify the endpoint to the remote broker",
    )
    parser.add_argument("--hb_threshold",
                        help="Heartbeat threshold in seconds")
    parser.add_argument(
        "--config",
        default=None,
        help="Configuration object that describes provisioning",
    )
    parser.add_argument("-d",
                        "--debug",
                        action="store_true",
                        help="Enables debug logging")

    print("Starting HTEX Intechange")
    args = parser.parse_args()

    logdir = os.path.abspath(args.logdir)
    os.makedirs(logdir, exist_ok=True)
    setup_logging(logfile=os.path.join(logdir, "endpoint.log"),
                  debug=args.debug)

    optionals = {}
    optionals["suppress_failure"] = args.suppress_failure
    optionals["logdir"] = os.path.abspath(args.logdir)
    optionals["client_address"] = args.client_address
    optionals["client_ports"] = [int(i) for i in args.client_ports.split(",")]
    optionals["endpoint_id"] = args.endpoint_id

    # DEBUG ONLY : TODO: FIX
    if args.config is None:
        from parsl.providers import LocalProvider

        from funcx_endpoint.endpoint.utils.config import Config

        config = Config(
            worker_debug=True,
            scaling_enabled=True,
            provider=LocalProvider(
                init_blocks=1,
                min_blocks=1,
                max_blocks=1,
            ),
            max_workers_per_node=2,
            funcx_service_address="http://127.0.0.1:8080",
        )
        optionals["config"] = config
    else:
        optionals["config"] = args.config

    if args.worker_ports:
        optionals["worker_ports"] = [
            int(i) for i in args.worker_ports.split(",")
        ]
    if args.worker_port_range:
        optionals["worker_port_range"] = [
            int(i) for i in args.worker_port_range.split(",")
        ]

    ic = EndpointInterchange(**optionals)
    ic.start()
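
Given the parser above, the entry point could be invoked roughly as follows. The script name, ports, and endpoint UUID are illustrative placeholders; only --client_address, --client_ports, and --endpoint_id are required.

# Hypothetical invocation of the CLI defined above.
python interchange.py \
    --client_address 127.0.0.1 \
    --client_ports 55001,55002,55003 \
    --endpoint_id 01234567-89ab-cdef-0123-456789abcdef \
    --worker_port_range 54000,55000 \
    --debug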
    """
Example #3
from funcx_endpoint.endpoint.utils.config import Config

config = Config()

if __name__ == '__main__':

    import funcx
    import os
    import logging
    funcx.set_stream_logger()
    logger = logging.getLogger(__file__)

    endpoint_dir = "/home/yadu/.funcx/default"

    if config.working_dir is None:
        working_dir = "{}/{}".format(endpoint_dir, "worker_logs")
    else:
        working_dir = config.working_dir
    # if self.worker_logdir_root is not None:
    #      worker_logdir = "{}/{}".format(self.worker_logdir_root, self.label)

    print("Loading : ", config)
    # Set script dir
    config.provider.script_dir = working_dir
    config.provider.channel.script_dir = os.path.join(working_dir,
                                                      'submit_scripts')
    config.provider.channel.makedirs(config.provider.channel.script_dir,
                                     exist_ok=True)
    os.makedirs(config.provider.script_dir, exist_ok=True)

    debug_opts = "--debug" if config.worker_debug else ""
    max_workers = "" if config.max_workers_per_node == float('inf') \
                  else "--max_workers={}".format(config.max_workers_per_node)
Example #4
config = Config(
    executors=[
        HighThroughputExecutor(
            label="fe.cs.uchicago",
            worker_debug=False,
            address=address_by_hostname(),
            provider=SlurmProvider(
                partition='general',

                # Launch 4 managers per node, each bound to 1 GPU
                # This is a hack: the leading "hostname;" terminates the srun
                # command generated by the launcher so that we can start our own.
                #
                # DO NOT MODIFY unless you know what you are doing.
                launcher=SrunLauncher(
                    overrides=(f'hostname; srun --ntasks={TOTAL_WORKERS} '
                               f'--ntasks-per-node={WORKERS_PER_NODE} '
                               f'--gpus-per-task=rtx2080ti:{GPUS_PER_WORKER} '
                               f'--gpu-bind=map_gpu:{GPU_MAP}')),

                # Scale between 0 and 1 blocks, with NODES_PER_JOB nodes per block
                nodes_per_block=NODES_PER_JOB,
                init_blocks=0,
                min_blocks=0,
                max_blocks=1,

                # Hold blocks for 30 minutes
                walltime='00:30:00',
            ),
        )
    ], )
Example #5
from funcx_endpoint.endpoint.utils.config import Config
from parsl.providers import LocalProvider

config = Config(scaling_enabled=True,
                provider=LocalProvider(
                    init_blocks=1,
                    min_blocks=1,
                    max_blocks=1,
                ),
                max_workers_per_node=2,
                funcx_service_address='https://api.funcx.org/v1')

# For now, visible_to must be a list of URNs for globus auth users or groups, e.g.:
# urn:globus:auth:identity:{user_uuid}
# urn:globus:groups:id:{group_uuid}
meta = {
    "name": "$name",
    "description": "",
    "organization": "",
    "department": "",
    "public": False,
    "visible_to": []
}
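
As the comment notes, visible_to takes Globus Auth URNs. A hypothetical, fully populated metadata block (all names and UUIDs below are made up for illustration) could look like:

meta = {
    "name": "demo-endpoint",
    "description": "Shared endpoint for the demo project",
    "organization": "Example University",
    "department": "Research Computing",
    "public": False,
    "visible_to": [
        # Hypothetical UUIDs, following the URN formats noted above.
        "urn:globus:auth:identity:00000000-0000-0000-0000-000000000000",
        "urn:globus:groups:id:11111111-1111-1111-1111-111111111111",
    ],
}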
Example #6
config = Config(
    executors=[
        HighThroughputExecutor(
            max_workers_per_node=2,
            worker_debug=False,
            address=address_by_hostname(),
            provider=SlurmProvider(
                partition=user_opts['frontera']['partition'],
                launcher=SrunLauncher(),

                # Enter scheduler_options if needed
                scheduler_options=user_opts['frontera']['scheduler_options'],

                # Command to be run before starting a worker, such as:
                # 'module load Anaconda; source activate parsl_env'.
                worker_init=user_opts['frontera']['worker_init'],

                # Add extra time for slow scheduler responses
                cmd_timeout=60,

                # Scale between 0-1 blocks with 2 nodes per block
                nodes_per_block=2,
                init_blocks=0,
                min_blocks=0,
                max_blocks=1,

                # Hold blocks for 30 minutes
                walltime='00:30:00',
            ),
        )
    ], )
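
This and several later examples index into a user_opts dictionary whose definition has been truncated. Based on the keys referenced above, a hypothetical version for this Frontera example (the partition, directives, and modules are placeholders) might be:

# Hypothetical user_opts block; keys match those used above, values are placeholders.
user_opts = {
    'frontera': {
        'partition': 'development',
        'scheduler_options': '#SBATCH -A YourAllocation',
        'worker_init': 'module load Anaconda; source activate funcx_env',
    },
}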
Example #7
}

config = Config(
    executors=[
        HighThroughputExecutor(
            max_workers_per_node=1,
            worker_debug=False,
            address=address_by_hostname(),
            provider=TorqueProvider(
                queue='normal',
                launcher=AprunLauncher(overrides="-b -- bwpy-environ --"),

                # string to prepend to the #PBS directives in the
                # submit script to the scheduler
                scheduler_options=user_opts['bluewaters']['scheduler_options'],

                # Command to be run before starting a worker, such as:
                # 'module load bwpy; source activate funcx_env'.
                worker_init=user_opts['bluewaters']['worker_init'],

                # Scale between 0-1 blocks with 2 nodes per block
                nodes_per_block=2,
                init_blocks=0,
                min_blocks=0,
                max_blocks=1,

                # Hold blocks for 30 minutes
                walltime='00:30:00'),
        )
    ], )

# fmt: on
Example #8
config = Config(
    executors=[
        HighThroughputExecutor(
            max_workers_per_node=1,
            worker_debug=False,
            address=address_by_hostname(),
            scheduler_mode='soft',
            worker_mode='singularity_reuse',
            container_type='singularity',
            container_cmd_options="-H /home/$USER",
            provider=CobaltProvider(
                queue='debug-flat-quad',
                account=user_opts['theta']['account'],
                launcher=AprunLauncher(overrides="-d 64"),

                # string to prepend to #COBALT blocks in the submit
                # script to the scheduler eg: '#COBALT -t 50'
                scheduler_options=user_opts['theta']['scheduler_options'],

                # Command to be run before starting a worker, such as:
                # 'module load Anaconda; source activate funcx_env'.
                worker_init=user_opts['theta']['worker_init'],

                # Scale between 0-1 blocks with 2 nodes per block
                nodes_per_block=2,
                init_blocks=0,
                min_blocks=0,
                max_blocks=1,

                # Hold blocks for 30 minutes
                walltime='00:30:00'
            ),
        )
    ],
)
Example #9
import argparse
from funcx_endpoint.endpoint.utils.config import Config
from funcx_endpoint.executors.high_throughput.interchange import Interchange

import funcx

funcx.set_stream_logger()

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("-a", "--address", required=True, help="Address")
    parser.add_argument("-c", "--client_ports", required=True, help="ports")
    args = parser.parse_args()
    config = Config()

    ic = Interchange(
        config,
        client_address=args.address,
        client_ports=[int(i) for i in args.client_ports.split(',')],
    )
    ic.start()
    print("Interchange started")
Example #10
}

config = Config(
    executors=[
        HighThroughputExecutor(
            max_workers_per_node=2,
            worker_debug=False,
            address=address_by_hostname(),
            provider=SlurmProvider(
                partition='broadwl',
                launcher=SrunLauncher(),

                # string to prepend to #SBATCH blocks in the submit
                # script to the scheduler eg: '#SBATCH --constraint=knl,quad,cache'
                scheduler_options=user_opts['midway']['scheduler_options'],

                # Command to be run before starting a worker, such as:
                # 'module load Anaconda; source activate parsl_env'.
                worker_init=user_opts['midway']['worker_init'],

                # Scale between 0-1 blocks with 2 nodes per block
                nodes_per_block=2,
                init_blocks=0,
                min_blocks=0,
                max_blocks=1,

                # Hold blocks for 30 minutes
                walltime='00:30:00'),
        )
    ], )

# fmt: on
Example #11
}

config = Config(
    executors=[
        HighThroughputExecutor(
            label='Kubernetes_funcX',
            max_workers_per_node=1,
            address=address_by_route(),
            scheduler_mode='hard',
            container_type='docker',
            strategy=KubeSimpleStrategy(max_idletime=3600),
            provider=KubernetesProvider(
                init_blocks=0,
                min_blocks=0,
                max_blocks=2,
                init_cpu=1,
                max_cpu=4,
                init_mem="1024Mi",
                max_mem="4096Mi",
                image=user_opts['kube']['image'],
                worker_init=user_opts['kube']['worker_init'],
                namespace=user_opts['kube']['namespace'],
                incluster_config=False,
            ),
        )
    ],
    heartbeat_period=15,
    heartbeat_threshold=200,
    log_dir='.',
)

# fmt: on
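
This snippet omits its imports. A plausible set, assuming the funcx-endpoint and Parsl module layout of this era (the import paths are assumptions and should be checked against the installed versions), would be:

# Assumed import locations; verify against the installed packages.
from parsl.addresses import address_by_route
from parsl.providers import KubernetesProvider

from funcx_endpoint.endpoint.utils.config import Config
from funcx_endpoint.executors import HighThroughputExecutor
from funcx_endpoint.strategies import KubeSimpleStrategy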
Example #12
from parsl.providers import LocalProvider

from funcx_endpoint.endpoint.utils.config import Config
from funcx_endpoint.executors import HighThroughputExecutor

config = Config(
    executors=[
        HighThroughputExecutor(provider=LocalProvider(
            init_blocks=1,
            min_blocks=0,
            max_blocks=1,
        ), )
    ],
    funcx_service_address="https://api2.funcx.org/v2",
)

# For now, visible_to must be a list of URNs for globus auth users or groups, e.g.:
# urn:globus:auth:identity:{user_uuid}
# urn:globus:groups:id:{group_uuid}
meta = {
    "name": "$name",
    "description": "",
    "organization": "",
    "department": "",
    "public": False,
    "visible_to": [],
}
Example #13
        # Set ncpus=32, otherwise it defaults to 1 on Polaris
        'scheduler_options': '',
    }
}

config = Config(
    executors=[
        HighThroughputExecutor(
            max_workers_per_node=1,
            strategy=SimpleStrategy(max_idletime=300),
            # IP of Polaris testbed login node
            address='10.230.2.72',
            provider=PBSProProvider(
                launcher=SingleNodeLauncher(),
                queue='workq',
                scheduler_options=user_opts['polaris']['scheduler_options'],
                # Command to be run before starting a worker, such as:
                # 'module load Anaconda; source activate parsl_env'.
                worker_init=user_opts['polaris']['worker_init'],
                cpus_per_node=32,
                walltime='01:00:00',
                nodes_per_block=1,
                init_blocks=0,
                min_blocks=0,
                max_blocks=1,
            ),
        )
    ], )

# fmt: on
Example #14
config = Config(
    executors=[
        HighThroughputExecutor(
            worker_debug=False,
            address=address_by_interface('bond0.144'),
            provider=SlurmProvider(
                partition='GPU',  # Partition / QOS

                # We request all hyperthreads on a node.
                launcher=SrunLauncher(overrides='-c 272'),

                # string to prepend to #SBATCH blocks in the submit
                # script to the scheduler eg: '#SBATCH --constraint=gpu'
                scheduler_options=user_opts['perlmutter']['scheduler_options'],

                # Command to be run before starting a worker, such as:
                # 'module load Anaconda; source activate parsl_env'.
                worker_init=user_opts['perlmutter']['worker_init'],

                # The Slurm scheduler can be slow at times,
                # so increase the command timeout
                cmd_timeout=120,

                # Scale between 0-1 blocks with 2 nodes per block
                nodes_per_block=2,
                init_blocks=0,
                min_blocks=0,
                max_blocks=1,

                # Hold blocks for 10 minutes
                walltime='00:10:00',
            ),
        ),
    ],
)