Example #1
    def __init__(self,
                 channel=LocalChannel(),
                 account=None,
                 queue=None,
                 scheduler_options='',
                 select_options='',
                 worker_init='',
                 nodes_per_block=1,
                 cpus_per_node=1,
                 init_blocks=1,
                 min_blocks=0,
                 max_blocks=1,
                 parallelism=1,
                 launcher=SingleNodeLauncher(),
                 walltime="00:20:00",
                 cmd_timeout=120):
        super().__init__(channel,
                         account,
                         queue,
                         scheduler_options,
                         worker_init,
                         nodes_per_block,
                         init_blocks,
                         min_blocks,
                         max_blocks,
                         parallelism,
                         launcher,
                         walltime,
                         cmd_timeout=cmd_timeout)

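        # Note: template_string is not a parameter of this constructor; it refers to
        # the module-level PBS Pro submit-script template used to render job scripts.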
        self.template_string = template_string
        self._label = 'pbspro'
        self.cpus_per_node = cpus_per_node
        self.select_options = select_options
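
A minimal instantiation sketch for the constructor above. The queue, account, and select_options values are placeholders for site-specific settings, not values from the original source.

from parsl.launchers import SingleNodeLauncher
from parsl.providers import PBSProProvider

provider = PBSProProvider(
    queue='workq',                 # placeholder queue name
    account='MYPROJECT',           # placeholder allocation/account
    select_options='ngpus=4',      # placeholder extra select-statement options
    nodes_per_block=1,
    cpus_per_node=32,
    walltime='01:00:00',
    launcher=SingleNodeLauncher(),
)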
Example #2
    def __init__(self,
                 channel=LocalChannel(),
                 nodes_per_block=1,
                 launcher=SingleNodeLauncher(),
                 init_blocks=1,
                 min_blocks=0,
                 max_blocks=1,
                 worker_init='',
                 cmd_timeout=30,
                 parallelism=1,
                 move_files=None):
        self.channel = channel
        self._label = 'local'
        self.nodes_per_block = nodes_per_block
        self.launcher = launcher
        self.worker_init = worker_init
        self.init_blocks = init_blocks
        self.min_blocks = min_blocks
        self.max_blocks = max_blocks
        self.parallelism = parallelism
        self.script_dir = None
        self.cmd_timeout = cmd_timeout
        self.move_files = move_files

        # Dictionary that keeps track of jobs, keyed on job_id
        self.resources = {}
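
A small usage sketch for the local provider above; the script directory path is a placeholder, and (as in the tests later in this listing) script_dir must be assigned before the provider writes any submit scripts.

from parsl.channels import LocalChannel
from parsl.launchers import SingleNodeLauncher
from parsl.providers import LocalProvider

provider = LocalProvider(channel=LocalChannel(),
                         launcher=SingleNodeLauncher(),
                         init_blocks=1,
                         max_blocks=1)
provider.script_dir = '/tmp/parsl_scripts'   # placeholder; must be a writable directory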
Example #3
    def __init__(self,
                 project_id,
                 key_file,
                 region,
                 os_project,
                 os_family,
                 google_version='v1',
                 instance_type='n1-standard-1',
                 init_blocks=1,
                 min_blocks=0,
                 max_blocks=10,
                 launcher=SingleNodeLauncher(),
                 parallelism=1):
        self.project_id = project_id
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_file
        self.zone = self.get_zone(region)
        self.os_project = os_project
        self.os_family = os_family
        self.label = 'google_cloud'
        self.client = googleapiclient.discovery.build('compute',
                                                      google_version)
        self.instance_type = instance_type
        self.init_blocks = init_blocks
        self.min_blocks = min_blocks
        self.max_blocks = max_blocks
        self.parallelism = parallelism
        self.num_instances = 0
        self.launcher = launcher

        # Dictionary that keeps track of jobs, keyed on job_id
        self.resources = {}
        self.provisioned_blocks = 0
        atexit.register(self.bye)
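
A hedged instantiation sketch for the Google Cloud provider above. Every value is a placeholder; actually running it requires the google-api-python-client package and a real service-account key file.

from parsl.providers import GoogleCloudProvider

provider = GoogleCloudProvider(
    project_id='my-gcp-project',                      # placeholder project ID
    key_file='/path/to/service-account-key.json',     # placeholder credentials path
    region='us-central1',                             # placeholder region
    os_project='debian-cloud',                        # placeholder image project
    os_family='debian-10',                            # placeholder image family
    instance_type='n1-standard-1',
    init_blocks=1,
    max_blocks=2,
)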
Example #4
    def __init__(self,
                 channel=LocalChannel(),
                 nodes_per_block=1,
                 init_blocks=1,
                 min_blocks=0,
                 max_blocks=10,
                 parallelism=1,
                 walltime="00:10:00",
                 scheduler_options='',
                 worker_init='',
                 project=None,
                 cmd_timeout=120,
                 move_files=True,
                 launcher=SingleNodeLauncher()):
        label = 'LSF'
        super().__init__(label,
                         channel,
                         nodes_per_block,
                         init_blocks,
                         min_blocks,
                         max_blocks,
                         parallelism,
                         walltime,
                         cmd_timeout=cmd_timeout,
                         launcher=launcher)

        self.project = project
        self.move_files = move_files
        self.scheduler_options = scheduler_options
        self.worker_init = worker_init
Example #5
def fresh_config():
    return Config(
        executors=[
            HighThroughputExecutor(
                label="htex_Local",
                working_dir=working_dir,
                storage_access=[FTPInTaskStaging(), HTTPInTaskStaging(), NoOpFileStaging()],
                worker_debug=True,
                cores_per_worker=1,
                heartbeat_period=2,
                heartbeat_threshold=5,
                poll_period=100,
                provider=LocalProvider(
                    channel=LocalChannel(),
                    init_blocks=0,
                    min_blocks=0,
                    max_blocks=5,
                    launcher=SingleNodeLauncher(),
                ),
            )
        ],
        strategy='simple',
        app_cache=True, checkpoint_mode='task_exit',
        retries=2,
        monitoring=MonitoringHub(
                        hub_address="localhost",
                        hub_port=55055,
                        monitoring_debug=False,
                        resource_monitoring_interval=1,
        )
    )
Example #6
    def __init__(self,
                 partition,
                 channel=LocalChannel(),
                 nodes_per_block=1,
                 init_blocks=1,
                 min_blocks=0,
                 max_blocks=10,
                 parallelism=1,
                 walltime="00:10:00",
                 scheduler_options='',
                 worker_init='',
                 cmd_timeout=10,
                 exclusive=True,
                 launcher=SingleNodeLauncher()):
        label = 'slurm'
        super().__init__(label,
                         channel,
                         nodes_per_block,
                         init_blocks,
                         min_blocks,
                         max_blocks,
                         parallelism,
                         walltime,
                         cmd_timeout=cmd_timeout,
                         launcher=launcher)

        self.partition = partition
        self.exclusive = exclusive
        if exclusive:
            self.scheduler_options = "#SBATCH --exclusive\n" + scheduler_options
        else:
            self.scheduler_options = scheduler_options
        self.worker_init = worker_init
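
A usage sketch for the Slurm provider above; the partition name, node count, and walltime are placeholders for cluster-specific values.

from parsl.launchers import SrunLauncher
from parsl.providers import SlurmProvider

provider = SlurmProvider(
    'debug',                  # placeholder partition name
    nodes_per_block=2,
    walltime='00:30:00',
    exclusive=True,           # prepends "#SBATCH --exclusive" as shown above
    launcher=SrunLauncher(),
)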
Example #7
    def __init__(self,
                 channel=LocalChannel(),
                 nodes_per_block=1,
                 init_blocks=1,
                 min_blocks=0,
                 max_blocks=1,
                 parallelism=1,
                 walltime="00:10:00",
                 scheduler_options='',
                 worker_init='',
                 launcher=SingleNodeLauncher(),
                 cmd_timeout: int = 60,
                 queue=None):
        label = 'grid_engine'
        super().__init__(label,
                         channel,
                         nodes_per_block,
                         init_blocks,
                         min_blocks,
                         max_blocks,
                         parallelism,
                         walltime,
                         launcher,
                         cmd_timeout=cmd_timeout)
        self.scheduler_options = scheduler_options
        self.worker_init = worker_init

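        # Note: this membership test compares the launcher against string names, so
        # with a Launcher instance (the usual case) the warning below never fires.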
        if launcher in ['srun', 'srun_mpi']:
            logger.warning(
                "Use of {} launcher is usually appropriate for Slurm providers. "
                "Recommended options include 'single_node' or 'aprun'.".format(
                    launcher))
Example #8
def test_local_channel():
    with tempfile.TemporaryDirectory() as script_dir:
        p = LocalProvider(channel=LocalChannel(),
                          launcher=SingleNodeLauncher(debug=False))
        p.script_dir = script_dir
        _run_tests(p)
Example #9
    def __init__(self,
                 channel: Channel = LocalChannel(),
                 nodes_per_block: int = 1,
                 cores_per_slot: Optional[int] = None,
                 mem_per_slot: Optional[float] = None,
                 init_blocks: int = 1,
                 min_blocks: int = 0,
                 max_blocks: int = 1,
                 parallelism: float = 1,
                 environment: Optional[Dict[str, str]] = None,
                 project: str = '',
                 scheduler_options: str = '',
                 transfer_input_files: List[str] = [],
                 walltime: str = "00:10:00",
                 worker_init: str = '',
                 launcher: Launcher = SingleNodeLauncher(),
                 requirements: str = '',
                 cmd_timeout: int = 60,
                 cmd_chunk_size: int = 100) -> None:

        label = 'condor'
        super().__init__(label,
                         channel,
                         nodes_per_block,
                         init_blocks,
                         min_blocks,
                         max_blocks,
                         parallelism,
                         walltime,
                         launcher,
                         cmd_timeout=cmd_timeout)
        self.cores_per_slot = cores_per_slot
        self.mem_per_slot = mem_per_slot
        self.cmd_chunk_size = cmd_chunk_size

        # To Parsl, Condor slots should be treated equivalently to nodes
        self.cores_per_node = cores_per_slot
        self.mem_per_node = mem_per_slot

        self.environment = environment if environment is not None else {}
        for key, value in self.environment.items():
            # To escape literal quote marks, double them
            # See: http://research.cs.wisc.edu/htcondor/manual/v8.6/condor_submit.html
            try:
                self.environment[key] = "'{}'".format(
                    value.replace("'", '"').replace('"', '""'))
            except AttributeError:
                pass

        self.project = project
        self.scheduler_options = scheduler_options + '\n'
        self.worker_init = worker_init + '\n'
        self.requirements = requirements
        self.transfer_input_files = transfer_input_files
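
A standalone sketch of the quote-escaping applied to environment values above; the helper name is illustrative only.

def escape_condor_env_value(value):
    # Single quotes become double quotes, every double quote is then doubled,
    # and the result is wrapped in single quotes for the HTCondor submit file.
    return "'{}'".format(value.replace("'", '"').replace('"', '""'))

print(escape_condor_env_value('say "hi"'))   # prints: 'say ""hi""'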
Example #10
    def __init__(self,
                 partition: Optional[str],
                 account: Optional[str] = None,
                 channel: Channel = LocalChannel(),
                 nodes_per_block: int = 1,
                 cores_per_node: Optional[int] = None,
                 mem_per_node: Optional[int] = None,
                 init_blocks: int = 1,
                 min_blocks: int = 0,
                 max_blocks: int = 1,
                 parallelism: float = 1,
                 walltime: str = "00:10:00",
                 scheduler_options: str = '',
                 worker_init: str = '',
                 cmd_timeout: int = 10,
                 exclusive: bool = True,
                 move_files: bool = True,
                 launcher: Launcher = SingleNodeLauncher()):
        label = 'slurm'
        super().__init__(label,
                         channel,
                         nodes_per_block,
                         init_blocks,
                         min_blocks,
                         max_blocks,
                         parallelism,
                         walltime,
                         cmd_timeout=cmd_timeout,
                         launcher=launcher)

        self.partition = partition
        self.cores_per_node = cores_per_node
        self.mem_per_node = mem_per_node
        self.exclusive = exclusive
        self.move_files = move_files
        self.account = account
        self.scheduler_options = scheduler_options + '\n'
        if exclusive:
            self.scheduler_options += "#SBATCH --exclusive\n"
        if partition:
            self.scheduler_options += "#SBATCH --partition={}\n".format(
                partition)
        if account:
            self.scheduler_options += "#SBATCH --account={}\n".format(account)
        self.worker_init = worker_init + '\n'
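
Given the assembly above, a quick illustrative check of the generated directives (the partition and account are placeholders, and this assumes a parsl version with this constructor signature):

from parsl.providers import SlurmProvider

p = SlurmProvider('debug', account='proj123', exclusive=True)
print(p.scheduler_options)
# A leading blank line (the empty user scheduler_options plus '\n'), followed by:
#   #SBATCH --exclusive
#   #SBATCH --partition=debug
#   #SBATCH --account=proj123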
Example #11
    def __init__(self,
                 channel=LocalChannel(),
                 nodes_per_block=1,
                 init_blocks=1,
                 min_blocks=0,
                 max_blocks=10,
                 parallelism=1,
                 environment=None,
                 project='',
                 scheduler_options='',
                 transfer_input_files=[],
                 walltime="00:10:00",
                 worker_init='',
                 launcher=SingleNodeLauncher(),
                 requirements='',
                 cmd_timeout=60):

        label = 'condor'
        super().__init__(label,
                         channel,
                         nodes_per_block,
                         init_blocks,
                         min_blocks,
                         max_blocks,
                         parallelism,
                         walltime,
                         launcher,
                         cmd_timeout=cmd_timeout)
        self.provisioned_blocks = 0

        self.environment = environment if environment is not None else {}
        for key, value in self.environment.items():
            # To escape literal quote marks, double them
            # See: http://research.cs.wisc.edu/htcondor/manual/v8.6/condor_submit.html
            try:
                self.environment[key] = "'{}'".format(
                    value.replace("'", '"').replace('"', '""'))
            except AttributeError:
                pass

        self.project = project
        self.scheduler_options = scheduler_options
        self.worker_init = worker_init
        self.requirements = requirements
        self.transfer_input_files = transfer_input_files
Example #12
def test_simple(mem_per_worker):

    config = Config(
        executors=[
            HighThroughputExecutor(
                poll_period=1,
                label="htex_local",
                worker_debug=True,
                mem_per_worker=mem_per_worker,
                cores_per_worker=0.1,
                suppress_failure=True,
                provider=LocalProvider(
                    channel=LocalChannel(),
                    init_blocks=1,
                    max_blocks=1,
                    launcher=SingleNodeLauncher(),
                ),
            )
        ],
        strategy=None,
    )
    parsl.load(config)

    print("Configuration requests:")
    print("cores_per_worker: ", config.executors[0].cores_per_worker)
    print("mem_per_worker: ", config.executors[0].mem_per_worker)

    available_mem_on_node = round(psutil.virtual_memory().available / (2**30), 1)
    expected_workers = multiprocessing.cpu_count() / config.executors[0].cores_per_worker
    if mem_per_worker:
        expected_workers = int(available_mem_on_node / config.executors[0].mem_per_worker)

    print("Available memory: ", available_mem_on_node)
    print("Expected workers: ", expected_workers)
    # Prime a worker; `double` is a parsl app defined outside this snippet
    double(5).result()
    dfk = parsl.dfk()
    connected = dfk.executors['htex_local'].connected_workers
    print("Connected : ", connected)
    assert expected_workers == connected, "Expected {} workers, instead got {} workers".format(expected_workers,
                                                                                               connected)
    parsl.clear()
    return True
Example #13
def test_ssh_channel():
    with tempfile.TemporaryDirectory() as config_dir:
        sshd_thread, priv_key, server_port = _start_sshd(config_dir)
        try:
            with tempfile.TemporaryDirectory() as remote_script_dir:
                # The SSH library fails to add the new host key to the file if the file does not
                # already exist, so create it here.
                pathlib.Path(
                    '{}/known.hosts'.format(config_dir)).touch(mode=0o600)
                script_dir = tempfile.mkdtemp()
                p = LocalProvider(channel=SSHChannel(
                    '127.0.0.1',
                    port=server_port,
                    script_dir=remote_script_dir,
                    host_keys_filename='{}/known.hosts'.format(config_dir),
                    key_filename=priv_key),
                                  launcher=SingleNodeLauncher(debug=False))
                p.script_dir = script_dir
                _run_tests(p)
        finally:
            _stop_sshd(sshd_thread)
Example #14
    def __init__(self,
                 partition,
                 channel=LocalChannel(),
                 nodes_per_block=1,
                 cores_per_node=None,
                 mem_per_node=None,
                 init_blocks=1,
                 min_blocks=0,
                 max_blocks=10,
                 parallelism=1,
                 walltime="00:10:00",
                 scheduler_options='',
                 worker_init='',
                 cmd_timeout=10,
                 exclusive=True,
                 move_files=True,
                 launcher=SingleNodeLauncher()):
        label = 'slurm'
        super().__init__(label,
                         channel,
                         nodes_per_block,
                         init_blocks,
                         min_blocks,
                         max_blocks,
                         parallelism,
                         walltime,
                         cmd_timeout=cmd_timeout,
                         launcher=launcher)

        self.partition = partition
        self.cores_per_node = cores_per_node
        self.mem_per_node = mem_per_node
        self.exclusive = exclusive
        self.move_files = move_files
        self.scheduler_options = scheduler_options + '\n'
        if exclusive:
            self.scheduler_options += "#SBATCH --exclusive\n"
        self.worker_init = worker_init + '\n'
Example #15
# This is an example config; replace the placeholder values below with your
# own literal values (e.g., 'USERNAME' -> 'your_username').

config = Config(
    executors=[
        IPyParallelExecutor(
            label='cooley_ssh_il_local_single_node',
            provider=CobaltProvider(
                channel=SSHInteractiveLoginChannel(
                    hostname='cooley.alcf.anl.gov',
                    username='******',     # Please replace USERNAME with your username
                    script_dir='/home/USERNAME/parsl_scripts/',    # Please replace USERNAME with your username
                ),
                nodes_per_block=1,
                tasks_per_node=1,
                init_blocks=1,
                max_blocks=1,
                walltime="00:05:00",
                scheduler_options='',     # Input your scheduler_options if needed
                worker_init='',     # Input your worker_init if needed
                queue='pubnet-debug',
                account='ALCF_ALLOCATION',    # Please replace ALCF_ALLOCATION with your ALCF allocation
                launcher=SingleNodeLauncher(),
            ),
            controller=Controller(public_ip='PUBLIC_IP'),    # Please replace PUBLIC_IP with your public IP
        )

    ],
)
Example #16
        tasks_per_node = target_workers % args.cores_per_node
    else:
        nodes_per_block = int(target_workers / args.cores_per_node)
        tasks_per_node = args.cores_per_node

    config = Config(
        executors=[
            HighThroughputExecutor(
                label="funcx_local",
                #            worker_debug=True,
                worker_mode="singularity_reuse",
                container_image=os.path.expanduser("~/sing-run.simg"),
                cores_per_worker=int(args.cores_per_node / tasks_per_node),
                max_workers=1,
                address=address_by_interface("eth0"),
                provider=CobaltProvider(launcher=SingleNodeLauncher(),
                                        init_blocks=1,
                                        max_blocks=1,
                                        queue=args.queue,
                                        account='DLHub',
                                        worker_init="source activate funcx_5"),
            )
        ],
        run_dir="/home/tskluzac/FuncX/evaluation/runinfo",
        strategy=None,
    )

    parsl.clear()
    dfk = parsl.load(config)
    executor = list(dfk.executors.values())[0]
Example #17
    def __init__(self,
                 vm_reference,
                 init_blocks=1,
                 min_blocks=0,
                 max_blocks=10,
                 parallelism=1,
                 worker_init='',
                 location='westus',
                 group_name='parsl.auto',
                 key_name=None,
                 key_file=None,
                 vnet_name="parsl.auto",
                 linger=False,
                 launcher=SingleNodeLauncher()):
        if not _api_enabled:
            raise OptionalModuleMissing(
                ['azure', 'msrestazure'],
                "Azure Provider requires the azure module.")

        self._label = 'azure'
        self.init_blocks = init_blocks
        self.min_blocks = min_blocks
        self.max_blocks = max_blocks
        self.max_nodes = max_blocks
        self.parallelism = parallelism
        self.nodes_per_block = 1

        self.worker_init = worker_init
        self.vm_reference = vm_reference
        self.region = location
        self.vnet_name = vnet_name

        self.key_name = key_name
        self.key_file = key_file
        self.location = location
        self.group_name = group_name

        self.launcher = launcher
        self.linger = linger
        self.resources = {}
        self.instances = []

        env_specified = os.getenv("AZURE_CLIENT_ID") is not None and os.getenv(
            "AZURE_CLIENT_SECRET") is not None and os.getenv(
                "AZURE_TENANT_ID") is not None and os.getenv(
                    "AZURE_SUBSCRIPTION_ID") is not None

        if key_file is None and not env_specified:
            raise ConfigurationError(
                "Must specify either 'key_file' or the `AZURE_CLIENT_ID`, "
                "`AZURE_CLIENT_SECRET`, `AZURE_TENANT_ID`, and "
                "`AZURE_SUBSCRIPTION_ID` environment variables.")

        if key_file is None:
            self.clientid = os.getenv("AZURE_CLIENT_ID")
            self.clientsecret = os.getenv("AZURE_CLIENT_SECRET")
            self.tenantid = os.getenv("AZURE_TENANT_ID")
            self.subid = os.getenv("AZURE_SUBSCRIPTION_ID")
        else:
            with open(key_file) as fh:
                keys = json.load(fh)
                self.clientid = keys.get("AZURE_CLIENT_ID")
                self.clientsecret = keys.get("AZURE_CLIENT_SECRET")
                self.tenantid = keys.get("AZURE_TENANT_ID")
                self.subid = keys.get("AZURE_SUBSCRIPTION_ID")

        self.get_clients()
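
A sketch of the credential file consumed by the key_file branch above; all values are placeholders, and the same four names can instead be exported as environment variables.

import json

azure_keys = {
    "AZURE_CLIENT_ID": "<client-id>",
    "AZURE_CLIENT_SECRET": "<client-secret>",
    "AZURE_TENANT_ID": "<tenant-id>",
    "AZURE_SUBSCRIPTION_ID": "<subscription-id>",
}
with open('azure_keys.json', 'w') as fh:
    json.dump(azure_keys, fh, indent=2)
# Then pass key_file='azure_keys.json' when constructing the provider.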
Example #18
    def __init__(self,
                 image_id,
                 key_name,
                 init_blocks=1,
                 min_blocks=0,
                 max_blocks=10,
                 nodes_per_block=1,
                 parallelism=1,
                 worker_init='',
                 instance_type='t2.small',
                 region='us-east-2',
                 spot_max_bid=0,
                 key_file=None,
                 profile=None,
                 iam_instance_profile_arn='',
                 state_file=None,
                 walltime="01:00:00",
                 linger=False,
                 launcher=SingleNodeLauncher()):
        if not _boto_enabled:
            raise OptionalModuleMissing(
                ['boto3'], "AWS Provider requires the boto3 module.")

        self.image_id = image_id
        self._label = 'ec2'
        self.init_blocks = init_blocks
        self.min_blocks = min_blocks
        self.max_blocks = max_blocks
        self.nodes_per_block = nodes_per_block
        self.max_nodes = max_blocks * nodes_per_block
        self.parallelism = parallelism

        self.worker_init = worker_init
        self.instance_type = instance_type
        self.region = region
        self.spot_max_bid = spot_max_bid

        self.key_name = key_name
        self.key_file = key_file
        self.profile = profile
        self.iam_instance_profile_arn = iam_instance_profile_arn

        self.walltime = walltime
        self.launcher = launcher
        self.linger = linger
        self.resources = {}
        self.state_file = state_file if state_file is not None else '.ec2_{}.json'.format(
            self.label)

        env_specified = os.getenv(
            "AWS_ACCESS_KEY_ID") is not None and os.getenv(
                "AWS_SECRET_ACCESS_KEY") is not None
        if profile is None and key_file is None and not env_specified:
            raise ConfigurationError(
                "Must specify either 'profile', 'key_file', or the "
                "'AWS_ACCESS_KEY_ID' and 'AWS_SECRET_ACCESS_KEY' environment variables."
            )

        try:
            self.initialize_boto_client()
        except Exception as e:
            logger.error("{} failed to initialize.".format(self))
            raise e

        state_file_exists = False
        try:
            self.read_state_file(self.state_file)
            state_file_exists = True
        except Exception:
            logger.info(
                "No state file found. Cannot load previous options. Creating new infrastructure."
            )

        if not state_file_exists:
            try:
                self.create_vpc().id
            except Exception as e:
                logger.info(
                    "Failed to create ec2 infrastructure: {0}".format(e))
                raise
            else:
                self.write_state_file()
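
As the ConfigurationError above indicates, at least one credential source is required. A small sketch mirroring the constructor's environment check:

import os

# With neither profile nor key_file supplied, both of these variables must be set.
env_specified = (os.getenv("AWS_ACCESS_KEY_ID") is not None
                 and os.getenv("AWS_SECRET_ACCESS_KEY") is not None)
print("environment credentials available:", env_specified)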
Example #19
File: lsf.py Project: Parsl/parsl
    def __init__(self,
                 channel=LocalChannel(),
                 nodes_per_block=1,
                 cores_per_block=None,
                 cores_per_node=None,
                 init_blocks=1,
                 min_blocks=0,
                 max_blocks=1,
                 parallelism=1,
                 walltime="00:10:00",
                 scheduler_options='',
                 worker_init='',
                 project=None,
                 queue=None,
                 cmd_timeout=120,
                 move_files=True,
                 bsub_redirection=False,
                 request_by_nodes=True,
                 launcher=SingleNodeLauncher()):
        label = 'LSF'
        super().__init__(label,
                         channel,
                         nodes_per_block,
                         init_blocks,
                         min_blocks,
                         max_blocks,
                         parallelism,
                         walltime,
                         cmd_timeout=cmd_timeout,
                         launcher=launcher)

        self.project = project
        self.queue = queue
        self.cores_per_block = cores_per_block
        self.cores_per_node = cores_per_node
        self.move_files = move_files
        self.bsub_redirection = bsub_redirection
        self.request_by_nodes = request_by_nodes

        # Update scheduler options
        self.scheduler_options = scheduler_options + "\n"
        if project:
            self.scheduler_options += "#BSUB -P {}\n".format(project)
        if queue:
            self.scheduler_options += "#BSUB -q {}\n".format(queue)
        if request_by_nodes:
            self.scheduler_options += "#BSUB -nnodes {}\n".format(
                nodes_per_block)
        else:
            assert cores_per_block is not None and cores_per_node is not None, \
                       "Requesting resources by the number of cores. " \
                       "Need to specify cores_per_block and cores_per_node in the LSF provider."

            self.scheduler_options += "#BSUB -n {}\n".format(cores_per_block)
            self.scheduler_options += '#BSUB -R "span[ptile={}]"\n'.format(
                cores_per_node)

            # Set nodes_per_block manually for Parsl strategy
            assert cores_per_node != 0, "Need to specify a non-zero cores_per_node."
            self.nodes_per_block = int(
                math.ceil(cores_per_block / cores_per_node))

        self.worker_init = worker_init
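
A worked sketch of the request-by-cores branch above, using illustrative numbers:

import math

cores_per_block, cores_per_node = 40, 16   # placeholder values
print("#BSUB -n {}".format(cores_per_block))                  # -> #BSUB -n 40
print('#BSUB -R "span[ptile={}]"'.format(cores_per_node))     # -> #BSUB -R "span[ptile=16]"
print(int(math.ceil(cores_per_block / cores_per_node)))       # -> 3 (nodes_per_block)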