Esempio n. 1
0
    def configure_for_parsl(self):
        """Register a local parsl thread-pool executor in ``self.info``.

        The pool size is read from the ``SLURM_JOB_NUM_NODES`` environment
        variable and falls back to a single thread when it is not set.
        """
        from parsl.executors import ThreadPoolExecutor

        node_count = os.environ.get("SLURM_JOB_NUM_NODES", 1)
        self.info["executor"] = ThreadPoolExecutor(
            label="local",
            max_threads=int(node_count),
        )
Esempio n. 2
0
    def configure_for_parsl(self):
        """Register a local parsl thread-pool executor in ``self.info``.

        The pool size comes from the ``max_threads`` entry of ``self.config``
        and defaults to 4 when that entry is absent.
        """
        from parsl.executors import ThreadPoolExecutor

        pool_size = self.config.get("max_threads", 4)
        self.info["executor"] = ThreadPoolExecutor(
            label="local",
            max_threads=pool_size,
        )
Esempio n. 3
0
def local_setup():
    """Load a parsl configuration that runs apps in a local thread pool.

    The configuration has one 4-thread executor labelled ``threads`` and a
    monitoring hub configured with hub_address 127.0.0.1 and hub_port 55055.
    """
    executors = [ThreadPoolExecutor(label='threads', max_threads=4)]
    hub = MonitoringHub(
        hub_address="127.0.0.1",
        hub_port=55055,
        logging_level=logging.INFO,
        resource_monitoring_interval=10,
    )
    parsl.load(Config(executors=executors, monitoring=hub))
Esempio n. 4
0
def theta_nwchem_config(log_dir: str,
                        nodes_per_nwchem: int = 2,
                        total_nodes: int = int(
                            os.environ.get("COBALT_JOBSIZE", 1)),
                        ml_prefetch: int = 0) -> Config:
    """Build the Theta configuration with a ``qc`` and an ``ml`` executor.

    QC workers sit on the launch node (to be able to aprun) as a thread
    pool; ML workers are started through ``aprun`` so that they are placed
    on compute nodes.

    Args:
        log_dir: Directory used for the parsl run directory, the monitoring
            logs and the ``monitoring.db`` SQLite database
        nodes_per_nwchem: Number of nodes used by one NWChem computation
        total_nodes: Total number of nodes available. The default is read
            from ``COBALT_JOBSIZE`` once, when the function is defined
        ml_prefetch: Number of tasks the ML workers prefetch for inference
    Returns:
        (Config) Parsl configuration
    """
    assert total_nodes % nodes_per_nwchem == 0, "NWChem node count not a multiple of nodes per task"
    nwc_workers = total_nodes // nodes_per_nwchem

    # Shell snippet executed before a worker starts
    worker_setup = '''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
    '''

    # One thread per concurrent NWChem task
    qc_executor = ThreadPoolExecutor(label='qc', max_threads=nwc_workers)

    ml_executor = HighThroughputExecutor(
        address=address_by_hostname(),
        label="ml",
        max_workers=1,
        prefetch_capacity=ml_prefetch,
        provider=LocalProvider(
            # Minimum increment in blocks
            nodes_per_block=nodes_per_nwchem,
            init_blocks=0,
            # Limits the number of manager processes
            max_blocks=nwc_workers,
            # Places worker on the compute node
            launcher=AprunLauncher(overrides='-d 256 --cc depth -j 4'),
            worker_init=worker_setup,
        ),
    )

    db_path = os.path.join(log_dir, "monitoring.db")
    hub = MonitoringHub(
        hub_address=address_by_hostname(),
        monitoring_debug=False,
        resource_monitoring_interval=10,
        logdir=log_dir,
        logging_endpoint=f'sqlite:///{db_path}',
    )

    return Config(
        executors=[qc_executor, ml_executor],
        monitoring=hub,
        run_dir=log_dir,
        strategy='simple',
        max_idletime=15.,
    )
Esempio n. 5
0
def theta_nwchem_config(
    choice: str,
    log_dir: str,
    nodes_per_nwchem: int = 2,
    total_nodes: int = int(os.environ.get("COBALT_JOBSIZE", 1))
) -> Config:
    """Build the Theta configuration used to run NWChem.

    Args:
        choice: Executor flavour for the ``qc`` executor: ``"htex"`` for a
            HighThroughputExecutor or ``"thread"`` for a ThreadPoolExecutor
        log_dir: Directory used as the parsl run directory
        nodes_per_nwchem: Number of nodes used by one NWChem computation
        total_nodes: Total number of nodes available. The default is read
            from ``COBALT_JOBSIZE`` once, when the function is defined
    Returns:
        (Config) Parsl configuration
    Raises:
        ValueError: If ``choice`` is neither ``"htex"`` nor ``"thread"``
    """
    assert total_nodes % nodes_per_nwchem == 0, "NWChem node count not a multiple of nodes per task"
    nwc_workers = total_nodes // nodes_per_nwchem

    # Reject unknown choices up front
    if choice not in ("htex", "thread"):
        raise ValueError(f'Choice "{choice}" not recognized ')

    if choice == 'thread':
        qc_exec = ThreadPoolExecutor(label='qc', max_threads=nwc_workers)
    else:
        # Shell snippet executed before a worker starts
        worker_setup = '''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
'''
        qc_exec = HighThroughputExecutor(
            address=address_by_hostname(),
            label="qc",
            max_workers=nwc_workers,
            cores_per_worker=1e-6,
            provider=LocalProvider(
                nodes_per_block=1,
                init_blocks=0,
                max_blocks=1,
                # Places worker on the launch node
                launcher=SimpleLauncher(),
                worker_init=worker_setup,
            ),
        )

    return Config(
        executors=[qc_exec],
        run_dir=log_dir,
        strategy='simple',
        max_idletime=15.,
    )
Esempio n. 6
0
    args = parser.parse_args()

    # Write the configuration
    config = Config(
        executors=[
            HighThroughputExecutor(
                label="htex",
                # Max workers limits the concurrency exposed via mom node
                max_workers=2,
                worker_port_range=(10000, 20000),
                provider=LocalProvider(
                    init_blocks=1,
                    max_blocks=1,
                ),
            ),
            ThreadPoolExecutor(label="local_threads")
        ],
        strategy=None,
    )
    parsl.load(config)

    # Connect to the redis server
    input_queue = RedisQueue(args.redishost,
                             port=int(args.redisport),
                             prefix='input')
    input_queue.connect()

    output_queue = RedisQueue(args.redishost,
                              port=int(args.redisport),
                              prefix='output')
    output_queue.connect()
Esempio n. 7
0
    provider=SlurmProvider(
        cori_queue,
        nodes_per_block=compute_nodes,
        exclusive=True,
        init_blocks=0,
        min_blocks=0,
        max_blocks=1,
        scheduler_options="""#SBATCH --constraint=haswell""",
        launcher=SrunLauncher(),
        cmd_timeout=60,
        walltime=walltime,
        worker_init=worker_init,
        parallelism=1.0),
)

# Thread-pool executor labelled "submit-node", limited to two threads.
local_executor = ThreadPoolExecutor(max_threads=2, label="submit-node")


def wrap_no_op(s):
    """Return ``s`` unchanged (identity wrapper)."""
    return s


cori_shifter_debug_config = WorkflowConfig(
    trim_ingest_list=600,
    ingest_source=
    "/global/projecta/projectdirs/lsst/production/DC2_ImSim/Run2.1.1i/sim/agn-test",
    rerun_prefix="benc4",
    visit_min=0,
    visit_max=9999999999,
    tract_subset=None,
    patch_subset=None,
Esempio n. 8
0
    # Write the configuration
    config = Config(
        executors=[
            HighThroughputExecutor(
                address="localhost",
                label="htex",
                # Max workers limits the concurrency exposed via mom node
                max_workers=2,
                worker_port_range=(10000, 20000),
                provider=LocalProvider(
                    init_blocks=1,
                    max_blocks=1,
                ),
            ),
            ThreadPoolExecutor(label="local_threads", max_threads=4)
        ],
        strategy=None,
    )
    parsl.load(config)
    parsl.set_stream_logger(level=logging.INFO)

    # Connect to the redis server
    client_queues, server_queues = make_queue_pairs(args.redishost,
                                                    args.redisport,
                                                    clean_slate=True,
                                                    use_pickle=True)

    # Create the method server and task generator
    doer = ParslMethodServer([target_fun, generate, score, select],
                             server_queues,
Esempio n. 9
0
def parslConfigFromCompute(compute):
    """Given a Compute instance, return a setup parsl configuration.

    Args:
        compute: An ``EC2Compute``, ``LocalCompute`` or ``PBSProCompute``
            instance describing where tasks should run.

    Returns:
        A parsl ``Config`` matching the type of ``compute``.

    Raises:
        Exception: If a required ``PAROPT_AWS_*`` environment variable is
            not set (EC2 only), or if ``compute`` is of an unknown type.
        KeyError, HTTPError, URLError, OSError: Logged and re-raised when
            they occur while the configuration is being built.
    """
    if isinstance(compute, EC2Compute):
        # NOTE: Assumes the paropt is being run on an EC2 instance with access to metadata service
        try:
            public_ip = getAWSPublicIP()

            # get the required environment variables
            required_env_vars = [
                "PAROPT_AWS_REGION", "PAROPT_AWS_KEY_NAME",
                "PAROPT_AWS_STATE_FILE", "PAROPT_AWS_IAM_INSTANCE_PROFILE_ARN"
            ]
            env_values = {
                varname: os.getenv(varname)
                for varname in required_env_vars
            }
            # Report the real environment variable names so the user knows
            # exactly what has to be exported.
            missing_vars = [
                varname for varname, value in env_values.items()
                if value is None
            ]
            if missing_vars:
                raise Exception(
                    "Missing required environment variables for running parsl with AWS:\n{}"
                    .format(missing_vars))

            # Keyword arguments for AWSProvider: the variable name without
            # its 'PAROPT_AWS_' prefix, lowercased (e.g. 'region').
            env_vars = {
                varname.replace('PAROPT_AWS_', '').lower(): value
                for varname, value in env_values.items()
            }

            parsl_config = Config(
                executors=[
                    HighThroughputExecutor(
                        label='htex_local',
                        address=public_ip,
                        worker_port_range=(54000, 54050),
                        interchange_port_range=(54051, 54100),
                        cores_per_worker=1,
                        max_workers=1,
                        provider=AWSProvider(
                            image_id=compute.ami,
                            instance_type=compute.instance_model,
                            # NOTE(review): '[email protected]' looks like an
                            # e-mail obfuscation artifact inside the URL;
                            # confirm the intended repository URL.
                            worker_init=
                            'pip3 install git+https://[email protected]/globus-labs/ParaOpt@Chaofeng_modification',
                            nodes_per_block=1,
                            init_blocks=1,
                            max_blocks=1,
                            min_blocks=0,
                            walltime='24:00:00',
                            spot_max_bid=2.0,
                            **env_vars),
                    )
                ],
                strategy=None,
            )

            return parsl_config
        except KeyError as e:
            logger.error('Failed initializing aws config: {}'.format(e))
            # bare raise keeps the original traceback
            raise
        except (HTTPError, URLError, OSError) as e:
            logger.error('Request to metadata service failed: {}'.format(e))
            raise

    elif isinstance(compute, LocalCompute):
        return Config(executors=[
            ThreadPoolExecutor(max_threads=8, label='local_threads')
        ])

    elif isinstance(compute, PBSProCompute):
        # NOTE: Assumes paropt is being run on a PBS node that has an 'ib0'
        # network interface (used for the executor and monitoring addresses)
        try:
            parsl_config = Config(
                executors=[
                    HighThroughputExecutor(
                        label="htex",
                        heartbeat_period=15,
                        heartbeat_threshold=120,
                        worker_debug=True,
                        max_workers=4,
                        address=address_by_interface('ib0'),
                        provider=PBSProProvider(
                            launcher=MpiRunLauncher(),
                            # PBS directives (header lines): for array jobs pass '-J' option
                            # scheduler_options='#PBS -J 1-10',
                            scheduler_options=compute.scheduler_options,
                            # Command to be run before starting a worker, such as:
                            # 'module load Anaconda; source activate parsl_env'.
                            worker_init=compute.worker_init,
                            # number of compute nodes allocated for each block
                            nodes_per_block=1,
                            min_blocks=1,
                            max_blocks=5,
                            cpus_per_node=compute.cpus_per_node,
                            # medium queue has a max walltime of 24 hrs
                            walltime=compute.walltime),
                    ),
                ],
                monitoring=MonitoringHub(
                    hub_address=address_by_interface('ib0'),
                    hub_port=55055,
                    resource_monitoring_interval=10,
                ),
                strategy='simple',
                retries=3,
                app_cache=True,
                checkpoint_mode='task_exit')

            return parsl_config
        except KeyError as e:
            logger.error('Failed initializing PBSPro config: {}'.format(e))
            raise

    else:
        raise Exception('Unknown Compute type')
Esempio n. 10
0
    ),  # node upon which the top-level parsl script is running
    cores_per_worker=1,
    max_workers=5,  # user tasks/node (up to capacity of machine)
    poll_period=30,
    provider=LocalProvider(  # Dispatch tasks on local machine only
        channel=LocalChannel(),
        init_blocks=1,
        max_blocks=1,
        worker_init=os.environ['PT_ENVSETUP'],  # Initial ENV setup
    ))

## Thread-pool executor labelled 'coriLogin', limited to two threads.
## NOTE: *DO NOT USE* -- this is based on the *default* executor, which is
## not recommended by Yadu.
coriLogin = ThreadPoolExecutor(label='coriLogin',
                               managed=True,
                               max_threads=2,
                               storage_access=[],
                               thread_name_prefix='',
                               working_dir=None)

###################################################
###################################################
###################################################

##
## Finally, assemble the full Parsl configuration
##   [Be sure to specify your needed executor(s)]

config = Config(app_cache=True,
                checkpoint_mode='task_exit',
                executors=[knlMj],
                monitoring=MonitoringHub(
  jobs = []
  start_time = time.time()
  for i in range(num_processes):
    jobs.append(long_task(num_processes))
  for j in jobs:
    j.result()
  end_time = time.time()
  return end_time - start_time
  

# NOTE(review): presumably the host name of the Condor front end -- confirm.
address_str = "condorfe.crc.nd.edu"

# Runs apps in a local pool of at most 8 threads labelled 'local_threads'.
local_config = Config(
    executors=[ThreadPoolExecutor(max_threads=8, label='local_threads')],
)

# Currently an empty configuration (no arguments are passed to Config).
condor_config = Config()

if __name__ == "__main__":
  task_batch_sizes = [1, 10, 50, 100, 250]
  configs = [local_config]
  """
  print("Sequential Workload")
  for config in configs:
    parsl.load(config)
    fig, axes = plt.subplots(1, 1)
Esempio n. 12
0
def parslConfigFromCompute(compute):
    """Given a Compute instance, return a setup parsl configuration.

    Args:
        compute: An ``EC2Compute`` or ``LocalCompute`` instance describing
            where tasks should run.

    Returns:
        A parsl ``Config`` matching the type of ``compute``.

    Raises:
        Exception: If a required ``PAROPT_AWS_*`` environment variable is
            not set (EC2 only), or if ``compute`` is of an unknown type.
        KeyError, HTTPError, URLError, OSError: Logged and re-raised when
            they occur while the EC2 configuration is being built.
    """
    if isinstance(compute, EC2Compute):
        # NOTE: Assumes the paropt is being run on an EC2 instance with access to metadata service
        try:
            public_ip = getAWSPublicIP()

            # get the required environment variables
            required_env_vars = [
                "PAROPT_AWS_REGION", "PAROPT_AWS_KEY_NAME",
                "PAROPT_AWS_STATE_FILE", "PAROPT_AWS_IAM_INSTANCE_PROFILE_ARN"
            ]
            env_values = {
                varname: os.getenv(varname)
                for varname in required_env_vars
            }
            # Report the real environment variable names so the user knows
            # exactly what has to be exported.
            missing_vars = [
                varname for varname, value in env_values.items()
                if value is None
            ]
            if missing_vars:
                raise Exception(
                    "Missing required environment variables for running parsl with AWS:\n{}"
                    .format(missing_vars))

            # Keyword arguments for AWSProvider: the variable name without
            # its 'PAROPT_AWS_' prefix, lowercased (e.g. 'region').
            env_vars = {
                varname.replace('PAROPT_AWS_', '').lower(): value
                for varname, value in env_values.items()
            }

            parsl_config = Config(
                executors=[
                    HighThroughputExecutor(
                        label='htex_local',
                        address=public_ip,
                        worker_port_range=(54000, 54050),
                        interchange_port_range=(54051, 54100),
                        cores_per_worker=1,
                        max_workers=1,
                        provider=AWSProvider(
                            image_id=compute.ami,
                            instance_type=compute.instance_model,
                            # NOTE(review): '[email protected]' looks like an
                            # e-mail obfuscation artifact inside the URL;
                            # confirm the intended repository URL.
                            worker_init=
                            'pip3 install git+https://[email protected]/macintoshpie/paropt',
                            nodes_per_block=1,
                            init_blocks=1,
                            max_blocks=1,
                            min_blocks=0,
                            walltime='01:00:00',
                            spot_max_bid=2.0,
                            **env_vars),
                    )
                ],
                strategy=None,
            )

            return parsl_config
        except KeyError as e:
            logger.error('Failed initializing aws config: {}'.format(e))
            # bare raise keeps the original traceback
            raise
        except (HTTPError, URLError, OSError) as e:
            logger.error('Request to metadata service failed: {}'.format(e))
            raise

    elif isinstance(compute, LocalCompute):
        return Config(executors=[
            ThreadPoolExecutor(max_threads=8, label='local_threads')
        ])

    else:
        raise Exception('Unknown Compute type')
Esempio n. 13
0
from parsl import *
# from parsl.monitoring.db_logger import MonitoringConfig
from parsl.monitoring.monitoring import MonitoringHub
from parsl.config import Config
from parsl.executors import ThreadPoolExecutor
import logging

# parsl.set_stream_logger()

# Parsl configuration: one 4-thread executor labelled 'threads' plus a
# monitoring hub configured with hub_address 127.0.0.1 and hub_port 55055.
threads_config = Config(
    executors=[ThreadPoolExecutor(label='threads', max_threads=4)],
    monitoring=MonitoringHub(
        hub_address="127.0.0.1",
        hub_port=55055,
        logging_level=logging.INFO,
        resource_monitoring_interval=10,
    ))

# DataFlowKernel built from the configuration above; it is passed to the
# @App decorators that follow in this file.
dfk = DataFlowKernel(config=threads_config)


@App('python', dfk)
def sleeper(dur=25):
    """Sleep for ``dur`` seconds (25 by default) and return nothing."""
    from time import sleep
    sleep(dur)


@App('python', dfk)
def cpu_stress(dur=30):
    import time
    s = 0
Esempio n. 14
0
                launcher=AprunLauncher(overrides="-d 64"),
                walltime=MY_TIME,
                nodes_per_block=MY_COMPUTE_NODES,
                init_blocks=1,
                min_blocks=1,
                max_blocks=MY_COMPUTE_BLOCKS,
                # string to prepend to #COBALT blocks in the submit
                # script to the scheduler eg: '#COBALT -t 50'
                scheduler_options='',
                # Command to be run before starting a worker, such as:
                worker_init='module load miniconda-3; export PATH=$PATH:{}'.
                format(MY_USER_PATH),
                cmd_timeout=120,
            ),
        ),
        ThreadPoolExecutor(label='login-node', max_threads=8),
    ],
    monitoring=MonitoringHub(
        hub_address=address_by_hostname(),
        hub_port=55055,
        monitoring_debug=False,
        resource_monitoring_interval=10,
    ))
parsl.load(parsl_config)


@python_app(executors=['theta-htex'])
def pi(num_points):
    from random import random

    inside = 0
Esempio n. 15
0
import parsl
import os
from parsl.app.app import python_app

from parsl.config import Config
from parsl.executors import ThreadPoolExecutor

# Parsl configuration with a single 8-thread executor labelled 'login-node';
# strategy is explicitly set to None.
parsl_config = Config(
    executors=[ThreadPoolExecutor(max_threads=8, label='login-node')],
    strategy=None,
)
# Load the configuration at import time, before any app is defined.
parsl.load(parsl_config)


@python_app(executors=['login-node'])
def estimate_pi(n_points):
    """Monte Carlo estimate of pi from ``n_points`` random samples.

    Draws ``n_points`` x and y coordinates uniformly from [0, 1) and returns
    four times the fraction of points whose distance from the origin is at
    most 1.
    """
    import numpy as np
    xs = np.random.uniform(0, 1, n_points)
    ys = np.random.uniform(0, 1, n_points)
    inside = np.sqrt(xs * xs + ys * ys) <= 1
    return 4 * np.sum(inside) / n_points


if __name__ == '__main__':
    import numpy as np
    n_points = 100000
    n_trials = 100
    trials = []
    for i in range(n_trials):
Esempio n. 16
0
def cli_run():
    """Command-line entry point: run an MPI method server on Redis queues.

    Parses the command-line options, loads a parsl configuration, connects
    the ``input`` and ``output`` Redis queues and then runs
    ``MpiMethodServer.main_loop()``.

    With ``--mac`` both executors are thread pools; otherwise the
    ``theta_mpi_launcher`` executor is a HighThroughputExecutor.

    Raises:
        SystemExit: With status 1 when a queue connection fails (the reason
            is written to stderr).
    """
    import sys  # function-scope import: used only for error reporting/exit

    parser = argparse.ArgumentParser()
    parser.add_argument("--redishost", default="127.0.0.1",
                        help="Address at which the redis server can be reached")
    parser.add_argument("--redisport", default="6379",
                        help="Port on which redis is available")
    # The previous help text ("Count of apps to launch") described a
    # different option; this flag only enables parsl's stream logger.
    parser.add_argument("-d", "--debug", action='store_true',
                        help="Enable parsl stream logging")
    parser.add_argument("-m", "--mac", action='store_true',
                        help="Configure for Mac")
    args = parser.parse_args()

    if args.debug:
        parsl.set_stream_logger()

    if args.mac:
        launcher_executor = ThreadPoolExecutor(label="theta_mpi_launcher")
    else:
        launcher_executor = HighThroughputExecutor(
            label="theta_mpi_launcher",
            # Max workers limits the concurrency exposed via mom node
            max_workers=2,
            provider=LocalProvider(
                init_blocks=1,
                max_blocks=1,
            ),
        )
    config = Config(
        executors=[
            launcher_executor,
            ThreadPoolExecutor(label="local_threads")
        ],
        strategy=None,
    )
    parsl.load(config)

    print('''This program creates an "MPI Method Server" that listens on an input queue and write on an output queue:

        input_queue --> mpi_method_server --> output_queue

To send it a request, add an entry to the input queue:
     run "pipeline-pump -p N" where N is an integer request
To access a result, remove it from the outout queue:
     run "pipeline-pull" (blocking) or "pipeline-pull -t T" (T an integer) to time out after T seconds
     TODO: Timeout does not work yet!
''')

    # input_queue --> mpi_method_server --> output_queue

    redis_port = int(args.redisport)
    queues = {}
    for prefix in ('input', 'output'):
        queue = RedisQueue(args.redishost, port=redis_port, prefix=prefix)
        try:
            queue.connect()
        except Exception as exc:
            # Explain why we are exiting instead of failing silently;
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            print('Failed to connect the "{}" queue to redis at {}:{}: {}'
                  .format(prefix, args.redishost, redis_port, exc),
                  file=sys.stderr)
            sys.exit(1)
        queues[prefix] = queue

    # value_server = RedisQueue(args.redishost, port=int(args.redisport), prefix='value')
    # value_server.connect()

    mms = MpiMethodServer(queues['input'], queues['output'])
    mms.main_loop()

    # Next up, we likely want to add the ability to create a value server and connect it to a method server, e.g.:
    # vs = value_server.ValueServer(output_queue)

    print("All done")
Esempio n. 17
0
                                      provider=GridEngineProvider(
                                                                  init_blocks=1, 
                                                                  max_blocks=20), 
                                      label="workers"),
               ThreadPoolExecutor(label="login", max_threads=20)
              ],
)
"""

# Parsl configuration with two executors: an IPyParallelExecutor labelled
# "workers" (GridEngineProvider, 1 initial block, at most 20 blocks,
# 10 workers per node) and a 20-thread pool labelled "login".
config = Config(
    executors=[
        IPyParallelExecutor(
            workers_per_node=10,
            provider=GridEngineProvider(init_blocks=1, max_blocks=20),
            label="workers",
        ),
        ThreadPoolExecutor(label="login", max_threads=20),
    ],
)

# Turn on parsl's stream logger, then load the configuration.
parsl.set_stream_logger()
parsl.load(config)

from data_generation import generate_data

# Command-line inputs: argv[1] is stored as the proteome file, argv[2] names
# the sub-directory to use below /home/users/ellenrichards/.
proteomefile = sys.argv[1]
directory = f'/home/users/ellenrichards/{sys.argv[2]}/'
threshold = 1000

# exist_ok=True creates the directory only when it is missing, without the
# check-then-create race of a separate os.path.isdir() test.
os.makedirs(directory, exist_ok=True)
Esempio n. 18
0
import parsl
from parsl import python_app
from parsl.config import Config
from parsl.executors import ThreadPoolExecutor
import sys

# One ThreadPoolExecutor with default settings; the parsl run directory is
# taken from the first command-line argument.
config = Config(executors=[ThreadPoolExecutor()], run_dir=sys.argv[1])

parsl.load(config)


@python_app
def pi(total):
    """Monte Carlo estimate of pi from ``total`` random integer points.

    Each point is drawn from a 10000 x 10000 box; the result is four times
    the fraction of points that fall strictly inside the inscribed circle.
    """
    import random
    side = 10000
    half = side / 2
    radius_sq = half**2
    hits = 0
    for _ in range(total):
        # Drop a random point in the box (x is drawn before y).
        px = random.randint(1, side)
        py = random.randint(1, side)
        # A True comparison adds 1 for a point within the circle.
        hits += (px - half)**2 + (py - half)**2 < radius_sq
    return hits * 4 / total


@python_app
def my_sum(a, b, c):
    """Return the sum of ``a``, ``b`` and ``c`` divided by 3 (their mean)."""
    total = a + b + c
    return total / 3
Esempio n. 19
0
    if args.named_receptors is not None:
        for named_receptor in args.named_receptors:
            receptor, name = named_receptor.split(":")
            receptors.append(receptor)
            names.append(name)
            print(f"Adding receptor {receptors[-1]}, named {names[-1]}")

    args.receptors = receptors
    args.names = names
    return args


if __name__ == '__main__':
    # Script entry point: parse the arguments, load a thread-pool parsl
    # configuration sized by args.n_jobs, then hand over to sender().

    args = get_args()

    config = Config(
        executors=[
            ThreadPoolExecutor(max_threads=args.n_jobs)
        ],
    )

    from engines.oe import oedock_from_smiles, setup_receptor_from_file, OEOptions

    parsl.load(config)

    # Report success only after the configuration has actually been loaded
    # (the message used to be printed before parsl.load was called).
    print("Parsl loaded.")

    sender(args.a, args.p, args.receptors, args.names)
Esempio n. 20
0
def cli_run():
    """Command-line entry point: run a parsl method server on Redis queues.

    Parses the command-line options, loads a parsl configuration, creates
    the ``MethodServerQueues`` and runs a ``ParslMethodServer`` that serves
    ``target_fun`` on the ``htex`` executor.

    With ``--mac`` the ``htex`` executor is a thread pool; otherwise it is a
    HighThroughputExecutor.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--redishost",
        default="127.0.0.1",
        help="Address at which the redis server can be reached")
    parser.add_argument("--redisport",
                        default="6379",
                        help="Port on which redis is available")
    # The previous help text ("Count of apps to launch") described a
    # different option; this flag only enables parsl's stream logger.
    parser.add_argument("-d",
                        "--debug",
                        action='store_true',
                        help="Enable parsl stream logging")
    parser.add_argument("-m",
                        "--mac",
                        action='store_true',
                        help="Configure for Mac")
    args = parser.parse_args()

    if args.debug:
        parsl.set_stream_logger()

    if args.mac:
        htex_executor = ThreadPoolExecutor(label="htex")
    else:
        htex_executor = HighThroughputExecutor(
            label="htex",
            # Max workers limits the concurrency exposed via mom node
            max_workers=2,
            provider=LocalProvider(
                init_blocks=1,
                max_blocks=1,
            ),
        )
    config = Config(
        executors=[
            htex_executor,
            ThreadPoolExecutor(label="local_threads")
        ],
        strategy=None,
    )
    parsl.load(config)

    print(
        '''This program creates an "MPI Method Server" that listens on an inputs queue and write on an output queue:

        input_queue --> mpi_method_server --> queues

To send it a request, add an entry to the inputs queue:
     run "pipeline-pump -p N" where N is an integer request
To access a value, remove it from the outout queue:
     run "pipeline-pull" (blocking) or "pipeline-pull -t T" (T an integer) to time out after T seconds
     TODO: Timeout does not work yet!
''')

    # Get the queues for the method server
    # NOTE(review): the port is passed through as the string parsed by
    # argparse -- confirm MethodServerQueues accepts a str port.
    method_queues = MethodServerQueues(args.redishost, port=args.redisport)

    # Start the method server
    mms = ParslMethodServer([target_fun],
                            method_queues,
                            default_executors=['htex'])
    mms.run()