def test_regression_stage_in_does_not_stage_out():
    """Regression test: staging a file in must not trigger stage-out.

    A thread-pool executor is configured with a ``NoOpTestingFileStaging``
    provider that has stage-out disabled. The test then checks that:

    * an app taking an input ``File`` completes without error, and
    * an app declaring an output ``File`` surfaces ``NoOpError`` to the
      caller of ``.result()``.

    The DataFlowKernel is always cleaned up and cleared, even when one of
    the checks fails, so a failure here cannot leave a loaded DFK behind
    for subsequent tests.
    """
    no_stageout_config = Config(
        executors=[
            ThreadPoolExecutor(
                label='local_threads',
                storage_access=[NoOpTestingFileStaging(allow_stage_out=False)]
            )
        ],
    )

    parsl.load(no_stageout_config)

    try:
        # The context manager closes the handle even if the write raises.
        with open("test.4", "a") as f:
            f.write("test")

        # Test that stage in does not invoke stage out. If stage out is
        # attempted, then the NoOpTestingFileStaging provider will raise
        # an exception which should propagate here.
        app_test_in(File("test.4")).result()

        # Test that stage out exceptions propagate to user code.
        with pytest.raises(NoOpError):
            touch("test.5", outputs=[File("test.5")]).result()
    finally:
        # Tear down unconditionally so a failed assertion does not leak the DFK.
        parsl.dfk().cleanup()
        parsl.clear()
def test_dynamic_executor():
    """Add executors to a live DataFlowKernel and run apps after each addition.

    The test loads parsl with its default configuration, runs a first batch
    of apps, then adds a thread-pool executor and a high-throughput executor
    in turn, running another batch of apps after each addition.

    The DataFlowKernel is cleaned up in a ``finally`` block so the executors
    added during the test are shut down whether or not the test passes.
    """
    dfk = parsl.load()

    try:
        tasks = [sleeper() for _ in range(5)]
        results = [task.result() for task in tasks]
        print("Done with initial test. The results are", results)

        # Here we add a new executor to an active DFK
        thread_executors = [ThreadPoolExecutor(label='threads2', max_threads=4)]
        dfk.add_executors(executors=thread_executors)
        tasks = [cpu_stress() for _ in range(8)]
        results = [task.result() for task in tasks]
        print(
            "Successfully added thread executor and ran with it. The results are",
            results)

        # We add a htex executor to an active DFK
        executors = [
            HighThroughputExecutor(
                label='htex_local',
                cores_per_worker=1,
                max_workers=5,
                provider=LocalProvider(
                    init_blocks=1,
                    max_blocks=1,
                ),
            )
        ]
        dfk.add_executors(executors=executors)
        tasks = [add() for _ in range(10)]
        results = [task.result() for task in tasks]
        print("Successfully added htex executor and ran with it. The results are",
              results)

        print("Done testing")
    finally:
        # parsl.clear() alone only forgets the DFK; cleanup() shuts executors down.
        dfk.cleanup()
        parsl.clear()
def configure_parsl(n_threads, monitoring, **kwargs):
    """Build a parsl ``Config`` backed by a single local thread pool.

    Parameters
    ----------
    n_threads
        Passed as ``max_threads`` to the ``ThreadPoolExecutor``.
    monitoring
        Truthy to attach a ``MonitoringHub`` (port 55055, INFO logging,
        10 second resource monitoring interval); falsy for no monitoring.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    Config
        Configuration with one executor labelled ``'local_threads'``,
        ``strategy=None`` and ``app_cache=True``.
    """
    from parsl.config import Config
    from parsl.executors.threads import ThreadPoolExecutor
    from parsl.addresses import address_by_hostname

    hub = None
    if monitoring:
        # Imported lazily: only needed when monitoring was requested.
        from parsl.monitoring import MonitoringHub
        hub = MonitoringHub(
            hub_address=address_by_hostname(),
            hub_port=55055,
            logging_level=logging.INFO,
            resource_monitoring_interval=10,
        )

    thread_pool = ThreadPoolExecutor(max_threads=n_threads, label='local_threads')

    return Config(
        executors=[thread_pool],
        monitoring=hub,
        strategy=None,
        app_cache=True,
    )
def __init__(self,
             executors=None,
             app_cache=True,
             checkpoint_files=None,
             checkpoint_mode=None,
             checkpoint_period="00:30:00",
             data_management_max_threads=10,
             lazy_errors=True,
             retries=0,
             run_dir='runinfo',
             strategy='simple',
             db_logger_config=None,
             usage_tracking=True):
    """Store the configuration options on the instance.

    Parameters
    ----------
    executors
        List of executors; when ``None`` a single default
        ``ThreadPoolExecutor()`` is used.
    checkpoint_mode
        Stored as-is. A debug message is logged when it is anything other
        than ``'periodic'`` while ``checkpoint_period`` is not ``None``.
    checkpoint_period
        Stored as-is; defaults to ``"00:30:00"``.

    All remaining arguments (``app_cache``, ``checkpoint_files``,
    ``data_management_max_threads``, ``lazy_errors``, ``retries``,
    ``run_dir``, ``strategy``, ``db_logger_config``, ``usage_tracking``)
    are stored unchanged on the attribute of the same name.
    """
    if executors is None:
        executors = [ThreadPoolExecutor()]
    self.executors = executors
    self.app_cache = app_cache
    self.checkpoint_files = checkpoint_files
    self.checkpoint_mode = checkpoint_mode
    # Compare by value. `is not` against a string literal tests object
    # identity, which depends on interning and can be True for an equal
    # string built at runtime.
    if checkpoint_mode != 'periodic' and checkpoint_period is not None:
        logger.debug(
            "Checkpoint period only has an effect with checkpoint_mode='periodic'"
        )
    self.checkpoint_period = checkpoint_period
    self.data_management_max_threads = data_management_max_threads
    self.lazy_errors = lazy_errors
    self.retries = retries
    self.run_dir = run_dir
    self.strategy = strategy
    self.usage_tracking = usage_tracking
    self.db_logger_config = db_logger_config
def test_regression_stage_out_does_not_stage_in():
    """Regression test: staging a file out must not trigger stage-in.

    A thread-pool executor is configured with a ``NoOpTestingFileStaging``
    provider that has stage-in disabled. The test then checks that:

    * the helper app runs when no staging is requested,
    * an app declaring an output ``File`` completes without error, and
    * an app declaring an input ``File`` surfaces ``NoOpError`` to the
      caller of ``.result()``.

    The DataFlowKernel is always cleaned up and cleared, even when one of
    the checks fails, so a failure here cannot leave a loaded DFK behind
    for subsequent tests.
    """
    # Stage-in is the operation disabled here, hence the name.
    no_stagein_config = Config(
        executors=[
            ThreadPoolExecutor(
                label='local_threads',
                storage_access=[NoOpTestingFileStaging(allow_stage_in=False)]
            )
        ]
    )

    parsl.load(no_stagein_config)

    try:
        # Test that the helper app runs with no staging
        touch("test.1", outputs=[]).result()

        # Test with stage-out, checking that provider stage in is never
        # invoked. If stage-in is invoked, the NoOpTestingFileStaging
        # provider will raise an exception, which should propagate to
        # .result() here.
        touch("test.2", outputs=[File("test.2")]).result()

        # Test that stage-in exceptions propagate out to user code.
        with pytest.raises(NoOpError):
            touch("test.3", inputs=[File("test.3")]).result()
    finally:
        # Tear down unconditionally so a failed assertion does not leak the DFK.
        parsl.dfk().cleanup()
        parsl.clear()
def __init__(self,
             executors: Optional[List[ParslExecutor]] = None,
             app_cache: bool = True,
             checkpoint_files: Optional[Sequence[str]] = None,
             checkpoint_mode: Union[None,
                                    Literal['task_exit'],
                                    Literal['periodic'],
                                    Literal['dfk_exit'],
                                    Literal['manual']] = None,
             checkpoint_period: Optional[str] = None,
             garbage_collect: bool = True,
             internal_tasks_max_threads: int = 10,
             retries: int = 0,
             retry_handler: Optional[Callable[[Exception, TaskRecord], float]] = None,
             run_dir: str = 'runinfo',
             strategy: Optional[str] = 'simple',
             max_idletime: float = 120.0,
             monitoring: Optional[MonitoringHub] = None,
             usage_tracking: bool = False,
             initialize_logging: bool = True) -> None:
    """Record every configuration option as an attribute of the same name.

    Two arguments receive special handling:

    * ``executors``: ``None`` is replaced by a list holding one default
      ``ThreadPoolExecutor()``.
    * ``checkpoint_period``: when it is ``None`` and ``checkpoint_mode`` is
      ``'periodic'`` it becomes ``"00:30:00"``; when it is given but the
      mode is not ``'periodic'``, a debug message notes that it has no
      effect.
    """
    self.executors = [ThreadPoolExecutor()] if executors is None else executors
    self.app_cache = app_cache
    self.checkpoint_files = checkpoint_files
    self.checkpoint_mode = checkpoint_mode

    if checkpoint_period is None:
        # No explicit period: only periodic checkpointing needs a default.
        if checkpoint_mode == 'periodic':
            checkpoint_period = "00:30:00"
    elif checkpoint_mode is None:
        logger.debug(
            'The requested `checkpoint_period={}` will have no effect because `checkpoint_mode=None`'
            .format(checkpoint_period))
    elif checkpoint_mode != 'periodic':
        logger.debug(
            "Requested checkpoint period of {} only has an effect with checkpoint_mode='periodic'"
            .format(checkpoint_period))
    self.checkpoint_period = checkpoint_period

    self.garbage_collect = garbage_collect
    self.internal_tasks_max_threads = internal_tasks_max_threads
    self.retries = retries
    self.retry_handler = retry_handler
    self.run_dir = run_dir
    self.strategy = strategy
    self.max_idletime = max_idletime
    self.usage_tracking = usage_tracking
    self.initialize_logging = initialize_logging
    self.monitoring = monitoring
def __init__(self,
             executors: Optional[List[ParslExecutor]] = None,
             app_cache: bool = True,
             checkpoint_files: Optional[List[str]] = None,
             checkpoint_mode: Optional[str] = None,
             checkpoint_period: Optional[str] = None,
             data_management_max_threads: int = 10,
             lazy_errors: bool = True,
             retries: int = 0,
             run_dir: str = 'runinfo',
             strategy: Optional[str] = 'simple',
             max_idletime: float = 120.0,
             monitoring: Optional[MonitoringHub] = None,
             usage_tracking: bool = False,
             initialize_logging: bool = True):
    """Copy the supplied options onto the instance.

    ``executors`` falls back to ``[ThreadPoolExecutor()]`` when ``None``.
    ``checkpoint_period`` falls back to ``"00:30:00"`` when it is ``None``
    and ``checkpoint_mode == 'periodic'``. A period supplied together with
    any other mode is kept, but a debug message records that it has no
    effect. All other arguments are stored unchanged under their own name.
    """
    if executors is None:
        executors = [ThreadPoolExecutor()]
    self.executors = executors
    self.app_cache = app_cache
    self.checkpoint_files = checkpoint_files
    self.checkpoint_mode = checkpoint_mode

    period_given = checkpoint_period is not None
    if period_given and checkpoint_mode is None:
        logger.debug(
            'The requested `checkpoint_period={}` will have no effect because `checkpoint_mode=None`'
            .format(checkpoint_period))
    elif period_given and checkpoint_mode != 'periodic':
        logger.debug(
            "Requested checkpoint period of {} only has an effect with checkpoint_mode='periodic'"
            .format(checkpoint_period))
    elif not period_given and checkpoint_mode == 'periodic':
        # Periodic checkpointing without an explicit period: every 30 minutes.
        checkpoint_period = "00:30:00"
    self.checkpoint_period = checkpoint_period

    self.data_management_max_threads = data_management_max_threads
    self.lazy_errors = lazy_errors
    self.retries = retries
    self.run_dir = run_dir
    self.strategy = strategy
    self.max_idletime = max_idletime
    self.usage_tracking = usage_tracking
    self.initialize_logging = initialize_logging
    self.monitoring = monitoring
def __init__(self,
             executors=None,
             app_cache=True,
             checkpoint_files=None,
             checkpoint_mode=None,
             checkpoint_period=None,
             data_management_max_threads=10,
             lazy_errors=True,
             retries=0,
             run_dir='runinfo',
             strategy='simple',
             monitoring=None,
             usage_tracking=False):
    """Keep each option on the instance under its own name.

    Defaults applied here:

    * ``executors=None`` becomes ``[ThreadPoolExecutor()]``.
    * ``checkpoint_period=None`` becomes ``"00:30:00"`` when
      ``checkpoint_mode`` is ``'periodic'``.

    If a ``checkpoint_period`` is given while ``checkpoint_mode`` is not
    ``'periodic'``, the value is still stored and a debug message is logged.
    """
    if executors is None:
        executors = [ThreadPoolExecutor()]
    self.executors = executors
    self.app_cache = app_cache
    self.checkpoint_files = checkpoint_files
    self.checkpoint_mode = checkpoint_mode

    if checkpoint_period is not None and checkpoint_mode != 'periodic':
        # Pick the message matching why the period is ignored.
        if checkpoint_mode is None:
            template = 'The requested `checkpoint_period={}` will have no effect because `checkpoint_mode=None`'
        else:
            template = "Requested checkpoint period of {} only has an effect with checkpoint_mode='periodic'"
        logger.debug(template.format(checkpoint_period))
    if checkpoint_period is None and checkpoint_mode == 'periodic':
        checkpoint_period = "00:30:00"
    self.checkpoint_period = checkpoint_period

    self.data_management_max_threads = data_management_max_threads
    self.lazy_errors = lazy_errors
    self.retries = retries
    self.run_dir = run_dir
    self.strategy = strategy
    self.usage_tracking = usage_tracking
    self.monitoring = monitoring
from parsl.config import Config
from parsl.executors.threads import ThreadPoolExecutor
from parsl.tests.utils import get_rundir

# Local thread-pool configuration with periodic checkpointing every 5 seconds.
config = Config(
    executors=[ThreadPoolExecutor(label='local_threads_checkpoint_periodic')],
    checkpoint_mode='periodic',
    checkpoint_period='00:00:05',
    run_dir=get_rundir(),
)
@python_app(executors=['midway_htex']) def add(n): s = 0 for i in range(n): s += i return s if __name__ == "__main__": tasks = [sleeper() for i in range(5)] results = [i.result() for i in tasks] print("Done with initial test. The results are ", results) thread_executors = [ThreadPoolExecutor( label='threads', max_threads=4) ] dfk.add_executors(executors=thread_executors) tasks = [cpu_stress() for i in range(10)] results = [i.result() for i in tasks] print("Successfully added thread executor and ran with it. The results are ", results) htex_executors = [ HighThroughputExecutor( label="midway_htex", # worker_debug=True, cores_per_worker=1, address=address_by_hostname(), provider=SlurmProvider( 'broadwl',
from parsl.config import Config
from parsl.executors.threads import ThreadPoolExecutor

# Four local threads, with app caching switched off.
config = Config(
    app_cache=False,
    executors=[ThreadPoolExecutor(max_threads=4)],
)
from parsl.config import Config
from parsl.data_provider.globus import GlobusScheme
from parsl.executors.threads import ThreadPoolExecutor

# Example configuration: every upper-case placeholder below ('UUID', 'PATH')
# must be replaced with the literal value for your own Globus endpoint
# before this config can be used.
config = Config(
    executors=[
        ThreadPoolExecutor(
            label='local_threads_globus',
            storage_access=[
                GlobusScheme(
                    endpoint_uuid='UUID',  # replace with your endpoint UUID
                    endpoint_path='PATH',  # replace with your endpoint path
                ),
            ],
            working_dir='PATH',  # replace with your working directory
        ),
    ],
)
import pytest

from parsl.executors.threads import ThreadPoolExecutor
from parsl.app.app import App
from parsl.app.app_factory import AppFactoryFactory, AppFactory

# Executor instance shared by the decorated app and the factory test below.
workers = ThreadPoolExecutor(max_workers=4)


# Bash app registered through the ``App`` decorator against ``workers``.
# NOTE(review): the command is both bound to the local ``cmd_line`` and
# returned; keep both in case the app machinery relies on the local name.
@App('bash', workers)
def app_1(stderr='std.err', stdout='std.out'):
    cmd_line = "echo 'Hello world'"
    return cmd_line


# Undecorated twin of ``app_1``; handed to ``AppFactoryFactory.make`` in
# ``test_factory``.
def app_2(stderr='std.err', stdout='std.out'):
    cmd_line = "echo 'Hello world'"
    return cmd_line


# Plain doubling helper; not referenced elsewhere in the visible code.
def app_3(x):
    return x * 2


# Checks that ``AppFactoryFactory.make`` returns an ``AppFactory`` instance
# when asked to build a 'bash' app from ``app_2``.
@pytest.mark.local
def test_factory():
    appff = AppFactoryFactory('main')
    app_f = appff.make('bash', app_2, workers, walltime=60)
    assert isinstance(app_f, AppFactory), "AppFactoryFactory made the wrong type"
import glob
import os

# `parsl.load` is called below, so the package itself must be imported;
# `from parsl.app.app import ...` alone does not bind the name `parsl`.
import parsl
from parsl.app.app import bash_app
from parsl.config import Config
from parsl.executors.threads import ThreadPoolExecutor

# Maps an input prefix (marker name) to the directory holding its BLAST database.
databases_translate_table = {
    "18S": "databases/18S_Periegops_suterii_QMBS/",
    "12S": "databases/12S_Drymusa_rengan_AToL/",
    "COI": "databases/COI_Periegops_suterii_QMBS/",
    "28S": "databases/28S_Periegops_suterii_LUNZ00012725/",
    "H3": "databases/H3_Periegops_suterii_QMBS/",
    "16S": "databases/16S_Periegops_suterii_QMBS/",
}

config = Config(executors=[ThreadPoolExecutor()], lazy_errors=True)
parsl.load(config)


@bash_app
def run_blast_query(input_prefix):
    """Return the shell command that runs ``blastn`` for ``input_prefix``.

    Parameters
    ----------
    input_prefix : str
        Key of ``databases_translate_table``; also the prefix of the query
        file ``../../input_files/<prefix>_in.fasta`` and of the result file
        ``../../output_files/<prefix>_out.txt``.

    Returns
    -------
    str
        A command line that changes into the database directory, runs
        ``blastn`` with tabular-with-comments output (``-outfmt 7``) and
        changes back.

    Raises
    ------
    KeyError
        If ``input_prefix`` is not in ``databases_translate_table``.
    IndexError
        If the database directory contains no ``*.fasta`` file.
    """
    database_dir = databases_translate_table[input_prefix]
    # Database name = basename of the first .fasta file, up to the first dot.
    database_name = glob.glob(f"{database_dir}/*.fasta")[0].split(
        "/")[-1].split(".")[0]
    input_file = f"../../input_files/{input_prefix}_in.fasta"
    output_file = f"{input_prefix}_out.txt"
    blast_cmd = f"blastn -db {database_name} -query {input_file} -out ../../output_files/{output_file} -outfmt 7"
    print(blast_cmd)
    return f"cd {database_dir}; {blast_cmd}; cd ../../;"
import parsl
from parsl.config import Config
from parsl.executors.threads import ThreadPoolExecutor

# Four-thread local configuration with lazy error reporting enabled.
local_threads = Config(
    lazy_errors=True,
    executors=[ThreadPoolExecutor(max_threads=4)],
)
cores_per_worker=272, heartbeat_period=300, heartbeat_threshold=1200, provider=SlurmProvider(CORI_QUEUE, nodes_per_block=COMPUTE_NODES, exclusive=True, init_blocks=1, min_blocks=1, max_blocks=1, scheduler_options="""#SBATCH --constraint=knl""", launcher=SrunLauncher(), cmd_timeout=60, walltime=WALLTIME), ) local_executor = ThreadPoolExecutor(max_threads=2, label="submit-node") if MACHINEMODE == "cori": parsl_config = Config(executors=[cori_in_salloc_executor, local_executor], run_dir="{}/runinfo/".format(work_and_out_path), monitoring=MonitoringHub( hub_address=address_by_hostname(), hub_port=55055, logging_level=logging.INFO, resource_monitoring_interval=10, )) elif MACHINEMODE == "theta": parsl_config = Config( executors=[theta_executor, local_executor], remote_side_bash_executor_log_base= "/projects/LSSTADSP_DESC/Run2.1i/run201905/bashlogs/",
from parsl.config import Config
from parsl.data_provider.globus import GlobusScheme
from parsl.executors.threads import ThreadPoolExecutor
from parsl.tests.utils import get_rundir

# Developers running the tests: fill in parsl/tests/configs/user_opts.py.
# Users copying this file as an example: either
#   1) provide your own local `user_opts.py`, or
#   2) drop the import below and substitute each `user_opts[...]` lookup
#      with the literal value
#      (e.g. user_opts['globus']['endpoint'] -> 'your_endpoint_uuid').
from .user_opts import user_opts

config = Config(
    executors=[
        ThreadPoolExecutor(
            label='local_threads_globus',
            storage_access=[
                GlobusScheme(
                    endpoint_uuid=user_opts['globus']['endpoint'],
                    endpoint_path=user_opts['globus']['path'],
                ),
            ],
            working_dir=user_opts['globus']['path'],
        ),
    ],
    run_dir=get_rundir(),
)

# Taken verbatim from the user options.
remote_writeable = user_opts['globus']['remote_writeable']
import parsl import os import os.path from os import path from parsl.app.app import python_app, bash_app from parsl.configs.local_threads import config from parsl.data_provider.files import File #parsl.set_stream_logger() # <-- log everything to stdout # a configuration to run on local threads from parsl.config import Config from parsl.executors.threads import ThreadPoolExecutor local_thread_executor = ThreadPoolExecutor(max_threads=8, label='local_threads') local_thread_config = Config(executors=[local_thread_executor]) # a configuration to run locally with pilot jobs from parsl.providers import LocalProvider from parsl.channels import LocalChannel from parsl.executors import HighThroughputExecutor from parsl.providers import LSFProvider from parsl.providers import SlurmProvider from parsl.launchers import JsrunLauncher from parsl.launchers import SrunLauncher from parsl.addresses import address_by_interface local_htex = HighThroughputExecutor( label="htex_Local", worker_debug=True, cores_per_worker=1,
from parsl.config import Config
from parsl.data_provider.file_noop import NoOpFileStaging
from parsl.data_provider.http import HTTPInTaskStaging
from parsl.executors.threads import ThreadPoolExecutor
from parsl.tests.utils import get_rundir

# Local threads whose storage access lists in-task HTTP staging first,
# followed by the no-op file staging provider.
config = Config(
    executors=[
        ThreadPoolExecutor(
            label='local_threads_http_in_task',
            storage_access=[
                HTTPInTaskStaging(),
                NoOpFileStaging(),
            ],
        ),
    ],
    run_dir=get_rundir(),
)
from parsl.config import Config
from parsl.executors.threads import ThreadPoolExecutor

# Default thread-pool executor with lazy error reporting enabled.
config = Config(
    lazy_errors=True,
    executors=[ThreadPoolExecutor()],
)
with open(args.input) as f: sample_dict = json.load(f) print("Storage dir:") print(" ", os.path.realpath(args.dir)) # Download instance @python_app def down_file(fname, out, ith=None): if ith is not None: print(ith) os.system("xrdcp -P " + fname + " " + out) return 0 # Setup multithreading config = Config(executors=[ThreadPoolExecutor(max_threads=8)]) parsl.load(config) # Write futures out_dict = {} # Output filename list run_futures = [] # Future list for key in sorted(sample_dict.keys()): new_list = [] #print(key) for i, fname in enumerate(sample_dict[key][:args.limit]): if i % 5 == 0: # print some progress info ith = f'{key}: {i}/{len(sample_dict[key])}' else: ith = None out = os.path.join(os.path.realpath(args.dir),
@bash_app
def tar_list(tarfile, stdout='taroutput.txt'):
    """Return the shell command that lists the contents of ``tarfile``.

    ``stdout`` defaults to ``'taroutput.txt'``; nothing in the visible code
    reads that file afterwards.
    """
    return '/usr/bin/tar tfz {}'.format(tarfile)


# Configuration for the machine the data will land on. The destination
# Globus endpoint is identified by its UUID inside `storage_access`.
config = Config(
    executors=[
        ThreadPoolExecutor(
            label='local_threads_globus',
            working_dir='/sdcc/u/dcde1000006/globus-scratch',
            storage_access=[
                GlobusScheme(endpoint_uuid='23f78cc8-41e0-11e9-a618-0a54e005f950'),
            ],
        ),
    ],
)

# Drop any previously loaded configuration before loading this one.
parsl.clear()
parsl.load(config)

# Trivial staging exercise: fetch this archive if it is not already present.
# The file is not public, so substitute one that you are able to read.
tarfile = File('globus://e133a52e-6d04-11e5-ba46-22000b92c6ec/archive/d3c724/bbcp.tar.Z')

f = tar_list(tarfile)
from parsl import load, python_app
from parsl.configs.local_threads import config
# Load parsl with the packaged local-threads configuration imported above.
load(config)
import pandas as pd
import numpy as np
import time
# The dataset is read once, at import time, from a hard-coded absolute path.
df = pd.read_csv("/home/amanda/Downloads/bill_authentication.csv")
from parsl.config import Config
from parsl.executors.threads import ThreadPoolExecutor
maxThreads = 10
# NOTE(review): this Config is constructed but is not the one passed to
# load() above; confirm whether it is used further down the file.
local_threads = Config(executors=[
    ThreadPoolExecutor(max_threads=maxThreads, label='local_threads')
])


# Splits the module-level DataFrame into X (columns 0-3) and y (column 4).
# NOTE(review): `estimators` is unused and nothing is returned in the visible
# portion; the function appears to continue beyond this excerpt.
@python_app
def rfClassifier(estimators):
    import pandas as pd
    import numpy as np
    dataset = df
    dataset.head()
    X = dataset.iloc[:, 0:4].values
    y = dataset.iloc[:, 4].values
from parsl.config import Config
from parsl.executors.threads import ThreadPoolExecutor

# Single local thread-pool executor used by the checkpointing tests.
config = Config(
    executors=[
        ThreadPoolExecutor(label='local_threads_checkpoint'),
    ],
)
def __init__(self, config=None):
    """Initialize the DataFlowKernel.

    Parameters
    ----------
    config : Config, optional
        A specification of all configuration options. For more details see the
        :class:`~parsl.config.Config` documentation. When omitted (or
        ``None``), a fresh default ``Config()`` is created for this kernel.

    Raises
    ------
    ConfigurationError
        If ``config`` is a dictionary instead of a ``Config`` instance.
    """
    # Build the default per call. A `Config()` default in the signature is
    # evaluated once at definition time, so every kernel created without an
    # explicit config would share the same Config and executor objects.
    if config is None:
        config = Config()

    # this will be used to check cleanup only happens once
    self.cleanup_called = False

    if isinstance(config, dict):
        raise ConfigurationError(
            'Expected `Config` class, received dictionary. For help, '
            'see http://parsl.readthedocs.io/en/stable/stubs/parsl.config.Config.html')
    self._config = config
    self.run_dir = make_rundir(config.run_dir)

    if config.initialize_logging:
        parsl.set_file_logger("{}/parsl.log".format(self.run_dir), level=logging.DEBUG)

    logger.debug("Starting DataFlowKernel with config\n{}".format(config))

    if sys.version_info < (3, 6):
        logger.warning("Support for python versions < 3.6 is deprecated and will be removed after parsl 0.10")

    logger.info("Parsl version: {}".format(get_version()))

    self.checkpoint_lock = threading.Lock()

    self.usage_tracker = UsageTracker(self)
    self.usage_tracker.send_message()

    # Monitoring
    self.run_id = str(uuid4())
    self.tasks_completed_count = 0
    self.tasks_failed_count = 0
    self.tasks_dep_fail_count = 0

    self.monitoring = config.monitoring
    # hub address and port for interchange to connect
    self.hub_address = None
    self.hub_interchange_port = None
    if self.monitoring:
        # Monitoring logs default to this run's directory.
        if self.monitoring.logdir is None:
            self.monitoring.logdir = self.run_dir
        self.hub_address = self.monitoring.hub_address
        self.hub_interchange_port = self.monitoring.start(self.run_id)

    self.time_began = datetime.datetime.now()
    self.time_completed = None

    # TODO: make configurable
    logger.info("Run id is: " + self.run_id)

    self.workflow_name = None
    if self.monitoring is not None and self.monitoring.workflow_name is not None:
        self.workflow_name = self.monitoring.workflow_name
    else:
        # Fall back to the name of the first file on the call stack that is
        # not considered a parsl file.
        parsl_file_names = ['dflow.py', 'typeguard.py']
        for frame in inspect.stack():
            fname = os.path.basename(str(frame.filename))
            if fname not in parsl_file_names:
                self.workflow_name = fname
                break

    # Default version is the start time, truncated to whole seconds.
    self.workflow_version = str(self.time_began.replace(microsecond=0))
    if self.monitoring is not None and self.monitoring.workflow_version is not None:
        self.workflow_version = self.monitoring.workflow_version

    workflow_info = {
        'python_version': "{}.{}.{}".format(sys.version_info.major,
                                            sys.version_info.minor,
                                            sys.version_info.micro),
        'parsl_version': get_version(),
        "time_began": self.time_began,
        'time_completed': None,
        'workflow_duration': None,
        'run_id': self.run_id,
        'workflow_name': self.workflow_name,
        'workflow_version': self.workflow_version,
        'rundir': self.run_dir,
        'tasks_completed_count': self.tasks_completed_count,
        'tasks_failed_count': self.tasks_failed_count,
        'user': getuser(),
        'host': gethostname(),
    }

    if self.monitoring:
        self.monitoring.send(MessageType.WORKFLOW_INFO,
                             workflow_info)

    checkpoints = self.load_checkpoints(config.checkpoint_files)
    self.memoizer = Memoizer(self, memoize=config.app_cache, checkpoint=checkpoints)
    self.checkpointed_tasks = 0
    self._checkpoint_timer = None
    self.checkpoint_mode = config.checkpoint_mode

    self.data_manager = DataManager(self)
    self.executors = {}
    # An extra thread pool labelled 'data_manager' is registered alongside
    # the user-configured executors.
    data_manager_executor = ThreadPoolExecutor(max_threads=config.data_management_max_threads, label='data_manager')
    self.add_executors(config.executors + [data_manager_executor])

    if self.checkpoint_mode == "periodic":
        try:
            h, m, s = map(int, config.checkpoint_period.split(':'))
            checkpoint_period = (h * 3600) + (m * 60) + s
            self._checkpoint_timer = Timer(self.checkpoint, interval=checkpoint_period, name="Checkpoint")
        except Exception:
            # Best effort: report the bad value and fall back to 30 minutes.
            logger.error("invalid checkpoint_period provided: {0} expected HH:MM:SS".format(config.checkpoint_period))
            self._checkpoint_timer = Timer(self.checkpoint, interval=(30 * 60), name="Checkpoint")

    # if we use the functionality of dynamically adding executors
    # all executors should be managed.
    if any(x.managed for x in config.executors):
        self.flowcontrol = FlowControl(self)
    else:
        self.flowcontrol = FlowNoControl(self)

    self.task_count = 0
    self.tasks = {}
    self.submitter_lock = threading.Lock()

    atexit.register(self.atexit_cleanup)
import parsl
from parsl.config import Config
from parsl.executors.threads import ThreadPoolExecutor

# Four local threads under the executor label 'local_threads'.
local_threads = Config(
    executors=[
        ThreadPoolExecutor(label='local_threads', max_threads=4),
    ],
)
from parsl.config import Config
from parsl.data_provider.data_manager import default_staging
from parsl.data_provider.globus import GlobusStaging
from parsl.executors.threads import ThreadPoolExecutor
from parsl.tests.utils import get_rundir

# Developers running the tests: fill in parsl/tests/configs/user_opts.py.
# Users copying this file as an example: either
#   1) provide your own local `user_opts.py`, or
#   2) drop the import below and substitute each `user_opts[...]` lookup
#      with the literal value
#      (e.g. user_opts['globus']['endpoint'] -> 'your_endpoint_uuid').
from .user_opts import user_opts

# Default staging providers, extended with a Globus provider for the
# endpoint described in the user options.
storage_access = default_staging + [
    GlobusStaging(
        endpoint_uuid=user_opts['globus']['endpoint'],
        endpoint_path=user_opts['globus']['path'],
    ),
]

config = Config(
    executors=[
        ThreadPoolExecutor(
            label='local_threads_globus',
            working_dir=user_opts['globus']['path'],
            storage_access=storage_access,
        ),
    ],
    run_dir=get_rundir(),
)

# Taken verbatim from the user options.
remote_writeable = user_opts['globus']['remote_writeable']
def fresh_config():
    """Return a newly built ``Config`` with one default thread-pool executor.

    Each call constructs a new ``ThreadPoolExecutor`` and obtains the run
    directory from ``get_rundir()``.
    """
    executors = [ThreadPoolExecutor()]
    return Config(executors=executors, run_dir=get_rundir())
from libsubmit.channels import SSHChannel
from libsubmit.providers import SlurmProvider
from parsl.config import Config
from parsl.executors.ipp import IPyParallelExecutor
from parsl.executors.threads import ThreadPoolExecutor

# Two executors: an IPyParallel executor labelled 'midway' that submits to
# the 'westmere' Slurm partition over SSH, and a two-thread local pool.
config = Config(
    executors=[
        IPyParallelExecutor(
            label='midway',
            provider=SlurmProvider(
                'westmere',
                channel=SSHChannel(
                    hostname='swift.rcc.uchicago.edu',
                    username='******',
                    script_dir='/scratch/midway2/annawoodard/parsl_scripts',
                ),
                init_blocks=1,
                min_blocks=1,
                max_blocks=1000,
                nodes_per_block=1,
                tasks_per_node=2,
                overrides='module load singularity; module load Anaconda3/5.1.0; source activate parsl_py36',
            ),
        ),
        ThreadPoolExecutor(label='local', max_threads=2),
    ],
)
from parsl.config import Config
from parsl.executors.threads import ThreadPoolExecutor

# Local threads with the checkpoint mode set to 'task_exit'.
config = Config(
    executors=[ThreadPoolExecutor(label='local_threads_checkpoint_task_exit')],
    checkpoint_mode='task_exit',
)