def setup_gdc_pipeline(params):
    """Build the Parsl configuration for a GDC run and store it in ``params``.

    The run directory is created when missing, the Parsl configuration
    matching ``params['parsl_config_env']`` is selected, its run directory and
    checkpoint files are set from ``params['gdc_run_dir']``, and, when the
    configuration has monitoring enabled, the monitoring database is placed
    under ``params['gdc_output_dir']``.

    Args:
        params: Mapping that provides ``gdc_output_dir``, ``gdc_run_dir`` and
            ``parsl_config_env`` (``'nscc'``, ``'local'`` or ``'csi'``). It is
            updated in place with a new ``'parsl_config'`` entry.

    Returns:
        None. The configuration is delivered through ``params``.

    Raises:
        ValueError: If ``params['parsl_config_env']`` is not one of the
            supported environments.
    """
    gdc_output_dir = params['gdc_output_dir']
    gdc_run_dir = params['gdc_run_dir']

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(gdc_run_dir, exist_ok=True)

    parsl_config_env = params['parsl_config_env']
    if parsl_config_env == 'nscc':
        parsl_config = get_parsl_config_nscc()
    elif parsl_config_env == 'local':
        parsl_config = get_parsl_config_local()
    elif parsl_config_env == 'csi':
        parsl_config = get_parsl_config_csi()
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(
            'Invalid parsl_config_env {!r}. '
            'Valid environments are nscc, local, and csi.'.format(
                parsl_config_env))

    parsl_config.run_dir = gdc_run_dir

    # Parsl checkpointing: resume from all available checkpoints in the run
    # directory.
    parsl_config.checkpoint_files = get_all_checkpoints(gdc_run_dir)

    # Monitoring is optional; when enabled, keep its database with the
    # pipeline output.
    if parsl_config.monitoring is not None:
        parsl_config.monitoring.logging_endpoint = (
            "sqlite:///{}/monitoring.db".format(gdc_output_dir))

    params['parsl_config'] = parsl_config
from parsl.executors import HighThroughputExecutor
from parsl.utils import get_all_checkpoints

# Number of cores requested per Condor slot; the memory request below is
# derived from it.
cores_per_slot = 8

# Commands handed to the provider as ``worker_init``.
# NOTE(review): one command per line is assumed inside this literal -- confirm.
worker_init = """
source ~/.bashrc
conda activate parsl
"""

# Extra scheduler option: request 5000 memory units for every core in a slot.
scheduler_options = """
RequestMemory={}
""".format(5000 * cores_per_slot)

# A single block is started and the provider never scales beyond it.
_condor_provider = CondorProvider(
    cores_per_slot=cores_per_slot,
    scheduler_options=scheduler_options,
    worker_init=worker_init,
    init_blocks=1,
    max_blocks=1,
)

_htex_executor = HighThroughputExecutor(
    provider=_condor_provider,
    cores_per_worker=1,
    heartbeat_period=30,
    heartbeat_threshold=120,
)

# Checkpoint at task exit and load every checkpoint returned by
# get_all_checkpoints() with its default arguments.
config = Config(
    executors=[_htex_executor],
    checkpoint_mode='task_exit',
    checkpoint_files=get_all_checkpoints(),
)
init_blocks=1, max_blocks=1, nodes_per_block=node_count, walltime=walltimes[step], ), ) ) else: raise Exception('Invalid scheduler_name {}. Valid schedulers are slurm, grid_engine, and cobalt.'.format(args.scheduler_name)) print("===================================================\n") config = Config(executors=executors) config.retries = int(args.retries) config.checkpoint_mode = 'task_exit' if not args.force: config.checkpoint_files = get_all_checkpoints() parsl.set_stream_logger() parsl.load(config) ############################ # Outputs ############################ all_jobs = [] for sname in subject_dict: subject_jobs = {} # Store jobs in previous steps to use as inputs for step in subject_dict[sname]: params = subject_dict[sname][step] params['cores_per_task'] = int(cores_per_task[step]) inputs = [] actual_prereqs = []