def build_ldl_thresholds(location: str, draws: str, concat_only: bool, verbose: int):
    output_dir = paths.LDL_C_THRESHOLD_DIR
    locations = project_globals.LOCATIONS if location == 'all' else [location]

    if not concat_only:
        from vivarium_cluster_tools.psimulate.utilities import get_drmaa
        drmaa = get_drmaa()

        jobs = {}
        draw_list = range(1000) if draws == 'all' else draws.split(',')

        with drmaa.Session() as session:
            for location in locations:
                build_ldl_thresholds_single_location(drmaa, jobs, location, draw_list,
                                                     output_dir, session)

            if verbose:
                logger.info('Entering monitoring loop.')
                logger.info('-------------------------')
                logger.info('')

                while any(job[1] not in [drmaa.JobState.DONE, drmaa.JobState.FAILED]
                          for job in jobs.values()):
                    for location, (job_id, status) in jobs.items():
                        jobs[location] = (job_id, session.jobStatus(job_id))
                        logger.info(f'{location:<35}: {decode_status(drmaa, jobs[location][1]):>15}')
                    logger.info('')
                    time.sleep(project_globals.MAKE_ARTIFACT_SLEEP)
                    logger.info('Checking status again')
                    logger.info('---------------------')
                    logger.info('')

    for location in locations:
        sanitized_location = sanitize_location(location)
        path = output_dir / sanitized_location
        existing_data_path = output_dir / f'{sanitized_location}.hdf'

        # Back up any existing data before overwriting it with the merged result.
        existing_data = []
        if existing_data_path.exists():
            existing_data.append(pd.read_hdf(existing_data_path))
            existing_data[0].to_hdf(output_dir / f'{sanitized_location}-old.hdf', 'data')

        threshold_data = pd.concat(existing_data + [pd.read_hdf(file) for file in path.iterdir()],
                                   axis=1)
        threshold_data = threshold_data[[f'draw_{d}' for d in range(1000)]]  # sort the columns
        # NB: convert_objects() was removed in pandas 0.25, so this requires an older pandas.
        threshold_data = utilities.sort_hierarchical_data(threshold_data).convert_objects()
        threshold_data.to_hdf(existing_data_path, 'data')
        shutil.rmtree(path)

    logger.info('**Done**')
def pcalculate_ckd_rr(location):
    drmaa = get_drmaa()
    num_draws = 1000

    data_file = ckd_rr.RR_DATA_FOLDER / f'{location}.hdf'
    if data_file.exists():
        # Don't write over the existing file: appending to the same hdf key
        # makes the files huge.
        logger.info(f'Existing data found for {location}. Removing and re-calculating.')
        data_file.unlink()

    output_path = ckd_rr.RR_DATA_FOLDER / location
    output_path.mkdir(parents=True, exist_ok=True)

    logger.info('Submitting jobs.')
    with drmaa.Session() as s:
        jt = s.createJobTemplate()
        jt.remoteCommand = sys.executable
        jt.nativeSpecification = '-l m_mem_free=1.5G,fthread=1,h_rt=02:00:00 -q all.q -P proj_csu'
        jt.args = [ckd_rr.__file__, location, 'draw']
        jt.jobName = f'{location}_ckd_rr_draw'
        draw_jids = s.runBulkJobs(jt, 1, num_draws, 1)
        draw_jid_base = draw_jids[0].split('.')[0]

        # Hold the aggregation job until every draw job in the array finishes.
        jt.nativeSpecification = (f'-l m_mem_free=3G,fthread=1,h_rt=00:45:00 '
                                  f'-q all.q -P proj_csu -hold_jid {draw_jid_base}')
        jt.args = [ckd_rr.__file__, location, 'aggregate']
        jt.jobName = f'{location}_ckd_rr_aggregate'
        agg_jid = s.runJob(jt)

    logger.info(f'Draws for {location} have been submitted with jid {draw_jid_base}. '
                f'They will be aggregated by jid {agg_jid}.')
def build_sample_histories(locations: str, scenarios: str, verbose: int, queue: str):
    output_dir = paths.SAMPLE_HISTORY_ROOT
    locations = project_globals.LOCATIONS if locations == 'all' else locations.split(',')
    scenarios = project_globals.SCENARIOS if scenarios == 'all' else scenarios.split(',')

    from vivarium_cluster_tools.psimulate.utilities import get_drmaa
    drmaa = get_drmaa()

    jobs = {}
    with drmaa.Session() as session:
        for location in locations:
            make_sample_history_single_location(drmaa, queue, jobs, location, scenarios,
                                                output_dir, session)

        if verbose:
            logger.info('Entering monitoring loop.')
            logger.info('-------------------------')
            logger.info('')

            while any(job[1] not in [drmaa.JobState.DONE, drmaa.JobState.FAILED]
                      for job in jobs.values()):
                for location, (job_id, status) in jobs.items():
                    jobs[location] = (job_id, session.jobStatus(job_id))
                    logger.info(f'{location:<35}: {decode_status(drmaa, jobs[location][1]):>15}')
                logger.info('')
                time.sleep(project_globals.MAKE_ARTIFACT_SLEEP)
                logger.info('Checking status again')
                logger.info('---------------------')
                logger.info('')

    logger.info('**Done**')
def build_all_artifacts(output_dir, verbose):
    from vivarium_cluster_tools.psimulate.utilities import get_drmaa
    drmaa = get_drmaa()

    jobs = {}
    with drmaa.Session() as session:
        for location in project_globals.LOCATIONS:
            path = output_dir / f'{sanitize_location(location)}.hdf'

            job_template = session.createJobTemplate()
            job_template.remoteCommand = shutil.which("python")
            job_template.args = [__file__, str(path), f'"{location}"']
            job_template.nativeSpecification = (f'-V -b y -P {project_globals.CLUSTER_PROJECT} -q all.q '
                                                f'-l fmem=3G -l fthread=1 -l h_rt=3:00:00 -l archive=TRUE '
                                                f'-N {sanitize_location(location)}_artifact')
            jobs[location] = (session.runJob(job_template), drmaa.JobState.UNDETERMINED)
            logger.info(f'Submitted job {jobs[location][0]} to build artifact for {location}.')
            session.deleteJobTemplate(job_template)

        decodestatus = {
            drmaa.JobState.UNDETERMINED: 'undetermined',
            drmaa.JobState.QUEUED_ACTIVE: 'queued_active',
            drmaa.JobState.SYSTEM_ON_HOLD: 'system_hold',
            drmaa.JobState.USER_ON_HOLD: 'user_hold',
            drmaa.JobState.USER_SYSTEM_ON_HOLD: 'user_system_hold',
            drmaa.JobState.RUNNING: 'running',
            drmaa.JobState.SYSTEM_SUSPENDED: 'system_suspended',
            drmaa.JobState.USER_SUSPENDED: 'user_suspended',
            drmaa.JobState.DONE: 'finished',
            drmaa.JobState.FAILED: 'failed',
        }

        if verbose:
            logger.info('Entering monitoring loop.')
            logger.info('-------------------------')
            logger.info('')

            while any(job[1] not in [drmaa.JobState.DONE, drmaa.JobState.FAILED]
                      for job in jobs.values()):
                for location, (job_id, status) in jobs.items():
                    jobs[location] = (job_id, session.jobStatus(job_id))
                    logger.info(f'{location:<35}: {decodestatus[jobs[location][1]]:>15}')
                logger.info('')
                time.sleep(10)
                logger.info('Checking status again')
                logger.info('---------------------')
                logger.info('')

    logger.info('**Done**')
def build_all_artifacts(output_dir: Path, verbose: int):
    """Builds artifacts for all locations in parallel.

    Parameters
    ----------
    output_dir
        The directory where the artifacts will be built.
    verbose
        How noisy the logger should be.

    Note
    ----
    This function should not be called directly.  It is intended to be
    called by the :func:`build_artifacts` function located in the same
    module.

    """
    from vivarium_cluster_tools.psimulate.utilities import get_drmaa
    drmaa = get_drmaa()

    jobs = {}
    with drmaa.Session() as session:
        for location in project_globals.LOCATIONS:
            path = output_dir / f'{sanitize_location(location)}.hdf'

            job_template = session.createJobTemplate()
            job_template.remoteCommand = shutil.which("python")
            job_template.args = [__file__, str(path), f'"{location}"']
            job_template.nativeSpecification = (f'-V '  # Export all environment variables
                                                f'-b y '  # Command is a binary (python)
                                                f'-P {project_globals.CLUSTER_PROJECT} '
                                                f'-q {project_globals.CLUSTER_QUEUE} '
                                                f'-l fmem={project_globals.MAKE_ARTIFACT_MEM} '
                                                f'-l fthread={project_globals.MAKE_ARTIFACT_CPU} '
                                                f'-l h_rt={project_globals.MAKE_ARTIFACT_RUNTIME} '
                                                f'-l archive=TRUE '  # Need J-drive access for data
                                                f'-N {sanitize_location(location)}_artifact')  # Name of the job
            jobs[location] = (session.runJob(job_template), drmaa.JobState.UNDETERMINED)
            logger.info(f'Submitted job {jobs[location][0]} to build artifact for {location}.')
            session.deleteJobTemplate(job_template)

        if verbose:
            logger.info('Entering monitoring loop.')
            logger.info('-------------------------')
            logger.info('')

            while any(job[1] not in [drmaa.JobState.DONE, drmaa.JobState.FAILED]
                      for job in jobs.values()):
                for location, (job_id, status) in jobs.items():
                    jobs[location] = (job_id, session.jobStatus(job_id))
                    logger.info(f'{location:<35}: {decode_status(drmaa, jobs[location][1]):>15}')
                logger.info('')
                time.sleep(project_globals.MAKE_ARTIFACT_SLEEP)
                logger.info('Checking status again')
                logger.info('---------------------')
                logger.info('')

    logger.info('**Done**')
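# decode_status is called by the monitoring loops throughout this section but
# is not defined in it. A minimal sketch, assuming it simply wraps a
# state-to-string mapping like the inline decodestatus dict in the
# undocumented build_all_artifacts variant above; the real helper may differ.
def decode_status(drmaa, job_state) -> str:
    """Translate a drmaa job state constant into a human-readable label."""
    decodestatus = {
        drmaa.JobState.UNDETERMINED: 'undetermined',
        drmaa.JobState.QUEUED_ACTIVE: 'queued_active',
        drmaa.JobState.SYSTEM_ON_HOLD: 'system_hold',
        drmaa.JobState.USER_ON_HOLD: 'user_hold',
        drmaa.JobState.USER_SYSTEM_ON_HOLD: 'user_system_hold',
        drmaa.JobState.RUNNING: 'running',
        drmaa.JobState.SYSTEM_SUSPENDED: 'system_suspended',
        drmaa.JobState.USER_SUSPENDED: 'user_suspended',
        drmaa.JobState.DONE: 'finished',
        drmaa.JobState.FAILED: 'failed',
    }
    return decodestatus.get(job_state, 'unknown')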
def pcalculate_proportion_hypertensive(location):
    """Calculate 1000 draws of the proportion of the population that has a SBP
    above the hypertensive threshold (SBP of 140) in parallel and aggregate
    to a single hdf file saved in the central vivarium artifact store as
    ``proportion_hypertensive/location.hdf``.

    This should be run once for each location to generate the data that the
    artifact builder will look for.

    LOCATION should be specified as all lower-case, with underscores
    replacing spaces (i.e., the same way the model artifacts are named),
    e.g., russian_federation
    """
    drmaa = get_drmaa()
    num_draws = 1000

    data_file = proportion_hypertensive.HYPERTENSION_DATA_FOLDER / f'{location}.hdf'
    if data_file.exists():
        # Don't write over the existing file: appending to the same hdf key
        # makes the files huge.
        logger.info(f'Existing data found for {location}. Removing and re-calculating.')
        data_file.unlink()

    output_path = proportion_hypertensive.HYPERTENSION_DATA_FOLDER / location
    output_path.mkdir(parents=True)

    proportion_hypertensive.prep_input_data(output_path, location)

    logger.info('Submitting jobs.')
    with drmaa.Session() as s:
        jt = s.createJobTemplate()
        jt.remoteCommand = sys.executable
        jt.nativeSpecification = '-l m_mem_free=1G,fthread=1,h_rt=00:05:00 -q all.q -P proj_cost_effect'
        jt.args = [proportion_hypertensive.__file__, location, 'draw']
        jt.jobName = f'{location}_prop_hypertensive_draw'
        draw_jids = s.runBulkJobs(jt, 1, num_draws, 1)
        draw_jid_base = draw_jids[0].split('.')[0]

        # Hold the aggregation job until every draw job in the array finishes.
        jt.nativeSpecification = (f'-l m_mem_free=3G,fthread=1,h_rt=00:15:00 '
                                  f'-q all.q -P proj_cost_effect -hold_jid {draw_jid_base}')
        jt.args = [proportion_hypertensive.__file__, location, 'aggregate']
        jt.jobName = f'{location}_prop_hypertensive_aggregate'
        agg_jid = s.runJob(jt)

    logger.info(f'Draws for {location} have been submitted with jid {draw_jid_base}. '
                f'They will be aggregated by jid {agg_jid}.')
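# Hypothetical direct invocation, following the docstring's naming convention
# (all lower-case, underscores replacing spaces); in the repo this function
# is presumably exposed through a CLI entry point instead.
if __name__ == '__main__':
    pcalculate_proportion_hypertensive('russian_federation')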
def kill_jobs():
    if "drmaa" not in dir():
        # FIXME: The global drmaa should be available here.
        #        This is maybe a holdover from old code?
        #        Maybe something to do with atexit?
        drmaa = utilities.get_drmaa()

    try:
        s.control(array_job_id, drmaa.JobControlAction.TERMINATE)
    # FIXME: Hack around issue where drmaa.errors sometimes doesn't exist.
    except Exception as e:
        if 'There are no jobs registered' in str(e):
            # This is the case where all our workers have already shut down
            # on their own, which isn't actually an error.
            pass
        elif 'Discontinued delete' in str(e):
            # sge has already cleaned up some of the jobs.
            pass
        else:
            raise
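# The FIXMEs above suggest kill_jobs runs as an atexit hook, with `s` and
# `array_job_id` captured from an enclosing scope. A hedged sketch of that
# wiring, under those assumptions; submit_with_cleanup and its parameters
# are illustrative names, not from the source.
import atexit


def submit_with_cleanup(session, job_template, num_jobs):
    jids = session.runBulkJobs(job_template, 1, num_jobs, 1)
    array_job_id = jids[0].split('.')[0]  # e.g. '12345.1' -> '12345'

    def kill_jobs():
        try:
            # Terminate the whole array; harmless if workers already exited.
            session.control(array_job_id, drmaa.JobControlAction.TERMINATE)
        except Exception as e:
            if ('There are no jobs registered' not in str(e)
                    and 'Discontinued delete' not in str(e)):
                raise

    atexit.register(kill_jobs)  # tear down leftover workers on interpreter exit
    return array_job_id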
def build_joint_pafs(location: str, draws: str, verbose: int, queue: str):
    # Local import to avoid data dependencies
    from vivarium_inputs import globals as vi_globals, utilities

    output_dir = paths.JOINT_PAF_DIR
    locations = project_globals.LOCATIONS if location == 'all' else [location]

    from vivarium_cluster_tools.psimulate.utilities import get_drmaa
    drmaa = get_drmaa()

    jobs = {}
    draw_list = {'all': range(1000), 'none': []}.get(draws, draws.split(','))

    with drmaa.Session() as session:
        for location in locations:
            build_joint_pafs_single_location(drmaa, queue, jobs, location, draw_list,
                                             output_dir, session)

        if verbose:
            logger.info('Entering monitoring loop.')
            logger.info('-------------------------')
            logger.info('')

            while any(job[1] not in [drmaa.JobState.DONE, drmaa.JobState.FAILED]
                      for job in jobs.values()):
                for location, (job_id, status) in jobs.items():
                    jobs[location] = (job_id, session.jobStatus(job_id))
                    logger.info(f'{location:<35}: {decode_status(drmaa, jobs[location][1]):>15}')
                logger.info('')
                time.sleep(project_globals.MAKE_ARTIFACT_SLEEP)
                logger.info('Checking status again')
                logger.info('---------------------')
                logger.info('')

    for location in locations:
        logger.info(f'Merging data for location - {location}')
        sanitized_location = sanitize_location(location)
        location_dir = paths.JOINT_PAF_DIR / sanitized_location
        existing_data_path = output_dir / f'{sanitized_location}.hdf'

        # Back up any existing data before overwriting it with the merged result.
        joint_pafs = []
        if existing_data_path.exists():
            joint_pafs.append(pd.read_hdf(existing_data_path))
            joint_pafs[0].to_hdf(output_dir / f'{sanitized_location}-old.hdf', 'data')

        for file_path in location_dir.iterdir():
            draw = file_path.parts[-1].split('.')[0]
            draw_joint_paf = pd.read_hdf(file_path).rename(columns={0: draw})
            draw_joint_paf['affected_measure'] = 'incidence_rate'
            draw_joint_paf = draw_joint_paf.set_index(list(draw_joint_paf.columns.drop(draw)))
            joint_pafs.append(draw_joint_paf)

        joint_paf_data = pd.concat(joint_pafs, axis=1)
        joint_paf_data = joint_paf_data[vi_globals.DRAW_COLUMNS]  # sort the columns
        # NB: convert_objects() was removed in pandas 0.25, so this requires an older pandas.
        joint_paf_data = utilities.sort_hierarchical_data(joint_paf_data).convert_objects()
        joint_paf_data.to_hdf(existing_data_path, 'data')
        shutil.rmtree(location_dir)

    logger.info('**Done**')
from pathlib import Path
from time import sleep, time
from typing import Dict

import numpy as np
import pandas as pd
from loguru import logger

from vivarium.framework.configuration import build_model_specification
from vivarium.framework.utilities import collapse_nested_dict
from vivarium.framework.artifact import parse_artifact_path_config
from vivarium_cluster_tools.psimulate.branches import Keyspace
from vivarium_cluster_tools.psimulate import globals as vct_globals, utilities
from vivarium_cluster_tools.psimulate.registry import RegistryManager

drmaa = utilities.get_drmaa()


class RunContext:

    def __init__(self, model_specification_file: str, branch_configuration_file: str,
                 output_directory: Path, logging_directories: Dict[str, Path],
                 num_input_draws: int, num_random_seeds: int, restart: bool,
                 expand: Dict[str, int], no_batch: bool):
        self.number_already_completed = 0
        self.output_directory = output_directory
        self.no_batch = no_batch
        self.sge_log_directory = logging_directories['sge']
        self.worker_log_directory = logging_directories['worker']

        if restart:
from pathlib import Path

import click
from jinja2 import Template

from vivarium_gbd_access.gbd import ARTIFACT_FOLDER
from vivarium_cluster_tools.psimulate.utilities import get_drmaa

JOB_MEMORY_NEEDED = 50
JOB_TIME_NEEDED = '24:00:00'

drmaa = get_drmaa()  # safe if not on cluster


def create_and_run_job(model_spec_path: Path, output_root: Path):
    with drmaa.Session() as s:
        jt = s.createJobTemplate()
        jt.remoteCommand = "build_artifact"
        jt.nativeSpecification = '-V -l m_mem_free={}G,fthread=1,h_rt={} -q all.q -P proj_cost_effect'.format(
            JOB_MEMORY_NEEDED, JOB_TIME_NEEDED)
        # drmaa job template args must be strings; Path objects fail to encode.
        jt.args = [str(model_spec_path), '-o', str(output_root)]
        jt.jobName = f'build_artifact_{model_spec_path.name}'
        result = s.runJob(jt)
        print(f"Submitted job for {model_spec_path.name}. Job id: {result}")


@click.command()
@click.option('--model_spec', '-m', multiple=True,