Example #1
def build_joint_pafs_single_location(drmaa, queue: str, jobs: Dict,
                                     location: str, draws, output_dir: Path,
                                     session):
    sanitized_location = sanitize_location(location)
    path = output_dir / sanitized_location
    # Only wipe the existing directory when rebuilding all 1000 draws.
    if path.exists() and len(draws) == 1000:
        shutil.rmtree(path)
    path.mkdir(exist_ok=True, mode=0o775)

    for draw in draws:
        job_template = session.createJobTemplate()
        job_template.remoteCommand = shutil.which("python")
        job_template.outputPath = f":{output_dir}/output_logs"
        job_template.errorPath = f":{output_dir}/error_logs"
        job_template.args = [__file__, str(path), f'"{location}"', str(draw)]
        job_template.nativeSpecification = (
            f'-V '  # Export all environment variables
            f'-b y '  # Command is a binary (python)
            f'-P {project_globals.CLUSTER_PROJECT} '
            f'-q {queue} '
            f'-l fmem={project_globals.MAKE_ARTIFACT_MEM} '
            f'-l fthread={project_globals.MAKE_ARTIFACT_CPU} '
            f'-l h_rt={project_globals.MAKE_ARTIFACT_RUNTIME} '
            f'-l archive=TRUE '  # Need J-drive access for data
            f'-N {sanitized_location}_joint_pafs')  # Name of the job
        # Key by location and draw; keying by location alone would let each
        # draw's entry overwrite the previous one in the jobs dict.
        job_id = session.runJob(job_template)
        jobs[f'{sanitized_location}_draw_{draw}'] = (job_id,
                                                     drmaa.JobState.UNDETERMINED)
        logger.info(
            f'Submitted job {job_id} to build joint pafs for {location} and draw {draw}.'
        )
        session.deleteJobTemplate(job_template)
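
The template re-invokes this same file with the output path, the quoted location, and the draw as command-line arguments. A hypothetical sketch of the __main__ guard the submitted script would need (the real entry point is not shown in these examples):

if __name__ == '__main__':
    import sys

    # argv layout matches job_template.args above: script, path, location, draw.
    _, out_path, location_arg, draw_arg = sys.argv
    location = location_arg.strip('"')  # the template wraps the location in quotes
    # ... compute the joint PAF for this location and draw, then write it
    # under out_path so the merge step in Example #11 can collect it ...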
Example #2
def make_sample_history_single_location(drmaa, queue: str, jobs: Dict, location: str, scenarios: List[str],
                                        output_dir: Path, session):
    sanitized_location = sanitize_location(location)
    path = output_dir / sanitized_location
    # exist_ok makes these safe whether or not the directories already exist;
    # the log subdirectories must exist before the jobs start writing to them.
    path.mkdir(exist_ok=True, mode=0o775)
    (path / 'error_logs').mkdir(exist_ok=True, mode=0o775)
    (path / 'output_logs').mkdir(exist_ok=True, mode=0o775)

    for scenario in scenarios:
        job_template = session.createJobTemplate()
        job_template.remoteCommand = shutil.which("python")
        job_template.outputPath = f":{path}/output_logs"
        job_template.errorPath = f":{path}/error_logs"
        job_template.args = [__file__, str(path), f'"{sanitized_location}"', scenario]
        job_template.nativeSpecification = (f'-V '  # Export all environment variables
                                            f'-b y '  # Command is a binary (python)
                                            f'-P {project_globals.CLUSTER_PROJECT} '
                                            f'-q {queue} '
                                            f'-l fmem={project_globals.MAKE_ARTIFACT_MEM} '
                                            f'-l fthread={project_globals.MAKE_ARTIFACT_CPU} '
                                            f'-l h_rt={project_globals.MAKE_ARTIFACT_RUNTIME} '
                                            f'-l archive=TRUE '  # Need J-drive access for data
                                            f'-N {sanitized_location}_{scenario}_sample_history')  # Job name
        # Key by location and scenario so each submitted job keeps its own entry.
        job_id = session.runJob(job_template)
        jobs[f'{sanitized_location}_{scenario}'] = (job_id, drmaa.JobState.UNDETERMINED)
        logger.info(f'Submitted job {job_id} to generate sample history '
                    f'for {location} and scenario {scenario}.')
        session.deleteJobTemplate(job_template)
Example #3
def sample_raw_rx_change(location: str, draw: int, rx_change: str) -> float:
    """Raw result: needs to be adjusted."""
    location = sanitize_location(location)
    seed = get_hash(f'{rx_change}_probability_draw_{draw}_location_{location}')
    data = pd.read_csv(paths.PROB_ADDING_DRUGS).set_index('probability_type')
    params = data.loc[rx_change, :]
    return sample_truncnorm_distribution(seed, params[MEAN_COLUMN],
                                         params[SD_COLUMN])
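
Examples #3 through #6 (and #8 through #10) share one pattern: hash the parameter names into a deterministic seed, look up a mean and standard deviation in a CSV, and draw from a truncated normal. A minimal sketch of the two helpers this assumes; the hashing scheme and the zero truncation bound are guesses at the real implementations:

import hashlib

import numpy as np
from scipy.stats import truncnorm


def get_hash(key: str) -> int:
    # Stable across processes (unlike the built-in hash), so a given
    # draw/location pair always maps to the same seed.
    return int(hashlib.sha256(key.encode()).hexdigest(), 16) % 2**32


def sample_truncnorm_distribution(seed: int, mean: float, sd: float) -> float:
    # truncnorm takes its bounds in standard deviations from the mean;
    # truncating at zero keeps sampled probabilities non-negative.
    lower = (0.0 - mean) / sd
    return truncnorm.rvs(lower, np.inf, loc=mean, scale=sd,
                         random_state=np.random.RandomState(seed))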
Example #4
def sample_probability_target_given_rx(location: str, draw: int) -> float:
    location = sanitize_location(location)
    seed = get_hash(
        f'target_given_rx_probability_draw_{draw}_location_{location}')
    data = pd.read_csv(paths.PROB_TARGET_GIVEN_RX).set_index(LOCATION_COLUMN)
    params = data.loc[location, :]
    return sample_truncnorm_distribution(seed, params[MEAN_COLUMN],
                                         params[SD_COLUMN])
Example #5
def sample_adherence(location: str, draw: int, multi_pill: bool,
                     previous_cve: bool) -> float:
    location = sanitize_location(location)
    seed = get_hash(f'adherence_probability_draw_{draw}_location_{location}')
    data = pd.read_csv(paths.ADHERENCE_PARAMETERS).set_index(
        [LOCATION_COLUMN, 'multi_pill', 'previous_cve'])
    params = data.loc[(location, int(multi_pill), int(previous_cve)), :]
    return sample_truncnorm_distribution(seed, params[MEAN_COLUMN],
                                         params[SD_COLUMN])
Example #6
def sample_probability_testing_ldl_c(location: str, draw: int) -> float:
    location = sanitize_location(location)
    seed = get_hash(
        f'testing_ldl_c_probability_draw_{draw}_location_{location}')
    data = pd.read_csv(
        paths.PROB_TESTING_LDL_C_PATH).set_index(LOCATION_COLUMN)
    params = data.loc[location, :]
    return sample_truncnorm_distribution(seed, params[MEAN_COLUMN],
                                         params[SD_COLUMN])
Example #7
def build_artifacts(location: str, output_dir: str, append: bool,
                    verbose: int):
    """Main application function for building artifacts.
    Parameters
    ----------
    location
        The location to build the artifact for.  Must be one of the
        locations specified in the project globals or the string 'all'.
        If the latter, this application will build all artifacts in
        parallel.
    output_dir
        The path where the artifact files will be built.
    append
        Whether we should append to existing artifacts at the given output
        directory.  Has no effect if artifacts are not found.
    verbose
        How noisy the logger should be.
    """
    output_dir = Path(output_dir)

    if location in project_globals.LOCATIONS:
        path = output_dir / f'{sanitize_location(location)}.hdf'

        if path.exists() and not append:
            click.confirm(
                f"Existing artifact found for {location}. Do you want to delete and rebuild?",
                abort=True)
            logger.info(f'Deleting artifact at {str(path)}.')
            path.unlink()

        build_single_location_artifact(path, location)

    elif location == 'all':
        # FIXME: could be more careful
        existing_artifacts = {item.stem for item in output_dir.iterdir()
                              if item.is_file() and item.suffix == '.hdf'}
        locations = {sanitize_location(loc) for loc in project_globals.LOCATIONS}
        existing = locations.intersection(existing_artifacts)

        if existing and not append:
            click.confirm(
                f'Existing artifacts found for {existing}. Do you want to delete and rebuild?',
                abort=True)
            for loc in existing:
                path = output_dir / f'{loc}.hdf'
                logger.info(f'Deleting artifact at {str(path)}.')
                path.unlink()

        build_all_artifacts(output_dir, verbose)

    else:
        raise ValueError(
            f'Location must be one of {project_globals.LOCATIONS} or the string "all". '
            f'You specified {location}.')
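
Since build_artifacts calls click.confirm, it is presumably driven by a click command line. A hypothetical wrapper showing how it could be wired up; the command name, option names, and defaults here are guesses, not the project's actual CLI:

import click


@click.command()
@click.argument('location')
@click.option('--output-dir', '-o', default='.', show_default=True,
              help='Directory in which to build the artifact files.')
@click.option('--append', '-a', is_flag=True,
              help='Append to existing artifacts instead of rebuilding.')
@click.option('--verbose', '-v', count=True, help='Raise logging verbosity.')
def make_artifacts(location, output_dir, append, verbose):
    build_artifacts(location, output_dir, append, verbose)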
Example #8
def sample_raw_drug_prescription(location: str, draw: int, drug: str) -> float:
    """Raw result: needs to be adjusted."""
    location = sanitize_location(location)
    seed = get_hash(
        f'{drug}_prescription_probability_draw_{draw}_location_{location}')
    data = pd.read_csv(paths.CURRENT_RX_DATA_PATH).set_index(
        [LOCATION_COLUMN, 'current_prescription'])
    params = data.loc[(location, drug.replace('_', ' ')), :]
    return sample_truncnorm_distribution(seed, params[MEAN_COLUMN],
                                         params[SD_COLUMN])
Example #9
def sample_therapy_type(location: str, draw: int, therapy_type: str) -> float:
    location = sanitize_location(location)
    # Compare by value: `is` only matched the FDC constant by accident of
    # string interning.
    therapy_type = therapy_type.upper() if therapy_type == FDC else therapy_type
    seed = get_hash(
        f'{therapy_type}_probability_draw_{draw}_location_{location}')
    data = pd.read_csv(paths.PROB_THERAPY_TYPE).set_index(
        [LOCATION_COLUMN, 'therapy_type'])
    params = data.loc[(location, therapy_type), :]
    return sample_truncnorm_distribution(seed, params[MEAN_COLUMN],
                                         params[SD_COLUMN])
Example #10
def sample_probability_increasing_dose(scenario: str, location: str,
                                       draw: int) -> float:
    location = sanitize_location(location)
    # All non-baseline scenarios share the intervention parameters.
    scenario = scenario if scenario == 'baseline' else 'intervention'
    seed = get_hash(
        f'increasing_dose_probability_scenario_{scenario}_draw_{draw}_location_{location}'
    )
    data = pd.read_csv(paths.PROB_ADDING_DRUGS).set_index(
        [LOCATION_COLUMN, 'scenario'])
    params = data.loc[(location, scenario), :]
    return sample_truncnorm_distribution(seed, params[MEAN_COLUMN],
                                         params[SD_COLUMN])
Example #11
def build_joint_pafs(location: str, draws: str, verbose: int, queue: str):
    # Local import to avoid data dependencies
    from vivarium_inputs import globals as vi_globals, utilities

    output_dir = paths.JOINT_PAF_DIR
    locations = project_globals.LOCATIONS if location == 'all' else [location]

    from vivarium_cluster_tools.psimulate.utilities import get_drmaa
    drmaa = get_drmaa()
    jobs = {}
    # 'all' -> every draw, 'none' -> no draws, otherwise a comma-separated list.
    draw_list = {'all': range(1000), 'none': []}.get(draws, draws.split(','))
    with drmaa.Session() as session:
        for location in locations:
            build_joint_pafs_single_location(drmaa, queue, jobs, location,
                                             draw_list, output_dir, session)

        if verbose:
            logger.info('Entering monitoring loop.')
            logger.info('-------------------------')
            logger.info('')

            while any(job[1] not in [drmaa.JobState.DONE, drmaa.JobState.FAILED]
                      for job in jobs.values()):
                for location, (job_id, status) in jobs.items():
                    jobs[location] = (job_id, session.jobStatus(job_id))
                    logger.info(
                        f'{location:<35}: {decode_status(drmaa, jobs[location][1]):>15}'
                    )
                logger.info('')
                time.sleep(project_globals.MAKE_ARTIFACT_SLEEP)
                logger.info('Checking status again')
                logger.info('---------------------')
                logger.info('')

    for location in locations:
        logger.info(f'Merging data for location - {location}')
        sanitized_location = sanitize_location(location)
        location_dir = paths.JOINT_PAF_DIR / sanitized_location

        existing_data_path = output_dir / f'{sanitized_location}.hdf'
        joint_pafs = []
        if existing_data_path.exists():
            # Keep a copy of the previous artifact before merging new draws in.
            joint_pafs.append(pd.read_hdf(existing_data_path))
            joint_pafs[0].to_hdf(output_dir / f'{sanitized_location}-old.hdf',
                                 'data')

        for file_path in location_dir.iterdir():
            draw = file_path.stem  # one file per draw, named for its draw column
            draw_joint_paf = pd.read_hdf(file_path).rename(columns={0: draw})
            draw_joint_paf['affected_measure'] = 'incidence_rate'
            draw_joint_paf = draw_joint_paf.set_index(
                list(draw_joint_paf.columns.drop(draw)))
            joint_pafs.append(draw_joint_paf)

        joint_paf_data = pd.concat(joint_pafs, axis=1)
        joint_paf_data = joint_paf_data[
            vi_globals.DRAW_COLUMNS]  # sort the columns
        joint_paf_data = utilities.sort_hierarchical_data(
            joint_paf_data).infer_objects()  # soft-convert object columns to better dtypes
        joint_paf_data.to_hdf(output_dir / f'{sanitized_location}.hdf', 'data')
        shutil.rmtree(location_dir)

    logger.info('**Done**')
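
The monitoring loop relies on a decode_status helper that is not shown. A plausible sketch, assuming it simply maps the python drmaa JobState constants to the short labels printed in the loop:

def decode_status(drmaa, status) -> str:
    # Map drmaa JobState constants to human-readable labels.
    labels = {
        drmaa.JobState.UNDETERMINED: 'Undetermined',
        drmaa.JobState.QUEUED_ACTIVE: 'Queued',
        drmaa.JobState.RUNNING: 'Running',
        drmaa.JobState.DONE: 'Done',
        drmaa.JobState.FAILED: 'Failed',
    }
    return labels.get(status, 'Unknown')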