def main(nid, extract_type_id, code_system_id, launch_set_id):
    """Build and write the source metadata for one nid / extract type.

    Reads the "disaggregation" phase output, looks up the data type, source
    and representative id recorded for the nid, passes everything to
    ``run_phase`` and writes the result as the 'sourcemetadata' phase output
    for the given launch set.
    """
    print_log_message("Reading disaggregation data for source metadata phase.")
    # Disaggregation output is read on purpose. The Stata version pulled the
    # corrections phase output (step 3); what is needed here is data where
    # all garbage codes have been set (step 3 produces ZZZ) but that has not
    # yet gone through any redistribution, HIV redistribution included.
    disagg_df = get_phase_output("disaggregation", nid=nid,
                                 extract_type_id=extract_type_id)

    # Same lookup for each attribute, evaluated in the listed order.
    data_type_id, source, representative_id = (
        get_value_from_nid(nid, field, extract_type_id)
        for field in ("data_type_id", "source", "representative_id")
    )

    result = run_phase(disagg_df, nid, extract_type_id, data_type_id, source,
                       representative_id, code_system_id)

    log_template = (
        "Writing {n} rows of output for launch set {ls}, nid {nid}, extract "
        "{e}"
    )
    print_log_message(log_template.format(
        n=len(result), ls=launch_set_id, nid=nid, e=extract_type_id))
    write_phase_output(result, 'sourcemetadata', nid,
                       extract_type_id, launch_set_id)
def main(nid, extract_type_id, launch_set_id):
    """Run noise reduction for one nid / extract type and write the result.

    Looks up the data type, source, model group and malaria model group for
    the nid, passes them to ``run_phase``, casts the identifier columns of
    the returned frame to int and writes it as the 'noisereduction' phase
    output for the given launch set.
    """
    print_log_message("Beginning noise reduction phase")

    def lookup(field):
        # All nid attributes are fetched with the same keyword-style call.
        return get_value_from_nid(nid, field, extract_type_id=extract_type_id)

    data_type_id = lookup('data_type_id')
    source = lookup('source')
    model_group = lookup('model_group')
    malaria_model_group = get_malaria_model_group_from_nid(
        nid, extract_type_id)

    result = run_phase(nid, extract_type_id, launch_set_id, data_type_id,
                       source, model_group, malaria_model_group)

    log_template = (
        "Writing {n} rows of output for launch set {ls}, nid {nid}, extract "
        "{e}"
    )
    print_log_message(log_template.format(
        n=len(result), ls=launch_set_id, nid=nid, e=extract_type_id))

    # Identifier columns are forced to int before the output is written.
    id_columns = [
        'age_group_id', 'cause_id', 'extract_type_id', 'location_id',
        'year_id', 'site_id', 'sex_id', 'nid'
    ]
    result[id_columns] = result[id_columns].astype(int)
    write_phase_output(result, 'noisereduction', nid, extract_type_id,
                       launch_set_id)
Example #3
0
def main(nid, extract_type_id, launch_set_id):
    """Read the redistribution data, run the phase, write the output.

    Args:
        nid: identifier of the source being processed; used to read the
            input and to label the output.
        extract_type_id: extract type of the source, used together with
            ``nid`` for reading and writing.
        launch_set_id: launch set the output is written under.

    The cause set version, population run and location set version ids are
    read from ``CONF`` and passed to ``run_phase`` as ints. The result is
    written as the 'corrections' phase output.
    """
    print_log_message("Reading redistribution data..")
    df = get_phase_output('redistribution',
                          nid=nid,
                          extract_type_id=extract_type_id)

    cause_set_version_id = int(CONF.get_id('cause_set_version'))
    pop_run_id = int(CONF.get_id('pop_run'))
    location_set_version_id = int(CONF.get_id('location_set_version'))

    # run the phase
    df = run_phase(df, nid, extract_type_id, pop_run_id, cause_set_version_id,
                   location_set_version_id)

    # The nid needs its own replacement field: reusing {n} would print the
    # row count in the place where the nid is supposed to appear.
    print_log_message(
        "Writing {n} rows of output for launch set {ls}, nid {nid}, extract "
        "{e}".format(n=len(df), ls=launch_set_id, nid=nid, e=extract_type_id))
    write_phase_output(df, 'corrections', nid, extract_type_id, launch_set_id)
def main(nid, extract_type_id, code_system_id, launch_set_id, remove_decimal):
    """Run the pipeline on disaggregation data and write/return the result.

    Loads the "disaggregation" data for the nid / extract type, looks up the
    data type id and iso3 for the nid, runs ``run_pipeline``, logs the elapsed
    wall-clock time, writes the frame as the "misdiagnosiscorrection" phase
    output and returns it.
    """
    started_at = time.time()

    disagg_df = get_claude_data(
        "disaggregation", nid=nid, extract_type_id=extract_type_id)

    # Lookups run in the listed order: data_type_id first, then iso3.
    nid_meta = {
        field: get_value_from_nid(nid, field, extract_type_id=extract_type_id)
        for field in ('data_type_id', 'iso3')
    }

    corrected = run_pipeline(
        disagg_df, nid, extract_type_id, code_system_id, remove_decimal,
        nid_meta['data_type_id'], nid_meta['iso3'])

    # The timing covers loading, lookups and the pipeline, but not the write.
    elapsed = time.time() - started_at
    print_log_message("Finished in {} seconds".format(elapsed))

    write_phase_output(
        corrected, "misdiagnosiscorrection", nid, extract_type_id,
        launch_set_id)
    return corrected
Example #5
0
def write_outputs(df, int_cause, source, nid, extract_type_id, inj_garbage):
    """Write the formatted frame to the location that matches its source.

    Sources listed in ``MCauseLauncher.limited_sources`` are written as a
    CSV named ``<nid>_<extract_type_id>_format_map.csv`` inside the limited
    use directory. Every other source goes through ``write_phase_output``
    under the "format_map" phase, in a sub-directory that depends on
    ``int_cause`` and on whether ``inj_garbage`` is set.
    """
    # Limited use data: plain CSV in the limited use folder, then done.
    if source in MCauseLauncher.limited_sources:
        target_dir = get_limited_use_directory(source, int_cause, inj_garbage)
        print_log_message(f"writing {source} to limited use dir")
        print_log_message(target_dir)
        csv_path = f"{target_dir}/{nid}_{extract_type_id}_format_map.csv"
        df.to_csv(csv_path, index=False)
        return

    # Non-limited use data: regular phase output, nested under "thesis".
    if inj_garbage:
        print_log_message(
            "writing formatted df with only injuries garbage codes as UCOD"
        )
    subdirs = (
        f"{int_cause}/thesis/inj_garbage" if inj_garbage
        else f"{int_cause}/thesis"
    )
    print_log_message(
        f"Writing nid {nid}, extract_type_id {extract_type_id}")
    write_phase_output(df, "format_map", nid, extract_type_id,
                       ymd_timestamp(), sub_dirs=subdirs)