Ejemplo n.º 1
0
def combine_with_rd_raw(df, nid, extract_type_id, location_set_version_id):
    """Attach raw, corrected and redistributed death counts to ``df``.

    Output of three earlier phases is read for the given nid / extract type
    and left-merged onto ``df`` as the columns ``deaths_rd``,
    ``deaths_corr`` and ``deaths_raw``. Rows of ``df`` without a match in a
    phase output get 0 in the corresponding column.

    Args:
        df: DataFrame containing at least the identifier columns used for
            the merge (see ``id_cols`` below).
        nid: passed through to ``get_phase_output``.
        extract_type_id: passed through to ``get_phase_output``.
        location_set_version_id: accepted but not used by this function.

    Returns:
        ``df`` with the three additional death-count columns.
    """
    id_cols = [
        'nid', 'extract_type_id', 'location_id', 'year_id', 'age_group_id',
        'sex_id', 'cause_id', 'site_id'
    ]
    death_cols = ['deaths']

    def _read_phase(phase_name):
        # Same nid / extract type is used for every phase that is read.
        return get_phase_output(phase_name,
                                nid=nid,
                                extract_type_id=extract_type_id)

    def _collapse(phase_df, new_name):
        # Sum deaths over the identifier columns, then rename the total.
        totals = phase_df.groupby(id_cols, as_index=False)[death_cols].sum()
        return totals.rename(columns={'deaths': new_name})

    raw_deaths = _collapse(_read_phase("disaggregation"), 'deaths_raw')
    corr_deaths = _collapse(_read_phase("misdiagnosiscorrection"),
                            'deaths_corr')

    # NOTE: unlike the two frames above, the redistribution output is only
    # renamed, not aggregated, so every column it has is carried into the
    # merge as-is.
    rd_deaths = _read_phase("redistribution").rename(
        columns={'deaths': 'deaths_rd'})

    # Merge order determines the column order of the result.
    for phase_deaths in (rd_deaths, corr_deaths, raw_deaths):
        df = df.merge(phase_deaths, how='left', on=id_cols)

    # Unmatched rows come out of the left merges as NaN; report them as 0.
    for deaths_col in ('deaths_rd', 'deaths_corr', 'deaths_raw'):
        df[deaths_col] = df[deaths_col].fillna(0)

    return df
Ejemplo n.º 2
0
def main(nid, extract_type_id, code_system_id, launch_set_id):
    """Run the source metadata phase for one nid / extract type.

    Reads the "disaggregation" phase output, looks up the nid-level
    attributes that ``run_phase`` needs, runs the phase, and writes the
    result as 'sourcemetadata' phase output.

    Args:
        nid: identifier of the source being processed.
        extract_type_id: extract type paired with ``nid``.
        code_system_id: passed through to ``run_phase``.
        launch_set_id: used in the log message and passed to
            ``write_phase_output``.
    """
    print_log_message("Reading disaggregation data for source metadata phase.")
    # The Stata version pulled this from the corrections phase output
    # (step 3). The input should be the data after all garbage codes have
    # been set (step 3 produces ZZZ) but before any redistribution,
    # including HIV redistribution.
    df = get_phase_output(
        "disaggregation", nid=nid, extract_type_id=extract_type_id
    )

    # nid-level attributes required by run_phase
    data_type_id = get_value_from_nid(nid, "data_type_id", extract_type_id)
    source = get_value_from_nid(nid, "source", extract_type_id)
    representative_id = get_value_from_nid(
        nid, "representative_id", extract_type_id
    )

    df = run_phase(
        df,
        nid,
        extract_type_id,
        data_type_id,
        source,
        representative_id,
        code_system_id,
    )

    write_msg = (
        "Writing {n} rows of output for launch set {ls}, nid {nid}, extract "
        "{e}"
    ).format(n=len(df), ls=launch_set_id, e=extract_type_id, nid=nid)
    print_log_message(write_msg)

    write_phase_output(
        df, 'sourcemetadata', nid, extract_type_id, launch_set_id
    )
Ejemplo n.º 3
0
def main(nid, extract_type_id, launch_set_id):
    """Read the data, run the phase, write the output.

    Reads the 'redistribution' phase output for the given nid / extract
    type, runs ``run_phase`` with the version ids taken from ``CONF``, and
    writes the result as 'corrections' phase output.

    Args:
        nid: identifier of the source being processed.
        extract_type_id: extract type paired with ``nid``.
        launch_set_id: used in the log message and passed to
            ``write_phase_output``.
    """
    print_log_message("Reading redistribution data..")
    df = get_phase_output('redistribution',
                          nid=nid,
                          extract_type_id=extract_type_id)

    # version ids come from configuration and are cast to int before use
    cause_set_version_id = int(CONF.get_id('cause_set_version'))
    pop_run_id = int(CONF.get_id('pop_run'))
    location_set_version_id = int(CONF.get_id('location_set_version'))

    # run the phase
    df = run_phase(df, nid, extract_type_id, pop_run_id, cause_set_version_id,
                   location_set_version_id)

    # upload to database
    # The nid placeholder must be its own field: reusing {n} here would
    # print the row count in place of the nid.
    print_log_message(
        "Writing {n} rows of output for launch set {ls}, nid {nid}, extract "
        "{e}".format(n=len(df), ls=launch_set_id, e=extract_type_id, nid=nid))
    write_phase_output(df, 'corrections', nid, extract_type_id, launch_set_id)