def main(nid, extract_type_id, code_system_id, launch_set_id):
    """Collect source metadata.

    Reads the "disaggregation" phase output for the nid/extract type, looks
    up the data type, source and representative id for that nid, hands
    everything to ``run_phase`` and writes the result as the
    'sourcemetadata' phase output for the given launch set.
    """
    print_log_message("Reading disaggregation data for source metadata phase.")

    # in stata, this was pulled using corrections phase output (step 3)
    # use data after all garbage codes have been set (step 3 produces ZZZ)
    # but before any redistribution (including HIV redistribution)
    disagg_df = get_phase_output(
        "disaggregation", nid=nid, extract_type_id=extract_type_id
    )

    # nid-level attributes that run_phase needs alongside the data
    data_type_id = get_value_from_nid(nid, "data_type_id", extract_type_id)
    source = get_value_from_nid(nid, "source", extract_type_id)
    representative_id = get_value_from_nid(
        nid, "representative_id", extract_type_id
    )

    metadata_df = run_phase(
        disagg_df,
        nid,
        extract_type_id,
        data_type_id,
        source,
        representative_id,
        code_system_id,
    )

    summary = (
        "Writing {n} rows of output for launch set {ls}, nid {nid}, extract "
        "{e}"
    ).format(n=len(metadata_df), ls=launch_set_id, e=extract_type_id, nid=nid)
    print_log_message(summary)

    write_phase_output(
        metadata_df, 'sourcemetadata', nid, extract_type_id, launch_set_id
    )
def main(nid, extract_type_id, launch_set_id):
    """Run the noise reduction phase.

    Looks up the nid-level attributes (data type, source, model group and
    malaria model group), passes them to ``run_phase``, casts the id columns
    of the result to int and writes it as the 'noisereduction' phase output.
    """
    # download data from input database
    print_log_message("Beginning noise reduction phase")

    data_type_id = get_value_from_nid(
        nid, 'data_type_id', extract_type_id=extract_type_id
    )
    source = get_value_from_nid(
        nid, 'source', extract_type_id=extract_type_id
    )
    model_group = get_value_from_nid(
        nid, 'model_group', extract_type_id=extract_type_id
    )
    malaria_model_group = get_malaria_model_group_from_nid(nid, extract_type_id)

    result = run_phase(
        nid,
        extract_type_id,
        launch_set_id,
        data_type_id,
        source,
        model_group,
        malaria_model_group,
    )

    summary = (
        "Writing {n} rows of output for launch set {ls}, nid {nid}, extract "
        "{e}"
    ).format(n=len(result), ls=launch_set_id, nid=nid, e=extract_type_id)
    print_log_message(summary)

    # identifier columns are forced to integer dtype before writing
    int_columns = [
        'age_group_id',
        'cause_id',
        'extract_type_id',
        'location_id',
        'year_id',
        'site_id',
        'sex_id',
        'nid',
    ]
    result[int_columns] = result[int_columns].astype(int)

    write_phase_output(
        result, 'noisereduction', nid, extract_type_id, launch_set_id
    )
def main(nid, extract_type_id, launch_set_id):
    """Read the data, run the phase, write the output.

    Reads the 'redistribution' phase output for the nid/extract type, pulls
    the cause set version, population run and location set version ids from
    ``CONF``, runs ``run_phase`` and writes the result as the 'corrections'
    phase output for the given launch set.
    """
    print_log_message("Reading redistribution data..")
    df = get_phase_output(
        'redistribution', nid=nid, extract_type_id=extract_type_id
    )

    # configuration ids are cast to int before being handed to run_phase
    cause_set_version_id = int(CONF.get_id('cause_set_version'))
    pop_run_id = int(CONF.get_id('pop_run'))
    location_set_version_id = int(CONF.get_id('location_set_version'))

    # run the phase
    df = run_phase(
        df,
        nid,
        extract_type_id,
        pop_run_id,
        cause_set_version_id,
        location_set_version_id,
    )

    # upload to database
    # The nid placeholder previously reused {n} (the row count), so the log
    # reported the number of rows where the nid should have been.
    print_log_message(
        "Writing {n} rows of output for launch set {ls}, nid {nid}, extract "
        "{e}".format(n=len(df), ls=launch_set_id, nid=nid, e=extract_type_id)
    )
    write_phase_output(df, 'corrections', nid, extract_type_id, launch_set_id)
def main(nid, extract_type_id, code_system_id, launch_set_id, remove_decimal):
    """Main method.

    Loads the "disaggregation" data for the nid/extract type, runs
    ``run_pipeline`` on it, logs the elapsed time, writes the result as the
    "misdiagnosiscorrection" phase output and returns the resulting frame.
    """
    started_at = time.time()

    disagg_df = get_claude_data(
        "disaggregation", nid=nid, extract_type_id=extract_type_id
    )

    data_type_id = get_value_from_nid(
        nid, 'data_type_id', extract_type_id=extract_type_id
    )
    iso3 = get_value_from_nid(nid, 'iso3', extract_type_id=extract_type_id)

    corrected_df = run_pipeline(
        disagg_df,
        nid,
        extract_type_id,
        code_system_id,
        remove_decimal,
        data_type_id,
        iso3,
    )

    # elapsed time covers loading and the pipeline, but not the write below
    elapsed = time.time() - started_at
    print_log_message("Finished in {} seconds".format(elapsed))

    write_phase_output(
        corrected_df,
        "misdiagnosiscorrection",
        nid,
        extract_type_id,
        launch_set_id,
    )
    return corrected_df
def write_outputs(df, int_cause, source, nid, extract_type_id, inj_garbage):
    """Write the dataframe to the destination appropriate for its source.

    Sources in ``MCauseLauncher.limited_sources`` are written as a CSV in
    the limited use directory; all other sources go through
    ``write_phase_output`` as the "format_map" phase, in a sub directory
    that depends on ``int_cause`` and ``inj_garbage``.
    """
    # limited use data: plain CSV in the limited use folder, then done
    if source in MCauseLauncher.limited_sources:
        limited_dir = get_limited_use_directory(source, int_cause, inj_garbage)
        print_log_message(f"writing {source} to limited use dir")
        print_log_message(limited_dir)
        csv_path = f"{limited_dir}/{nid}_{extract_type_id}_format_map.csv"
        df.to_csv(csv_path, index=False)
        return

    # non-limited use data: regular phase output under a thesis sub directory
    subdirs = f"{int_cause}/thesis"
    if inj_garbage:
        print_log_message(
            "writing formatted df with only injuries garbage codes as UCOD"
        )
        subdirs = f"{int_cause}/thesis/inj_garbage"

    print_log_message(f"Writing nid {nid}, extract_type_id {extract_type_id}")
    write_phase_output(
        df,
        "format_map",
        nid,
        extract_type_id,
        ymd_timestamp(),
        sub_dirs=subdirs,
    )