Ejemplo n.º 1
0
def add_coverage_metadata(df):
    ''' Returns the dataframe with 'full_coverage' and 'national_registry'
            columns attached by merging the "registry" table on
            'registry_index'.

        If both columns are already present and contain no nulls, the input
            is returned untouched. Otherwise any existing copies of the two
            columns are dropped and re-derived from the registry table:
            'full_coverage' is the table's 'coverage_of_location_id', and
            'national_registry' is 1 where the registry's location_id equals
            its country_id and full_coverage is 1, else 0.

        Raises AssertionError if 'registry_index' is missing, if it contains
            nulls (when a merge is needed), or if the merge changes the
            number of rows.
    '''
    key_cols = ['registry_index']
    coverage_cols = ['full_coverage', 'national_registry']
    assert set(key_cols) <= set(df.columns), \
        "add_coverage_metadata requires {} column(s)".format(key_cols)
    # Nothing to do when the metadata is already fully populated
    already_complete = (set(coverage_cols) <= set(df.columns)
                        and df[coverage_cols].notnull().all().all())
    if already_complete:
        return df
    assert df[key_cols].notnull().all().all(), \
        "add_coverage_metadata cannot accept nulls for {} column(s)".format(
            key_cols)
    n_rows_in = len(df)
    # Discard partial metadata so the merge does not create suffixed columns
    df = df.drop(labels=coverage_cols, axis=1, errors='ignore')
    registry = cdb.db_api().get_table("registry")
    registry.rename(columns={'coverage_of_location_id': 'full_coverage'},
                    inplace=True)
    is_national = ((registry['location_id'] == registry['country_id'])
                   & registry['full_coverage'].isin([1]))
    registry.loc[is_national, 'national_registry'] = 1
    not_flagged = registry['national_registry'].isnull()
    registry.loc[not_flagged, 'national_registry'] = 0
    df = df.merge(registry[key_cols + coverage_cols],
                  how='left', on=key_cols)
    assert len(df) == n_rows_in, "Data dropped while adding coverage metadata"
    return df
Ejemplo n.º 2
0
def is_exception(dataset_id, data_type_id):
    ''' Returns a truthy value if the "prep_exception" table holds an entry
            for this dataset_id and data_type_id with
            prep_exception_type_id 1 and processing_status_id 2.
        Per the original author, such an entry flags the dataset as one for
            which negative values are accepted.
    '''
    exception_table = cdb.db_api().get_table("prep_exception")
    same_dataset = exception_table['dataset_id'].eq(dataset_id)
    same_data_type = exception_table['data_type_id'].eq(data_type_id)
    right_exception_type = exception_table['prep_exception_type_id'].eq(1)
    right_status = exception_table['processing_status_id'].eq(2)
    flagged_rows = exception_table.loc[
        same_dataset & same_data_type & right_exception_type & right_status,
        :]
    # True when any cell of any matching row is truthy
    return flagged_rows.any().any()
Ejemplo n.º 3
0
def procedure_me_id(acause):
    ''' Returns the modelable_entity_id of the active 'procedure_proportion'
            entry for the cause in the 'cnf_model_entity' table, or None if
            the cause has no such entry.
        Per the original author, these entries correspond to tumorectomy
            procedures.
    '''
    entity_table = cdb.db_api('cancer_db').get_table('cnf_model_entity')
    is_procedure_entry = (entity_table['is_active'].eq(1)
                          & entity_table['acause'].eq(acause)
                          & entity_table['me_tag'].eq('procedure_proportion'))
    matches = entity_table.loc[is_procedure_entry, 'modelable_entity_id']
    if matches.empty:
        return None
    # .item() insists on exactly one matching entry
    return matches.item()
Ejemplo n.º 4
0
def load_surv_folder(cnf_model_run_id):
    ''' Returns the "relative_survival" path for the given model run, with
            the "<date>" placeholder replaced by the 'date_updated' value of
            the run's rel_survival_version_id.
    '''
    folder_template = utils.get_path("relative_survival",
                                     process="nonfatal_model")
    run_record = nd.get_run_record(cnf_model_run_id)
    version_id = run_record.at[0, 'rel_survival_version_id']
    version_entry = cdb.db_api().get_entry(
        table_name='rel_survival_version',
        uniq_col='rel_survival_version_id',
        val=version_id)
    date_suffix = str(version_entry.at[0, 'date_updated'])
    return folder_template.replace("<date>", date_suffix)
Ejemplo n.º 5
0
def load_lambda_file(cnf_model_run_id):
    ''' Returns the "lambda_values" path for the given model run, with the
            "<date>" placeholder replaced by the 'date_updated' value of the
            run's cnf_lambda_version_id.
    '''
    file_template = utils.get_path("lambda_values",
                                   process="nonfatal_model")
    run_record = nd.get_run_record(cnf_model_run_id)
    version_id = run_record.at[0, 'cnf_lambda_version_id']
    version_entry = cdb.db_api().get_entry(
        table_name='cnf_lambda_version',
        uniq_col='cnf_lambda_version_id',
        val=version_id)
    date_suffix = str(version_entry.at[0, 'date_updated'])
    return file_template.replace("<date>", date_suffix)
Ejemplo n.º 6
0
def _add_ihme_pop_marker(df):
    ''' Returns the dataframe with an added 'ihme_pop_ok' column (0 or 1).

        A row is marked 1 when its dataset has can_use_ihme_pop == 1 in the
            "dataset" table, or when its sdi_quintile is 5 and its
            full_coverage is 1. The 'sdi_quintile' and 'full_coverage'
            columns are added first if they are absent.
        Per the original author, the marker indicates whether ihme
            population estimates may be merged with the uid.
    '''
    if 'sdi_quintile' not in df.columns:
        df = modeled_locations.add_sdi_quintile(df)
    if 'full_coverage' not in df.columns:
        df = add_coverage_metadata(df)
    dataset_table = cdb.db_api().get_table("dataset")
    df.loc[:, 'ihme_pop_ok'] = 0
    # Dataset-level permission
    for dataset_id in df['dataset_id'].unique():
        in_table = dataset_table['dataset_id'] == dataset_id
        permitted = dataset_table.loc[in_table, 'can_use_ihme_pop'].values[0]
        if permitted != 1:
            continue
        df.loc[df['dataset_id'] == dataset_id, 'ihme_pop_ok'] = permitted
    # Location-level permission: top SDI quintile with full coverage
    top_sdi = df['sdi_quintile'].isin([5])
    fully_covered = df['full_coverage'].isin([1])
    df.loc[top_sdi & fully_covered, 'ihme_pop_ok'] = 1
    return df
Ejemplo n.º 7
0
def load_durations(acause):
    ''' Returns a dataframe of sequela durations for the requested cause,
            with columns 'acause', 'me_tag' and 'sequela_duration'.

        Some causes borrow the durations stored under another cause in the
            'sequela_durations' table (e.g. every "neo_liver_*" subcause uses
            "neo_liver"). The returned 'acause' column always carries the
            requested acause, not the borrowed one.

        Raises AssertionError if no durations are found or if any duration
            is null.
    '''
    db_link = cdb.db_api('cancer_db')
    # The prefix is 10 characters long, so compare with startswith rather
    #   than a fixed-width slice (an 8-character slice can never match it)
    if acause.startswith("neo_liver_"):
        sequelae_cause = "neo_liver"
    elif acause == "neo_leukemia_other":
        sequelae_cause = "neo_leukemia_ll_chronic"
    elif acause == "neo_nmsc":
        sequelae_cause = "neo_nmsc_scc"
    elif acause == "neo_other_cancer":
        sequelae_cause = "neo_other"
    else:
        sequelae_cause = acause
    sq_df = db_link.get_table('sequela_durations')
    # Copy the filtered rows so relabeling them does not write to a slice of
    #   the source table
    this_sq = sq_df.loc[sq_df['acause'] == sequelae_cause, :].copy()
    this_sq.loc[:, 'acause'] = acause
    assert this_sq['sequela_duration'].notnull().all(), \
        "error loading sequela durations"
    assert len(this_sq) > 0, "Error loading sequela durations"
    return this_sq[['acause', 'me_tag', 'sequela_duration']]
Ejemplo n.º 8
0
def sequelae_fractions(acause):
    ''' Returns a dict, keyed by modelable_entity_id, of the fraction and
            me_tag used when splitting sequela for the cause, or False if no
            fractions are defined for the cause.

        Fractions are hard-coded (per the original author, from lit review)
            and currently exist only for 'neo_prostate'. Entities marked
            inactive in the "cnf_model_entity" table are left out.
    '''
    # Share of the treated population who primarily develop each disability
    incontinence_frac = 0.18
    impotence_frac = 0.55
    prostate_fractions = {
        # Fractions used to calculate the controlled phase
        18781: {'fraction': impotence_frac},     # with impotence
        18782: {'fraction': incontinence_frac},  # with incontinence
        # Fractions used to calculate the metrics of sequela beyond ten years
        18784: {'fraction': impotence_frac},
        18785: {'fraction': incontinence_frac},
    }
    fractions = {'neo_prostate': prostate_fractions}
    # Attach me_tags (enables later linking of data to modelable_entity_id)
    #   and discard inactive entities
    entity_table = cdb.db_api().get_table("cnf_model_entity")
    for me_id in list(prostate_fractions):
        this_entity = entity_table['modelable_entity_id'].eq(me_id)
        if entity_table.loc[this_entity, 'is_active'].item() == 0:
            prostate_fractions.pop(me_id)
            continue
        prostate_fractions[me_id]['me_tag'] = entity_table.loc[
            this_entity, 'me_tag'].item()
    return fractions.get(acause, False)
Ejemplo n.º 9
0
def add_representativeness(df):
    ''' Returns the dataframe with an added 'representative' column derived
            from the 'representative_of_location_id' values of the "registry"
            table, then passed through update_repness.
        Per the original author, the column indicates whether data are
            representative of their attached location_id.

        Raises AssertionError if the number of rows changes.
    '''
    def _avg_repness(regs, rep_table):
        ''' Returns the minimum 'representative_of_location_id' among the
                registries in regs (so, presumably with 0/1 values, 1 iff all
                registries are representative). Returns 0 if none of the
                registries are found or if anything goes wrong.
        '''
        # Best-effort by design: any ordinary failure counts as "not
        #   representative". Exception (rather than a bare except) keeps
        #   KeyboardInterrupt and SystemExit from being swallowed.
        try:
            if not isinstance(regs, (tuple, list)):
                try:
                    # regs may be the string form of a tuple/list
                    regs = list(literal_eval(regs))
                except Exception:
                    regs = list(regs)
            rep = rep_table.loc[rep_table['registry_index'].isin(regs),
                                'representative_of_location_id']
            if len(rep) == 0:
                return 0
            return rep.min()
        except Exception:
            return 0

    print("adding representative status...")
    db_link = cdb.db_api("cancer_db")
    # Add representative status based on the input registries
    rep_status = db_link.get_table("registry")[[
        'registry_index', 'representative_of_location_id'
    ]]
    # Evaluate each distinct registry_index once, then merge back onto df
    rep_df = pd.DataFrame(
        {'registry_index': df['registry_index'].unique().tolist()})
    get_repness = partial(_avg_repness, rep_table=rep_status)
    rep_df.loc[:,
               'representative'] = rep_df['registry_index'].apply(get_repness)
    output = df.merge(rep_df, on='registry_index')
    output = update_repness(output)
    assert len(output) == len(
        df), "add_representativeness is adding or deleting data"
    return output