def attach_nid(df):
    """
    Attach the merged MarketScan NIDs to the data.

    The NIDs are the merged ones (private + medicare supplemental
    insurance). The lookup below (keys appear to be data years) is handed
    to ``hosp_prep.fill_nid`` together with ``df``; the frame that helper
    returns is checked and then returned to the caller.

    Raises an AssertionError if any row of the result has an ``nid`` of 0.
    """
    merged_nids = {
        2000: 244369,
        2010: 244370,
        2011: 336850,
        2012: 244371,
        2013: 336849,
        2014: 336848,
        2015: 336847,
        2016: 408680,
    }

    df = hosp_prep.fill_nid(df, merged_nids)

    # NOTE(review): only a value of 0 is treated as "missing" here. If
    # fill_nid leaves NaN for rows it cannot match, those rows would pass
    # this check -- confirm against fill_nid's implementation.
    has_zero_nid = (df['nid'] == 0).any()
    assert not has_zero_nid, "There are missing NIDs"

    return df
    2001: 87001,
    2002: 87002,
    2003: 87003,
    2004: 87004,
    2005: 87005,
    2006: 87006,
    2007: 87007,
    2008: 87008,
    2009: 87009,
    2010: 87010,
    2011: 87011,
    2012: 114876,
    2013: 160484,
    2014: 237756
}
df = hosp_prep.fill_nid(df, nid_dictionary)

# Translate the Spanish sex labels to numeric codes: the female labels
# ('Mujeres'/'Mujer') become 2 and the male labels ('Hombres'/'Hombre')
# become 1.
fix_sex_dict = {'Mujeres': 2, 'Mujer': 2, 'Hombres': 1, 'Hombre': 1}
# Assign the result back instead of calling replace(inplace=True) on the
# column selection: the chained in-place form operates on an intermediate
# object, emits a FutureWarning on recent pandas and leaves df untouched
# when Copy-on-Write is enabled.
df['sex_id'] = df['sex_id'].replace(fix_sex_dict)
# errors='raise' makes any label that is still non-numeric fail loudly here.
df['sex_id'] = pd.to_numeric(df['sex_id'], downcast='integer', errors='raise')

# Every value other than 1 or 2 (NaN included, since NaN != x is True) is
# recoded to 3 -- presumably the "unknown" sex code; confirm.
df.loc[(df['sex_id'] != 1) & (df['sex_id'] != 2), 'sex_id'] = 3

# Invariant after the recode above: only the codes 1, 2 and 3 remain.
assert set(df.sex_id.unique()).issubset({1, 2, 3})

age_unit_translation_dict = {
    "A\xf1os (1 a 98 a\xf1os de edad)": "Years",
    "Meses (1 a 11 meses de edad)": "Months",
    'D\xedas (1 a 29 d\xedas de edad)': 'Days',
    'Ignorado': 'Unknown',
    'Anios (1 a 115 anios de edad)': "Years",
# NID lookup handed to hosp_prep.fill_nid (keys 2004-2015, presumably the
# data years -- confirm against fill_nid).
nid_dict = {
    2004: 86953,
    2005: 86954,
    2006: 86955,
    2007: 86956,
    2008: 86957,
    2009: 86958,
    2010: 94170,
    2011: 94171,
    2012: 121282,
    2013: 150449,
    2014: 220205,
    2015: 281773
}
df = hosp_prep.fill_nid(df, nid_dict)

# Keep only rows whose `los` (presumably length of stay) is strictly
# positive. Rows with a missing `los` are dropped as well, because the
# comparison NaN > 0 evaluates to False.
# (Two bare inspection expressions on `los` that preceded this filter were
# removed: their results were discarded, so they had no effect in a script.)
df = df[df['los'] > 0]
# Row count remaining after the `los` filter.
final_admits = len(df)

# Every remaining row gets the same facility label.
df['facility_id'] = 'hospital'

df = hosp_prep.age_binning(df, drop_age=True)

# Column names gathered for later use; judging by the name they are meant
# to be integer-typed, but the code that consumes this list is outside
# this view.
int_cols = [
    'location_id', 'year_start', 'year_end', 'age_group_unit', 'age_start',
    'age_end', 'sex_id', 'nid', 'representative_id', 'metric_id'
]