def deaths(df, db_id, sim_year):
    """
    Calculate deaths by applying death rates to non-migrating population

    Parameters
    ----------
    df : pandas DataFrame
        population and death rates for current yr; must contain the columns
        'non_mig_pop', 'death_rate', 'mig_in_net', 'type', 'mildep' and 'yr'
    db_id : int
        primary key for current simulation
    sim_year : int
        year being simulated (not used by this function)

    Returns
    -------
    df : pandas DataFrame
        survived population per cohort for a given year, i.e. the input
        with a 'survived' column added and the 'deaths', 'yr',
        'death_rate' and 'non_mig_pop' columns removed

    Notes
    -----
    The 'deaths' and 'survived' columns are written onto the frame that is
    passed in, so the caller's object is modified.
    Two tables are written to 'defm.db' through ``log.insert_run``:
    'deaths_by_cohort_by_age' and 'survived'.
    """
    df['deaths'] = (df['non_mig_pop'] * df['death_rate']).round()

    # report out deaths: only cohorts that actually recorded a death
    deaths_out = df[df['deaths'] != 0].drop(columns=['mig_in_net'])
    log.insert_run('defm.db', db_id, deaths_out, 'deaths_by_cohort_by_age')
    # The per-age aggregate ('deaths_sum_by_age') is not logged in this
    # version of the function, so it is not computed either.

    # SPECIAL CASES
    # deaths not carried over into next year
    deaths_not_carried = (
        ((df['type'] == 'HP') & (df['mildep'] == 'Y'))
        | df['type'].isin(['COL', 'INS', 'MIL', 'OTH'])
    )
    df['survived'] = np.where(
        deaths_not_carried,
        df['non_mig_pop'],                 # special case
        df['non_mig_pop'] - df['deaths'])  # else

    # record survivors for every cohort that had non-migrating population
    survived_out = df[df['non_mig_pop'] != 0].drop(columns=['mig_in_net'])
    log.insert_run('defm.db', db_id, survived_out, 'survived')

    # drop other unnecessary columns
    # (keyword form: the positional ``axis`` argument of DataFrame.drop
    # is not accepted by pandas >= 2.0)
    df = df.drop(columns=['deaths', 'yr', 'death_rate', 'non_mig_pop'])
    return df
def net_mig(df, db_id, sim_year):
    """
    Calculate net migration by applying rates to population

    Parameters
    ----------
    df : pandas.DataFrame
        population and migration rates for current yr
    db_id : int
        primary key for current simulation
    sim_year : int
        year being simulated

    Returns
    -------
    df : pandas DataFrame
        In and out migrating population per cohort for a given year:
        population x migration rate, where rates are
        domestic in (DIN), domestic out (DOUT),
        foreign in (FIN), and foreign out (FOUT)
    """
    rate_cols = ['DIN', 'DOUT', 'FIN', 'FOUT']

    # SPECIAL CASE: no migration, set rates to zero
    #   - when group quarters = "HP" and mildep = "Y"
    #   - when group quarters equal COL, INS, MIL, or OTH
    hp_and_mildep = (df['type'] == 'HP') & (df['mildep'] == 'Y')
    in_group_quarters = df['type'].isin(['COL', 'INS', 'MIL', 'OTH'])
    df.loc[hp_and_mildep | in_group_quarters, rate_cols] = 0

    # migrants per flow = persons x rate, rounded
    # (tuple order fixes the order in which the new columns are created)
    persons = df['persons']
    flows = (
        ('DOUT', 'mig_Dout'),
        ('FOUT', 'mig_Fout'),
        ('DIN', 'mig_Din'),
        ('FIN', 'mig_Fin'),
    )
    for rate_name, migrants_name in flows:
        df[migrants_name] = (persons * df[rate_name]).round()

    # calculate net migration
    df['mig_out_net'] = df['mig_Dout'] + df['mig_Fout']
    df['mig_in_net'] = df['mig_Din'] + df['mig_Fin']

    # log only the cohorts for which at least one rate is non-zero
    has_any_rate = (df[rate_cols] != 0).any(axis=1)
    net_mig_db = df[has_any_rate].copy()
    log.insert_run('defm.db', db_id, net_mig_db, 'net_migration')

    return df
def non_mig(nm_df, db_id, sim_year):
    """
    Calculate non-migration population by subtracting net out migrating
    from population

    Parameters
    ----------
    nm_df : pandas.DataFrame
        population for current yr and population migrating in & out; must
        contain 'type', 'mildep', 'households', 'persons', 'mig_out_net',
        'mig_in_net' and 'yr'
    db_id : int
        primary key for current simulation
    sim_year : int
        year being simulated (not used by this function)

    Returns
    -------
    nm_df : pandas DataFrame
        non-migrating population per cohort for a given year, with columns
        'type', 'mildep', 'households', 'non_mig_pop' and 'mig_in_net'

    Notes
    -----
    The 'non_mig_pop' column is written onto the frame that is passed in
    before the column subset is taken.
    """
    nm_df['non_mig_pop'] = nm_df['persons'] - nm_df['mig_out_net']

    # drop unnecessary columns
    nm_df = nm_df[[
        'type', 'mildep', 'households', 'persons', 'mig_out_net',
        'non_mig_pop', 'mig_in_net', 'yr'
    ]]

    # record non migration population in result database
    # remove rows that have zero population
    nm_db = nm_df[nm_df['non_mig_pop'] != 0].drop(columns=['mig_in_net'])
    log.insert_run('defm.db', db_id, nm_db, 'non_migrating')

    # drop year column in order to join w birth and death rates
    # (keyword form: the positional ``axis`` argument of DataFrame.drop
    # is not accepted by pandas >= 2.0)
    nm_df = nm_df.drop(columns=['yr', 'persons', 'mig_out_net'])
    return nm_df
def non_mig(nm_df, db_id, sim_year):
    """
    Calculate non-migration population by subtracting net out migrating
    from population

    Parameters
    ----------
    nm_df : pandas.DataFrame
        population for current yr and population migrating in & out; must
        contain 'type', 'mildep', 'households', 'persons', 'mig_out_net',
        'mig_in_net' and 'yr'
    db_id : int
        primary key for current simulation
    sim_year : int
        year being simulated (not used by this function)

    Returns
    -------
    nm_df : pandas DataFrame
        non-migrating population per cohort for a given year, with columns
        'type', 'mildep', 'households', 'non_mig_pop' and 'mig_in_net'

    Notes
    -----
    The 'non_mig_pop' column is written onto the frame that is passed in
    before the column subset is taken.
    """
    nm_df['non_mig_pop'] = nm_df['persons'] - nm_df['mig_out_net']

    # drop unnecessary columns
    kept_columns = [
        'type', 'mildep', 'households', 'persons', 'mig_out_net',
        'non_mig_pop', 'mig_in_net', 'yr',
    ]
    nm_df = nm_df[kept_columns]

    # record non migration population in result database
    # remove rows that have zero population
    nm_db = nm_df[nm_df['non_mig_pop'] != 0].drop(columns=['mig_in_net'])
    log.insert_run('defm.db', db_id, nm_db, 'non_migrating')

    # drop year column in order to join w birth and death rates
    # (keyword form: the positional ``axis`` argument of DataFrame.drop
    # is not accepted by pandas >= 2.0)
    return nm_df.drop(columns=['yr', 'persons', 'mig_out_net'])
def deaths(df, db_id, sim_year):
    """
    Calculate deaths by applying death rates to non-migrating population

    Parameters
    ----------
    df : pandas DataFrame
        population and death rates for current yr; must contain the columns
        'non_mig_pop', 'death_rate', 'mig_in_net', 'households', 'type',
        'mildep' and 'yr'
    db_id : int
        primary key for current simulation
    sim_year : int
        year being simulated (not used by this function)

    Returns
    -------
    df : pandas DataFrame
        survived population per cohort for a given year, i.e. the input
        with a 'survived' column added and the 'deaths', 'yr',
        'death_rate' and 'non_mig_pop' columns removed

    Notes
    -----
    The 'deaths' and 'survived' columns are written onto the frame that is
    passed in, so the caller's object is modified.
    Three tables are written to 'defm.db' through ``log.insert_run``:
    'deaths_by_cohort_by_age', 'deaths_sum_by_age' and 'survived'.
    """
    df['deaths'] = (df['non_mig_pop'] * df['death_rate']).round()

    # report out deaths: only cohorts that actually recorded a death
    deaths_out = df[df['deaths'] != 0].drop(columns=['mig_in_net'])
    log.insert_run('defm.db', db_id, deaths_out, 'deaths_by_cohort_by_age')

    # Sum deaths over age.  The grouping keys 'race_ethn' and 'sex' (and
    # the dropped 'age') must be columns after reset_index.
    # NOTE(review): presumably they come from an (age, race_ethn, sex)
    # index - confirm against the caller.
    deaths_grouped = (
        deaths_out.reset_index(drop=False)
        .drop(columns=['non_mig_pop', 'death_rate', 'age', 'households'])
        .groupby(['yr', 'race_ethn', 'mildep', 'sex', 'type'],
                 as_index=False)
        .sum()
    )
    log.insert_run('defm.db', db_id, deaths_grouped, 'deaths_sum_by_age')

    # SPECIAL CASES
    # deaths not carried over into next year
    deaths_not_carried = (
        ((df['type'] == 'HP') & (df['mildep'] == 'Y'))
        | df['type'].isin(['COL', 'INS', 'MIL', 'OTH'])
    )
    df['survived'] = np.where(
        deaths_not_carried,
        df['non_mig_pop'],                 # special case
        df['non_mig_pop'] - df['deaths'])  # else

    # record survivors for every cohort that had non-migrating population
    survived_out = df[df['non_mig_pop'] != 0].drop(columns=['mig_in_net'])
    log.insert_run('defm.db', db_id, survived_out, 'survived')

    # drop other unnecessary columns
    # (keyword form: the positional ``axis`` argument of DataFrame.drop
    # is not accepted by pandas >= 2.0)
    df = df.drop(columns=['deaths', 'yr', 'death_rate', 'non_mig_pop'])
    return df
# columns: 'age', 'race_ethn', 'sex' (cohort),
# 'gq.type', 'mildep', 'persons', 'households'
population = extract.create_df('population', 'population_table')

# special case ratios
ins_ratio = extract.create_df('ins', 'rate_table')
oth_ratio = extract.create_df('oth', 'rate_table')

# base population to result database with year
# (the base population is logged under the year before the first
# simulated year)
y0 = years['y1'] - 1
population['yr'] = y0
log.insert_run('defm.db', db_run_id, population, 'population')
# 'yr' was added only for the logged table; remove it again.
# Keyword form: the positional ``axis`` argument of DataFrame.drop is not
# accepted by pandas >= 2.0.
population = population.drop(columns=['yr'])

# years to be used in model
population_summary = []  # initialize list for population by year

# iterate over all years
for index, yr in enumerate(range(years['y1'], years['yf'] + 1)):
    print('{} {}'.format(index, yr))
    # MIGRATION
    # rates for simulated yr joined with population DataFrame
# Load base population: SQL query to pandas DataFrame
# columns: 'age', 'race_ethn', 'sex' (cohort),
# 'gq.type', 'mildep', 'persons', 'households'
population = extract.create_df('population', 'population_table')

# special case ratios
ins_ratio = extract.create_df('ins', 'rate_table')
oth_ratio = extract.create_df('oth', 'rate_table')

# base population to result database with year
# (the base population is logged under the year before the first
# simulated year)
y0 = years['y1'] - 1
population['yr'] = y0
log.insert_run('defm.db', db_run_id, population, 'population')
# 'yr' was added only for the logged table; remove it again.
# Keyword form: the positional ``axis`` argument of DataFrame.drop is not
# accepted by pandas >= 2.0.
population = population.drop(columns=['yr'])

# years to be used in model
population_summary = []  # initialize list for population by year

# iterate over all years
for index, yr in enumerate(range(years['y1'], years['yf'] + 1)):
    print('{} {}'.format(index, yr))
    # MIGRATION
    # rates for simulated yr joined with population DataFrame
    yr_mig = compute.rates_for_yr(population, mig_rates, yr)
def births_sum(df, db_id, sim_year):
    """
    Sum births over all the ages in a given cohort
    Set birth age to zero and reset DataFrame index

    Parameters
    ----------
    df : pandas DataFrame
        male and female births for each cohort and non-migrating
        population; after ``reset_index`` it must expose the columns
        'yr', 'race_ethn', 'mildep', 'type', 'births_m' and 'births_f'
    db_id : int
        primary key for current simulation
    sim_year : int
        year being simulated (not used by this function)

    Returns
    -------
    births_age0 : pandas DataFrame
        births summed across age for each cohort, indexed by
        ('age', 'race_ethn', 'sex') with age = 0, and with columns
        'mildep', 'type', 'persons' and 'households'

    Notes
    -----
    Non-zero newborn counts are written to the 'newborns' table of
    'defm.db' through ``log.insert_run``.
    """
    df = df.reset_index(drop=False)
    df = df[['yr', 'race_ethn', 'mildep', 'type', 'births_m', 'births_f']]
    births_grouped = df.groupby(
        ['yr', 'race_ethn', 'mildep', 'type'], as_index=False).sum()

    # Build one age-0 cohort frame per sex: the births column of that sex
    # becomes 'persons' and the other sex's births column is discarded.
    cohorts = []
    for sex, own_col, other_col in (('M', 'births_m', 'births_f'),
                                    ('F', 'births_f', 'births_m')):
        cohort = births_grouped.rename(columns={own_col: 'persons'})
        cohort['sex'] = sex
        cohort['age'] = 0
        cohort = cohort.set_index(['age', 'race_ethn', 'sex'])
        cohorts.append(cohort.drop(columns=[other_col]))

    births_mf = pd.concat(cohorts, axis=0)
    births_mf['households'] = 0  # temp ignore households

    # no births for this special case
    # (``~`` is the boolean negation; unary ``-`` on a boolean mask raises
    # TypeError on current NumPy/pandas)
    births_mf = births_mf[
        ~births_mf['type'].isin(['COL', 'MIL', 'INS', 'OTH'])]

    # report out only the cohorts that actually have newborns
    newborns = births_mf[births_mf['persons'] != 0].copy()
    newborns = newborns.rename(columns={'persons': 'newborns'})
    newborns = newborns.drop(columns=['households'])
    log.insert_run('defm.db', db_id, newborns, 'newborns')

    births_mf = births_mf.drop(columns=['yr'])

    # SPECIAL CASE:
    # Births are estimated & reported out, but are not carried over into the
    # next year ( base_population.type="HP" and base_population.mildep="Y")
    # keep rows in which either type != 'HP' OR mildep != 'Y'
    # which results in dropping rows where type = 'HP' AND mildep = 'Y'
    births_mf = births_mf[
        (births_mf['type'] != 'HP') | (births_mf['mildep'] != 'Y')]

    return births_mf
def births_all(b_df, db_id, sim_year):
    """
    Calculate births for given year based on rates.
    Predict male births as 51% of all births & female births as the
    remainder.
    Male births are the floor of (51% of total births) + 0 or + 0.5
    (randomly generated)

    Parameters
    ----------
    b_df : pandas.DataFrame
        population for current yr and birth rates; must contain 'type',
        'non_mig_pop', 'birth_rate', 'yr' and 'mig_in_net'
    db_id : int
        primary key for current simulation
    sim_year : int
        year being simulated (not used by this function)

    Returns
    -------
    b_df : pandas DataFrame
        male and female births by cohort (race_ethn and age); only rows
        whose 'yr' is not null, i.e. rows that had a birth rate

    Notes
    -----
    The global NumPy random generator is re-seeded with 2010 on every
    call, so the same sequence of 0 / 0.5 offsets is produced each call.
    'birth_rate' is zeroed for the special-case rows on the frame that is
    passed in, so the caller's object is modified.
    Rows with at least one birth are written to the 'mothers_n_births'
    table of 'defm.db' through ``log.insert_run``.
    """
    # SPECIAL CASE: no births, set rates to zero
    # when group quarters in ("COL","INS","MIL","OTH")
    b_df.loc[b_df['type'].isin(['COL', 'INS', 'MIL', 'OTH']),
             ['birth_rate']] = 0

    # total births = population * birth rate (fill blanks w zero)
    b_df['births_rounded'] = (
        b_df['non_mig_pop'] * b_df['birth_rate']).fillna(0.0)
    b_df = b_df.round({'births_rounded': 0})

    # male births 51%
    b_df['births_m_float'] = b_df['births_rounded'] * 0.51

    # 0 or 0.5 generated randomly by multiplying 0 or 1 by 0.5
    np.random.seed(2010)
    b_df['randomNumCol'] = 0.5 * np.random.randint(2, size=b_df.shape[0])

    # Add random 0 or 0.5
    # Convert to int which truncates float (floor)
    b_df['births_m'] = (
        b_df['births_m_float'] + b_df['randomNumCol']).astype(int)

    # female births
    b_df['births_f'] = b_df['births_rounded'] - b_df['births_m']

    # remove rows w no birth rate
    # use yr column since yr column in original birth rates DataFrame
    b_df_notnull = b_df[b_df['yr'].notnull()].copy()

    # Remove zero rows: keep a row when it has male OR female births.
    # (Testing births_m twice would drop rows that only have female births.)
    has_births = (
        (b_df_notnull['births_m'] != 0) | (b_df_notnull['births_f'] != 0))
    births_db = b_df_notnull[has_births].reset_index(drop=False)
    # keyword form: the positional ``axis`` argument of DataFrame.drop is
    # not accepted by pandas >= 2.0
    births_db = births_db.drop(columns=['mig_in_net', 'sex'])

    # human-readable column names for the logged table
    births_db = births_db.rename(columns={
        'births_m': 'male births',
        'births_f': 'female births',
        'randomNumCol': 'add (random) then floor',
        'age': 'mother age',
        'race_ethn': 'mother race_ethn',
        'births_rounded': 'total births',
        'births_m_float': 'male births (float)',
    })
    log.insert_run('defm.db', db_id, births_db, 'mothers_n_births')

    return b_df_notnull