def run(self): engine = create_engine( get_connection_string("model_config.yml", 'output_database')) sectoral_share = extract.create_df('sectoral_share', 'sectoral_share_table', index=['yr', 'sandag_sector']) sectoral_pay = extract.create_df('sectoral_pay', 'sectoral_pay_table', index=['yr', 'sandag_sector']) jobs = pd.read_hdf('temp/data.h5', 'jobs') jobs = jobs[['jobs_total']] jobs = jobs.join(sectoral_share, how='right') jobs['sector_jobs'] = (jobs['jobs_total'] * jobs['share']).round() jobs = jobs.drop(['jobs_total'], 1) jobs = jobs.join(sectoral_pay) jobs['tot_ann_job_pay'] = (jobs['sector_jobs'] * jobs['annual_pay']).round() jobs.to_hdf('temp/data.h5', 'sectoral', mode='a') run_table = pd.read_hdf('temp/data.h5', 'run_id') run_id = run_table[0] jobs['run_id'] = run_id jobs.to_sql(name='sectors', con=engine, schema='defm', if_exists='append', index=True)
def run(self): econ_sim_rates = pd.read_hdf('temp/data.h5', 'econ_sim_rates') local_jobs = extract.create_df('local_jobs', 'local_jobs_table', rate_id=econ_sim_rates.lj_id[0], index=['yr']) in_commuting = extract.create_df('in_commuting', 'in_commuting_table', rate_id=econ_sim_rates.ic_id[0], index=['yr']) work_force_local = pd.read_hdf('temp/data.h5', 'work_force_local') work_force_local = work_force_local.join(local_jobs) work_force_local['jobs_local'] = ( work_force_local['work_force_local'] * work_force_local['jlw']).round() work_force_local = work_force_local.join(in_commuting) work_force_local['jobs_total'] = (work_force_local['jobs_local'] * work_force_local['wh_whlh']).round() work_force_local['jobs_external'] = ( work_force_local['jobs_total'] - work_force_local['jobs_local']).round() # pull information from here work_force_local.to_hdf('temp/data.h5', 'jobs', mode='a')
def run(self): cohort_ur = extract.create_df( 'cohort_ur', 'cohort_ur_table', index=['yr', 'age_cat', 'sex', 'race_ethn']) cohort_ur.to_hdf('temp/data.h5', 'cohort_ur', mode='a') yearly_ur = extract.create_df('yearly_ur', 'yearly_ur_table', index=['yr']) yearly_ur.to_hdf('temp/data.h5', 'yearly_ur', mode='a')
def run(self): econ_sim_rates = pd.read_hdf('temp/data.h5', 'econ_sim_rates') cohort_ur = extract.create_df( 'cohort_ur', 'cohort_ur_table', rate_id=econ_sim_rates.ur1_id[0], index=['yr', 'age_cat', 'sex', 'race_ethn']) cohort_ur.to_hdf('temp/data.h5', 'cohort_ur', mode='a') yearly_ur = extract.create_df('yearly_ur', 'yearly_ur_table', rate_id=econ_sim_rates.ur2_id[0], index=['yr']) yearly_ur.to_hdf('temp/data.h5', 'yearly_ur', mode='a')
def run(self): engine = create_engine(get_connection_string("model_config.yml", 'output_database')) db_connection_string = database.get_connection_string('model_config.yml', 'in_db') sql_in_engine = create_engine(db_connection_string) in_query = getattr(sql, 'max_run_id') db_run_id = pd.read_sql(in_query, engine, index_col=None) # db_run_id = log.new_run(name='inc_run_log', run_id=db_run_id['max'].iloc[0]) run_id = pd.Series([db_run_id['id'].iloc[0]]) run_id.to_hdf('temp/data.h5', 'run_id', mode='a') dem_sim_rates = extract.create_df('dem_sim_rates', 'dem_sim_rates_table', rate_id=self.dem_id, index=None) dem_sim_rates.to_hdf('temp/data.h5', 'dem_sim_rates', mode='a') econ_sim_rates = extract.create_df('econ_sim_rates', 'econ_sim_rates_table', rate_id=self.econ_id, index=None) econ_sim_rates.to_hdf('temp/data.h5', 'econ_sim_rates', mode='a') tables = util.yaml_to_dict('model_config.yml', 'db_tables') in_query = getattr(sql, 'inc_pop') % (tables['inc_pop_table'], run_id[0]) in_query2 = getattr(sql, 'inc_mil_hh_pop') % (tables['population_table'], dem_sim_rates.base_population_id[0]) pop = pd.read_sql(in_query, engine, index_col=['age', 'race_ethn', 'sex', 'mildep']) pop_mil = pd.read_sql(in_query2, sql_in_engine, index_col=['age', 'race_ethn', 'sex', 'mildep']) pop = pop.join(pop_mil) pop['persons'] = (pop['persons'] - pop['mil_mildep']) pop = pop.reset_index(drop=False) pop = pop[pop['age'] >= 18] pop['age_cat'] = '' pop.loc[pop['age'].isin(list(range(18, 25))), ['age_cat']] = '18_24' pop.loc[pop['age'].isin(list(range(25, 35))), ['age_cat']] = '25_34' pop.loc[pop['age'].isin(list(range(35, 45))), ['age_cat']] = '35_44' pop.loc[pop['age'].isin(list(range(45, 55))), ['age_cat']] = '45_54' pop.loc[pop['age'].isin(list(range(55, 60))), ['age_cat']] = '55_59' pop.loc[pop['age'].isin(list(range(60, 65))), ['age_cat']] = '60_64' pop.loc[pop['age'].isin(list(range(65, 75))), ['age_cat']] = '65_74' pop.loc[pop['age'].isin(list(range(75, 103))), ['age_cat']] = '75_99' pop = pd.DataFrame(pop['persons'].groupby([pop['yr'], pop['age_cat']]).sum()) pop.to_hdf('temp/data.h5', 'pop', mode='a')
def run(self): econ_sim_rates = pd.read_hdf('temp/data.h5', 'econ_sim_rates') lfpr = extract.create_df('lfp_rates', 'lfp_rates_table', rate_id=econ_sim_rates.lfpr_id[0], index=['yr', 'age_cat', 'sex', 'race_ethn']) lfpr.to_hdf('temp/data.h5', 'lfpr', mode='a')
def run(self): engine = create_engine(get_connection_string("model_config.yml", 'output_database')) pop = pd.read_hdf('temp/data.h5', 'pop') inc_type_rates = extract.create_df('inc_shares', 'inc_shares_table', index=['yr', 'age_cat']) inc_type_rates = inc_type_rates.join(pop) inc_type_rates['totals'] = (inc_type_rates['income'] * inc_type_rates['persons'] * inc_type_rates['share']) inc_type_rates = inc_type_rates.reset_index(drop=False) inc_type_rates = pd.DataFrame(inc_type_rates['totals'].groupby([inc_type_rates['yr'], inc_type_rates['income_type']]).sum()) inc_type_rates = inc_type_rates.reset_index(drop=False) inc_type_rates = pd.pivot_table(inc_type_rates, values='totals', index=['yr'], columns=['income_type']) # inc_type_rates.to_hdf('temp/data.h5', 'inc_type_rates', mode='a') inc_type_rates.rename(columns={'intp': 'Interest'}, inplace=True) inc_type_rates.rename(columns={'oip': 'Other'}, inplace=True) inc_type_rates.rename(columns={'pap': 'Public_Assistance'}, inplace=True) inc_type_rates.rename(columns={'retp': 'Retirement'}, inplace=True) inc_type_rates.rename(columns={'ssip': 'Supplemental_Social_Security'}, inplace=True) inc_type_rates.rename(columns={'ssp': 'Social_Security'}, inplace=True) inc_type_rates = inc_type_rates[['Interest', 'Other', 'Public_Assistance', 'Retirement', 'Supplemental_Social_Security', 'Social_Security']] run_table = pd.read_hdf('temp/data.h5', 'run_id') run_id = run_table[0] inc_type_rates['run_id'] = run_id inc_type_rates.to_sql(name='non_wage_income', con=engine, schema='defm', if_exists='append', index=True)
def run(self): mig_rates = extract.create_df('migration', 'migration_rate_table') mig_rates = mig_rates[['yr', 'DIN', 'FIN']] mig_rates.to_hdf('temp/data.h5', 'in_mig_rates', format='table', mode='a')
def run(self): aged_pop = pd.read_hdf('temp/data.h5', 'aged_pop') new_pop = pd.read_hdf('temp/data.h5', 'new_pop') rates = pd.read_hdf('temp/data.h5', 'ins_oth_rates') pop = aged_pop.join(new_pop) pop = pop.fillna(0) pop.loc[pop['type'].isin(['COL', 'INS', 'MIL', 'OTH']), ['new_pop']] = 0 pop.loc[pop['mildep'].isin(['Y']), ['new_pop']] = 0 pop = pop.reset_index(drop=False) rates = rates.reset_index(drop=False) pop = pop.set_index(['age', 'race_ethn', 'sex', 'mildep', 'type']) rates = rates.set_index(['age', 'race_ethn', 'sex', 'mildep', 'type']) pop = pop.join(rates) pop = pop.reset_index(drop=False) pop = pop.set_index(['age', 'race_ethn', 'sex']) pop = cp.final_population(pop) householder = extract.create_df('householder', 'householder_table') householder = householder[(householder['yr'] == self.year)] householder = householder.drop(['yr'], 1) pop = pop.join(householder) pop = pop.fillna(0) pop['households'] = (pop['persons'] * pop['householder_rate']).round() pop = pop.drop(['householder_rate'], 1) pop.to_hdf('temp/data.h5', 'pop', format='table', mode='a')
def run(self): mil_pop = pd.read_hdf('temp/data.h5', 'mil_pop') mil_wages = pd.read_hdf('temp/data.h5', 'mil_pay') mil_wages = mil_wages.join(mil_pop) mil_wages['mil_gc_wages'] = (mil_wages['mil_gc_pop'] * mil_wages['annual_pay_gq']).round() mil_wages['mil_hh_wages'] = (mil_wages['mil_hh_pop'] * mil_wages['annual_pay_hp']).round() mil_wages['multiplier'] = 0 aigrm_table = extract.create_df('aigrm', 'aigrm_table', index=None) mil_wages.loc[mil_wages.index.get_level_values('yr') > 2014, ['multiplier']] = ( aigrm_table.aigrm[0] * (mil_wages.index.get_level_values('yr') - 2014)) # pow(1.01, mil_wages.index.get_level_values('yr') - 2014) mil_wages['mil_gc_wages'] = ( mil_wages['mil_gc_wages'] + mil_wages['mil_gc_wages'] * mil_wages['multiplier']) mil_wages['mil_hh_wages'] = ( mil_wages['mil_hh_wages'] + mil_wages['mil_hh_wages'] * mil_wages['multiplier']) mil_wages['military_income'] = (mil_wages['mil_gc_wages'] + mil_wages['mil_hh_wages']).round() mil_wages.to_hdf('temp/data.h5', 'mil_income', mode='a')
def run(self): birth_rates = pd.read_hdf('temp/data.h5', 'birth_rates') pop = pd.read_hdf('temp/data.h5', 'non_mig_pop') pop = pop[(pop['type'] == 'HHP') & (pop['mildep'] == 'N')] birth_rates = compute.rates_for_yr(pop, birth_rates, self.year) birth_rates = birth_rates[(birth_rates['yr'] == self.year)] random_numbers = extract.create_df('random_numbers', 'random_numbers_table') random_numbers = random_numbers[(random_numbers['yr'] == self.year)] random_numbers = random_numbers[['random_number']] births_per_cohort = compute.births_all(birth_rates, self.year, pop_col='non_mig_pop', rand_df=random_numbers) death_rates = pd.read_hdf('temp/data.h5', 'death_rates') death_rates = death_rates[(death_rates['yr'] == self.year)] # sum newborn population across cohorts newborn = compute.births_sum(births_per_cohort, self.year) newborn = newborn.join(death_rates) newborn['new_deaths'] = (newborn['new_born'] * newborn['death_rate']).round() newborn['new_born_survived'] = (newborn['new_born'] - newborn['new_deaths']).round() dead_pop = pd.read_hdf('temp/data.h5', 'dead_pop') dead_pop = dead_pop.join(newborn['new_deaths']) dead_pop = dead_pop.fillna(0) dead_pop['deaths'] = (dead_pop['deaths'] + dead_pop['new_deaths']).round() dead_pop = dead_pop.drop(['new_deaths'], 1) dead_pop.to_hdf('temp/data.h5', 'dead_pop', format='table', mode='a') newborn = newborn.drop(['new_deaths', 'death_rate'], 1) newborn.to_hdf('temp/data.h5', 'new_born', format='table', mode='a')
def run(self): engine = create_engine(get_connection_string("model_config.yml", 'output_database')) econ_sim_rates = pd.read_hdf('temp/data.h5', 'econ_sim_rates') pop = pd.read_hdf('temp/data.h5', 'pop') inc_type_rates = extract.create_df('inc_shares', 'inc_shares_table', rate_id=econ_sim_rates.inc1_id[0], index=['yr', 'age_cat']) inc_type_rates = inc_type_rates.join(pop) inc_type_rates['totals'] = (inc_type_rates['income'] * inc_type_rates['persons'] * inc_type_rates['share']) inc_type_rates = inc_type_rates.reset_index(drop=False) inc_type_rates['multiplier'] = 0 aigr_table = extract.create_df('aigr', 'aigr_table', rate_id=econ_sim_rates.aigr_id[0], index=None) inc_type_rates.loc[inc_type_rates['yr'] > 2014, ['multiplier']] = (aigr_table.aigr[0] * (inc_type_rates['yr'] - 2014)) # pow(1.01, mil_wages.index.get_level_values('yr') - 2014) inc_type_rates['totals'] = (inc_type_rates['totals'] + inc_type_rates['totals'] * inc_type_rates['multiplier']) inc_type_rates = pd.DataFrame(inc_type_rates['totals'].groupby([inc_type_rates['yr'], inc_type_rates['income_type']]).sum()) inc_type_rates = inc_type_rates.reset_index(drop=False) inc_type_rates = pd.pivot_table(inc_type_rates, values='totals', index=['yr'], columns=['income_type']) # inc_type_rates.to_hdf('temp/data.h5', 'inc_type_rates', mode='a') inc_type_rates.rename(columns={'intp': 'Interest'}, inplace=True) inc_type_rates.rename(columns={'oip': 'Other'}, inplace=True) inc_type_rates.rename(columns={'pap': 'Public_Assistance'}, inplace=True) inc_type_rates.rename(columns={'retp': 'Retirement'}, inplace=True) inc_type_rates.rename(columns={'ssip': 'Supplemental_Social_Security'}, inplace=True) inc_type_rates.rename(columns={'ssp': 'Social_Security'}, inplace=True) inc_type_rates.rename(columns={'semp': 'Selfemp_Income'}, inplace=True) inc_type_rates = inc_type_rates[['Interest', 'Other', 'Public_Assistance', 'Retirement', 'Supplemental_Social_Security', 'Social_Security', 'Selfemp_Income']] inc_type_rates.to_hdf('temp/data.h5', 'ue_income') run_table = pd.read_hdf('temp/data.h5', 'run_id') run_id = run_table[0] inc_type_rates['run_id'] = run_id inc_type_rates.to_sql(name='non_wage_income', con=engine, schema='defm', if_exists='append', index=True)
def run(self): econ_sim_rates = pd.read_hdf('temp/data.h5', 'econ_sim_rates') mil_pay = extract.create_df('mil_pay', 'mil_pay_table', rate_id=econ_sim_rates.mp_id[0], index=['yr']) mil_pay.to_hdf('temp/data.h5', 'mil_pay', mode='a')
def run(self): out_commuting = extract.create_df('out_commuting', 'out_commuting_table', index=['yr']) work_force = pd.read_hdf('temp/data.h5', 'work_force') work_force = work_force.reset_index(drop=False) work_force = pd.DataFrame(work_force[['labor_force', 'unemployed', 'work_force']].groupby([work_force['yr']]).sum()) work_force = work_force.join(out_commuting) work_force['work_force_outside'] = (work_force['work_force'] * work_force['wtlh_lh']).round() work_force['work_force_local'] = (work_force['work_force'] - work_force['work_force_outside']).round() work_force.to_hdf('temp/data.h5', 'local_work_force', mode='a')
def run(self): my_file = Path('temp/data.h5') if my_file.is_file(): print 'File exists' else: db_run_id = log.new_run() run_id = pd.Series([db_run_id]) run_id.to_hdf('temp/data.h5', 'run_id', mode='a') pop = extract.create_df('population', 'population_table') pop.to_hdf('temp/data.h5', 'pop', format='table', mode='a') pop2 = pop[(pop['type'] == 'HHP')] pop2 = pop2.reset_index(drop=False) pop2 = pd.DataFrame(pop2['persons'].groupby( [pop2['age'], pop2['race_ethn'], pop2['sex']]).sum()) pop2.rename(columns={'persons': 'persons_sum'}, inplace=True) pop2 = pop.join(pop2) pop2['rates'] = np.where(pop2['type'].isin(['INS', 'OTH']), (pop2['persons'] / pop2['persons_sum']), 0) rates = pop2[['mildep', 'type', 'rates']] rates.to_hdf('temp/data.h5', 'ins_oth_rates', format='table', mode='a') engine = create_engine( get_connection_string("model_config.yml", 'output_database')) population_summary = [] population_summary.append({ 'Year': self.year - 1, 'Run_id': run_id[0], 'Population': pop['persons'].sum(), 'mig_out': 0, 'mig_in': 0, 'deaths': 0, 'new_born': 0 }) summary_df = pd.DataFrame(population_summary) summary_df.to_sql(name='population_summary', con=engine, schema='defm', if_exists='append', index=False) pop['yr'] = 2010 pop['run_id'] = db_run_id pop.to_sql(name='population', con=engine, schema='defm', if_exists='append', index=True)
def my_form(): econ_sim_ids = extract.create_df('econ_sim_ids', 'econ_sim_ids_table', rate_id=0, index=None) dem_sim_ids = extract.create_df('dem_sim_ids', 'dem_sim_ids_table', rate_id=0, index=None) dems = zip(dem_sim_ids['demographic_simulation_id'], dem_sim_ids['desc_short']) econs = econ_sim_ids['economic_simulation_id'].tolist() startyear = range(2011, 2050) endyear = range(2012, 2051) return render_template("my-form.html", result1=dems, result2=econs, startyear=startyear, endyear=endyear)
def run(self): # create file only first year, exists in subsequent years my_file = Path('temp/data.h5') if my_file.is_file(): print'File exists' else: # only first year db_run_id = log.new_run(dem_id=self.dem_id, econ_id=self.econ_id) run_id = pd.Series([db_run_id]) run_id.to_hdf('temp/data.h5', 'run_id', mode='a') dem_sim_rates = extract.create_df('dem_sim_rates', 'dem_sim_rates_table', rate_id=self.dem_id, index=None) dem_sim_rates.to_hdf('temp/data.h5', 'dem_sim_rates', mode='a') pop = extract.create_df('population', 'population_table', rate_id=dem_sim_rates.base_population_id[0]) pop.to_hdf('temp/data.h5', 'pop', mode='a') # Create function here and test # to get ratio of INS and OTH to HHP to keep constant rates = cp.compute_ins_oth_rate(pop) rates.to_hdf('temp/data.h5', 'ins_oth_rates', mode='a') engine = create_engine(get_connection_string("model_config.yml", 'output_database')) population_summary = [] population_summary.append({'Year': self.year - 1, 'Run_id': run_id[0], 'Population': pop['persons'].sum(), 'mig_out': 0, 'mig_in': 0, 'deaths_hhp_non_mil': 0, 'new_born': 0}) summary_df = pd.DataFrame(population_summary) summary_df.to_sql(name='population_summary', con=engine, schema='defm', if_exists='append', index=False) pop['yr'] = self.year - 1 pop['run_id'] = db_run_id pop.to_sql(name='population', con=engine, schema='defm', if_exists='append', index=True)
def run(self): engine = create_engine( get_connection_string("model_config.yml", 'output_database')) econ_sim_rates = pd.read_hdf('temp/data.h5', 'econ_sim_rates') trs_rates = extract.create_df('trs', 'trs_table', rate_id=econ_sim_rates.trs_id[0], index=['yr']) hh_income = pd.read_hdf('temp/data.h5', 'hh_income') mil_income = pd.read_hdf('temp/data.h5', 'mil_income') ue_income = pd.read_hdf('temp/data.h5', 'ue_income') inc = hh_income.join(mil_income) inc = inc.join(ue_income) inc = inc.join(trs_rates) inc['unearned_income'] = (inc['Interest'] + inc['Other'] + inc['Public_Assistance'] + inc['Retirement'] + inc['Supplemental_Social_Security'] + inc['Social_Security']).round() inc['personal_income'] = (inc['jobs_local_wages'] + inc['wf_outside_wages'] + inc['unearned_income'] + inc['Selfemp_Income'] + inc['military_income']).round() inc['taxable_retail_sales'] = (inc['personal_income'] * inc['trs_pct']).round() inc = inc[[ 'labor_force', 'unemployed', 'work_force', 'work_force_outside', 'work_force_local', 'jobs_local', 'jobs_total', 'jobs_external', 'avg_wage', 'jobs_total_wages', 'jobs_local_wages', 'jobs_external_wages', 'wf_outside_wages', 'military_income', 'unearned_income', 'Selfemp_Income', 'personal_income', 'taxable_retail_sales' ]] run_table = pd.read_hdf('temp/data.h5', 'run_id') run_id = run_table[0] inc['run_id'] = run_id inc.to_sql(name='emp_summary', con=engine, schema='defm', if_exists='append', index=True)
def run(self): dem_sim_rates = pd.read_hdf('temp/data.h5', 'dem_sim_rates') mig_rates = extract.create_df('migration', 'migration_rate_table', rate_id=dem_sim_rates.migration_rate_id[0]) mig_rates = mig_rates[['yr', 'DOUT', 'FOUT']] mig_rates.to_hdf('temp/data.h5', 'out_mig_rates', mode='a')
def run(self): lfpr = extract.create_df('lfp_rates', 'lfp_rates_table', index=['yr', 'age_cat', 'sex', 'race_ethn']) lfpr.to_hdf('temp/data.h5', 'lfpr', mode='a')
def run(self): birth_rates = extract.create_df('birth', 'birth_rate_table') birth_rates.to_hdf('temp/data.h5', 'birth_rates', format='table', mode='a')
os.chdir(os.path.dirname(full_path)) # set console display to show MultiIndex for every row pd.set_option('display.multi_sparse', False) # rate versions to result database & return primary key for table # db_run_id = log.new_run('model_summary.db') db_run_id = log.new_run('defm.db') years = util.yaml_to_dict('model_config.yml', 'years') # Load rates for all years: SQL query to pandas DataFrame # columns: 'age', 'race_ethn', 'sex' (cohort), 'rate', 'year' # pivot migration DataFrame w 4 rates: domestic in & out, foreign in & out birth_rates = extract.create_df('birth', 'rate_table') death_rates = extract.create_df('death', 'rate_table') mig_rates = extract.create_df('migration', 'rate_table', pivot=True) # Load base population: SQL query to pandas DataFrame # columns: 'age', 'race_ethn', 'sex' (cohort), # 'gq.type', 'mildep', 'persons', 'households' population = extract.create_df('population', 'population_table') # special case ratios ins_ratio = extract.create_df('ins', 'rate_table') oth_ratio = extract.create_df('oth', 'rate_table')
os.chdir(os.path.dirname(full_path)) # set console display to show MultiIndex for every row pd.set_option('display.multi_sparse', False) # rate versions to result database & return primary key for table # db_run_id = log.new_run('model_summary.db') db_run_id = log.new_run() years = util.yaml_to_dict('model_config.yml', 'years') # Load rates for all years: SQL query to pandas DataFrame # columns: 'age', 'race_ethn', 'sex' (cohort), 'rate', 'year' # pivot migration DataFrame w 4 rates: domestic in & out, foreign in & out birth_rates = extract.create_df('birth', 'rate_table') death_rates = extract.create_df('death', 'rate_table') mig_rates = extract.create_df('migration', 'rate_table', pivot=True) # Load base population: SQL query to pandas DataFrame # columns: 'age', 'race_ethn', 'sex' (cohort), # 'gq.type', 'mildep', 'persons', 'households' population = extract.create_df('population', 'population_table') # special case ratios ins_ratio = extract.create_df('ins', 'rate_table') oth_ratio = extract.create_df('oth', 'rate_table') # base population to result database with year
def run(self): mil_pay = extract.create_df('mil_pay', 'mil_pay_table', index=['yr']) mil_pay.to_hdf('temp/data.h5', 'mil_pay', mode='a')
def run(self): dem_sim_rates = pd.read_hdf('temp/data.h5', 'dem_sim_rates') birth_rates = extract.create_df('birth', 'birth_rate_table', rate_id=dem_sim_rates.birth_rate_id[0]) birth_rates.to_hdf('temp/data.h5', 'birth_rates', mode='a')
def run(self): engine = create_engine( get_connection_string("model_config.yml", 'output_database')) db_connection_string = database.get_connection_string( 'model_config.yml', 'in_db') sql_in_engine = create_engine(db_connection_string) in_query = getattr(sql, 'max_run_id') db_run_id = pd.read_sql(in_query, engine, index_col=None) # db_run_id = log.new_run(name='emp_run_log', run_id=db_run_id['max'].iloc[0]) run_id = pd.Series([db_run_id['id'].iloc[0]]) run_id.to_hdf('temp/data.h5', 'run_id', mode='a') tables = util.yaml_to_dict('model_config.yml', 'db_tables') dem_sim_rates = extract.create_df('dem_sim_rates', 'dem_sim_rates_table', rate_id=self.dem_id, index=None) dem_sim_rates.to_hdf('temp/data.h5', 'dem_sim_rates', mode='a') econ_sim_rates = extract.create_df('econ_sim_rates', 'econ_sim_rates_table', rate_id=self.econ_id, index=None) econ_sim_rates.to_hdf('temp/data.h5', 'econ_sim_rates', mode='a') in_query = getattr(sql, 'inc_pop') % (tables['inc_pop_table'], run_id[0]) in_query2 = getattr(sql, 'inc_mil_hh_pop') % ( tables['population_table'], dem_sim_rates.base_population_id[0]) pop = pd.read_sql(in_query, engine, index_col=['age', 'race_ethn', 'sex', 'mildep']) pop_mil = pd.read_sql(in_query2, sql_in_engine, index_col=['age', 'race_ethn', 'sex', 'mildep']) pop = pop.join(pop_mil) pop['persons'] = (pop['persons'] - pop['mil_mildep']) pop = pop.reset_index(drop=False) pop['age_cat'] = '' pop.loc[pop['age'].isin(list(range(0, 5))), ['age_cat']] = '00_04' pop.loc[pop['age'].isin(list(range(5, 10))), ['age_cat']] = '05_09' pop.loc[pop['age'].isin(list(range(10, 15))), ['age_cat']] = '10_14' pop.loc[pop['age'].isin(list(range(15, 18))), ['age_cat']] = '15_17' pop.loc[pop['age'].isin(list(range(18, 20))), ['age_cat']] = '18_19' pop.loc[pop['age'].isin(list(range(20, 21))), ['age_cat']] = '20_20' pop.loc[pop['age'].isin(list(range(21, 22))), ['age_cat']] = '21_21' pop.loc[pop['age'].isin(list(range(22, 25))), ['age_cat']] = '22_24' pop.loc[pop['age'].isin(list(range(25, 30))), ['age_cat']] = '25_29' pop.loc[pop['age'].isin(list(range(30, 35))), ['age_cat']] = '30_34' pop.loc[pop['age'].isin(list(range(35, 40))), ['age_cat']] = '35_39' pop.loc[pop['age'].isin(list(range(40, 45))), ['age_cat']] = '40_44' pop.loc[pop['age'].isin(list(range(45, 50))), ['age_cat']] = '45_49' pop.loc[pop['age'].isin(list(range(50, 55))), ['age_cat']] = '50_54' pop.loc[pop['age'].isin(list(range(55, 60))), ['age_cat']] = '55_59' pop.loc[pop['age'].isin(list(range(60, 62))), ['age_cat']] = '60_61' pop.loc[pop['age'].isin(list(range(62, 65))), ['age_cat']] = '62_64' pop.loc[pop['age'].isin(list(range(65, 67))), ['age_cat']] = '65_66' pop.loc[pop['age'].isin(list(range(67, 70))), ['age_cat']] = '67_69' pop.loc[pop['age'].isin(list(range(70, 75))), ['age_cat']] = '70_74' pop.loc[pop['age'].isin(list(range(75, 80))), ['age_cat']] = '75_79' pop.loc[pop['age'].isin(list(range(80, 85))), ['age_cat']] = '80_84' pop.loc[pop['age'].isin(list(range(85, 103))), ['age_cat']] = '85_99' pop = pd.DataFrame(pop['persons'].groupby( [pop['yr'], pop['age_cat'], pop['sex'], pop['race_ethn']]).sum()) pop.to_hdf('temp/data.h5', 'pop', mode='a')