def run(self): my_file = Path('temp/data.h5') if my_file.is_file(): print 'File exists' else: db_run_id = log.new_run() run_id = pd.Series([db_run_id]) run_id.to_hdf('temp/data.h5', 'run_id', mode='a') pop = extract.create_df('population', 'population_table') pop.to_hdf('temp/data.h5', 'pop', format='table', mode='a') pop2 = pop[(pop['type'] == 'HHP')] pop2 = pop2.reset_index(drop=False) pop2 = pd.DataFrame(pop2['persons'].groupby( [pop2['age'], pop2['race_ethn'], pop2['sex']]).sum()) pop2.rename(columns={'persons': 'persons_sum'}, inplace=True) pop2 = pop.join(pop2) pop2['rates'] = np.where(pop2['type'].isin(['INS', 'OTH']), (pop2['persons'] / pop2['persons_sum']), 0) rates = pop2[['mildep', 'type', 'rates']] rates.to_hdf('temp/data.h5', 'ins_oth_rates', format='table', mode='a') engine = create_engine( get_connection_string("model_config.yml", 'output_database')) population_summary = [] population_summary.append({ 'Year': self.year - 1, 'Run_id': run_id[0], 'Population': pop['persons'].sum(), 'mig_out': 0, 'mig_in': 0, 'deaths': 0, 'new_born': 0 }) summary_df = pd.DataFrame(population_summary) summary_df.to_sql(name='population_summary', con=engine, schema='defm', if_exists='append', index=False) pop['yr'] = 2010 pop['run_id'] = db_run_id pop.to_sql(name='population', con=engine, schema='defm', if_exists='append', index=True)
def run(self): my_file = Path('temp/data.h5') if my_file.is_file(): print'File exists' else: db_run_id = log.new_run(name='emp_run_log') run_id = pd.Series([db_run_id]) run_id.to_hdf('temp/data.h5', 'run_id', mode='a') engine = create_engine(get_connection_string("model_config.yml", 'output_database')) db_connection_string = database.get_connection_string('model_config.yml', 'in_db') sql_in_engine = create_engine(db_connection_string) rate_versions = util.yaml_to_dict('model_config.yml', 'rate_versions') tables = util.yaml_to_dict('model_config.yml', 'db_tables') in_query = getattr(sql, 'inc_pop') % (tables['inc_pop_table'], rate_versions['inc_pop']) in_query2 = getattr(sql, 'inc_pop_mil') % (tables['population_table'], rate_versions['population']) pop = pd.read_sql(in_query, engine, index_col=['age', 'race_ethn', 'sex', 'mildep']) pop_mil = pd.read_sql(in_query2, sql_in_engine, index_col=['age', 'race_ethn', 'sex', 'mildep']) pop = pop.join(pop_mil) pop['persons'] = (pop['persons'] - pop['mil_mildep']) pop = pop.reset_index(drop=False) pop['age_cat'] = '' pop.loc[pop['age'].isin(list(range(0, 5))), ['age_cat']] = '00_04' pop.loc[pop['age'].isin(list(range(5, 10))), ['age_cat']] = '05_09' pop.loc[pop['age'].isin(list(range(10, 15))), ['age_cat']] = '10_14' pop.loc[pop['age'].isin(list(range(15, 18))), ['age_cat']] = '15_17' pop.loc[pop['age'].isin(list(range(18, 20))), ['age_cat']] = '18_19' pop.loc[pop['age'].isin(list(range(20, 21))), ['age_cat']] = '20_20' pop.loc[pop['age'].isin(list(range(21, 22))), ['age_cat']] = '21_21' pop.loc[pop['age'].isin(list(range(22, 25))), ['age_cat']] = '22_24' pop.loc[pop['age'].isin(list(range(25, 30))), ['age_cat']] = '25_29' pop.loc[pop['age'].isin(list(range(30, 35))), ['age_cat']] = '30_34' pop.loc[pop['age'].isin(list(range(35, 40))), ['age_cat']] = '35_39' pop.loc[pop['age'].isin(list(range(40, 45))), ['age_cat']] = '40_44' pop.loc[pop['age'].isin(list(range(45, 50))), ['age_cat']] = '45_49' pop.loc[pop['age'].isin(list(range(50, 55))), ['age_cat']] = '50_54' pop.loc[pop['age'].isin(list(range(55, 60))), ['age_cat']] = '55_59' pop.loc[pop['age'].isin(list(range(60, 62))), ['age_cat']] = '60_61' pop.loc[pop['age'].isin(list(range(62, 65))), ['age_cat']] = '62_64' pop.loc[pop['age'].isin(list(range(65, 67))), ['age_cat']] = '65_66' pop.loc[pop['age'].isin(list(range(67, 70))), ['age_cat']] = '67_69' pop.loc[pop['age'].isin(list(range(70, 75))), ['age_cat']] = '70_74' pop.loc[pop['age'].isin(list(range(75, 80))), ['age_cat']] = '75_79' pop.loc[pop['age'].isin(list(range(80, 85))), ['age_cat']] = '80_84' pop.loc[pop['age'].isin(list(range(85, 103))), ['age_cat']] = '85_99' pop = pd.DataFrame(pop['persons'].groupby([pop['yr'], pop['age_cat'], pop['sex'], pop['race_ethn']]).sum()) print pop.head() pop.to_hdf('temp/data.h5', 'pop', mode='a')
def run(self): my_file = Path('temp/data.h5') if my_file.is_file(): print'File exists' else: db_run_id = log.new_run(name='inc_run_log') run_id = pd.Series([db_run_id]) run_id.to_hdf('temp/data.h5', 'run_id', mode='a') engine = create_engine(get_connection_string("model_config.yml", 'output_database')) db_connection_string = database.get_connection_string('model_config.yml', 'in_db') sql_in_engine = create_engine(db_connection_string) rate_versions = util.yaml_to_dict('model_config.yml', 'rate_versions') tables = util.yaml_to_dict('model_config.yml', 'db_tables') in_query = getattr(sql, 'inc_pop') % (tables['inc_pop_table'], rate_versions['inc_pop']) in_query2 = getattr(sql, 'inc_pop_mil') % (tables['population_table'], rate_versions['population']) pop = pd.read_sql(in_query, engine, index_col=['age', 'race_ethn', 'sex', 'mildep']) pop_mil = pd.read_sql(in_query2, sql_in_engine, index_col=['age', 'race_ethn', 'sex', 'mildep']) pop = pop.join(pop_mil) pop['persons'] = (pop['persons'] - pop['mil_mildep']) pop = pop.reset_index(drop=False) pop = pop[pop['age'] >= 18] pop['age_cat'] = '' pop.loc[pop['age'].isin(list(range(18, 25))), ['age_cat']] = '18_24' pop.loc[pop['age'].isin(list(range(25, 35))), ['age_cat']] = '25_34' pop.loc[pop['age'].isin(list(range(35, 45))), ['age_cat']] = '35_44' pop.loc[pop['age'].isin(list(range(45, 55))), ['age_cat']] = '45_54' pop.loc[pop['age'].isin(list(range(55, 60))), ['age_cat']] = '55_59' pop.loc[pop['age'].isin(list(range(60, 65))), ['age_cat']] = '60_64' pop.loc[pop['age'].isin(list(range(65, 75))), ['age_cat']] = '65_74' pop.loc[pop['age'].isin(list(range(75, 103))), ['age_cat']] = '75_99' pop = pd.DataFrame(pop['persons'].groupby([pop['yr'], pop['age_cat']]).sum()) pop.to_hdf('temp/data.h5', 'pop', mode='a')
def run(self): # create file only first year, exists in subsequent years my_file = Path('temp/data.h5') if my_file.is_file(): print'File exists' else: # only first year db_run_id = log.new_run(dem_id=self.dem_id, econ_id=self.econ_id) run_id = pd.Series([db_run_id]) run_id.to_hdf('temp/data.h5', 'run_id', mode='a') dem_sim_rates = extract.create_df('dem_sim_rates', 'dem_sim_rates_table', rate_id=self.dem_id, index=None) dem_sim_rates.to_hdf('temp/data.h5', 'dem_sim_rates', mode='a') pop = extract.create_df('population', 'population_table', rate_id=dem_sim_rates.base_population_id[0]) pop.to_hdf('temp/data.h5', 'pop', mode='a') # Create function here and test # to get ratio of INS and OTH to HHP to keep constant rates = cp.compute_ins_oth_rate(pop) rates.to_hdf('temp/data.h5', 'ins_oth_rates', mode='a') engine = create_engine(get_connection_string("model_config.yml", 'output_database')) population_summary = [] population_summary.append({'Year': self.year - 1, 'Run_id': run_id[0], 'Population': pop['persons'].sum(), 'mig_out': 0, 'mig_in': 0, 'deaths_hhp_non_mil': 0, 'new_born': 0}) summary_df = pd.DataFrame(population_summary) summary_df.to_sql(name='population_summary', con=engine, schema='defm', if_exists='append', index=False) pop['yr'] = self.year - 1 pop['run_id'] = db_run_id pop.to_sql(name='population', con=engine, schema='defm', if_exists='append', index=True)
# Housekeeping stuff # measure script time start_time = time.time() # change to current directory to find .yml input config file full_path = os.path.abspath(inspect.getfile(inspect.currentframe())) os.chdir(os.path.dirname(full_path)) # set console display to show MultiIndex for every row pd.set_option('display.multi_sparse', False) # rate versions to result database & return primary key for table # db_run_id = log.new_run('model_summary.db') db_run_id = log.new_run('defm.db') years = util.yaml_to_dict('model_config.yml', 'years') # Load rates for all years: SQL query to pandas DataFrame # columns: 'age', 'race_ethn', 'sex' (cohort), 'rate', 'year' # pivot migration DataFrame w 4 rates: domestic in & out, foreign in & out birth_rates = extract.create_df('birth', 'rate_table') death_rates = extract.create_df('death', 'rate_table') mig_rates = extract.create_df('migration', 'rate_table', pivot=True) # Load base population: SQL query to pandas DataFrame # columns: 'age', 'race_ethn', 'sex' (cohort), # 'gq.type', 'mildep', 'persons', 'households'
# Housekeeping stuff # measure script time start_time = time.time() # change to current directory to find .yml input config file full_path = os.path.abspath(inspect.getfile(inspect.currentframe())) os.chdir(os.path.dirname(full_path)) # set console display to show MultiIndex for every row pd.set_option('display.multi_sparse', False) # rate versions to result database & return primary key for table # db_run_id = log.new_run('model_summary.db') db_run_id = log.new_run() years = util.yaml_to_dict('model_config.yml', 'years') # Load rates for all years: SQL query to pandas DataFrame # columns: 'age', 'race_ethn', 'sex' (cohort), 'rate', 'year' # pivot migration DataFrame w 4 rates: domestic in & out, foreign in & out birth_rates = extract.create_df('birth', 'rate_table') death_rates = extract.create_df('death', 'rate_table') mig_rates = extract.create_df('migration', 'rate_table', pivot=True) # Load base population: SQL query to pandas DataFrame # columns: 'age', 'race_ethn', 'sex' (cohort), # 'gq.type', 'mildep', 'persons', 'households'