Python create_df Exemples, db.extract.create_df Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : emp_luigi.py Projet : jugalm/pydefm

    def run(self):
        engine = create_engine(
            get_connection_string("model_config.yml", 'output_database'))

        sectoral_share = extract.create_df('sectoral_share',
                                           'sectoral_share_table',
                                           index=['yr', 'sandag_sector'])
        sectoral_pay = extract.create_df('sectoral_pay',
                                         'sectoral_pay_table',
                                         index=['yr', 'sandag_sector'])

        jobs = pd.read_hdf('temp/data.h5', 'jobs')

        jobs = jobs[['jobs_total']]
        jobs = jobs.join(sectoral_share, how='right')
        jobs['sector_jobs'] = (jobs['jobs_total'] * jobs['share']).round()
        jobs = jobs.drop(['jobs_total'], 1)

        jobs = jobs.join(sectoral_pay)
        jobs['tot_ann_job_pay'] = (jobs['sector_jobs'] *
                                   jobs['annual_pay']).round()

        jobs.to_hdf('temp/data.h5', 'sectoral', mode='a')
        run_table = pd.read_hdf('temp/data.h5', 'run_id')
        run_id = run_table[0]
        jobs['run_id'] = run_id

        jobs.to_sql(name='sectors',
                    con=engine,
                    schema='defm',
                    if_exists='append',
                    index=True)

Exemple #2

0

Afficher le fichier

Fichier : emp_luigi.py Projet : figo2002/pydefm

    def run(self):
        econ_sim_rates = pd.read_hdf('temp/data.h5', 'econ_sim_rates')
        local_jobs = extract.create_df('local_jobs',
                                       'local_jobs_table',
                                       rate_id=econ_sim_rates.lj_id[0],
                                       index=['yr'])
        in_commuting = extract.create_df('in_commuting',
                                         'in_commuting_table',
                                         rate_id=econ_sim_rates.ic_id[0],
                                         index=['yr'])

        work_force_local = pd.read_hdf('temp/data.h5', 'work_force_local')
        work_force_local = work_force_local.join(local_jobs)
        work_force_local['jobs_local'] = (
            work_force_local['work_force_local'] *
            work_force_local['jlw']).round()
        work_force_local = work_force_local.join(in_commuting)
        work_force_local['jobs_total'] = (work_force_local['jobs_local'] *
                                          work_force_local['wh_whlh']).round()
        work_force_local['jobs_external'] = (
            work_force_local['jobs_total'] -
            work_force_local['jobs_local']).round()

        # pull information from here
        work_force_local.to_hdf('temp/data.h5', 'jobs', mode='a')

Exemple #3

0

Afficher le fichier

Fichier : emp_luigi.py Projet : jugalm/pydefm

 def run(self):
     cohort_ur = extract.create_df(
         'cohort_ur',
         'cohort_ur_table',
         index=['yr', 'age_cat', 'sex', 'race_ethn'])
     cohort_ur.to_hdf('temp/data.h5', 'cohort_ur', mode='a')
     yearly_ur = extract.create_df('yearly_ur',
                                   'yearly_ur_table',
                                   index=['yr'])
     yearly_ur.to_hdf('temp/data.h5', 'yearly_ur', mode='a')

Exemple #4

0

Afficher le fichier

Fichier : emp_luigi.py Projet : figo2002/pydefm

    def run(self):
        econ_sim_rates = pd.read_hdf('temp/data.h5', 'econ_sim_rates')

        cohort_ur = extract.create_df(
            'cohort_ur',
            'cohort_ur_table',
            rate_id=econ_sim_rates.ur1_id[0],
            index=['yr', 'age_cat', 'sex', 'race_ethn'])
        cohort_ur.to_hdf('temp/data.h5', 'cohort_ur', mode='a')
        yearly_ur = extract.create_df('yearly_ur',
                                      'yearly_ur_table',
                                      rate_id=econ_sim_rates.ur2_id[0],
                                      index=['yr'])
        yearly_ur.to_hdf('temp/data.h5', 'yearly_ur', mode='a')

Exemple #5

0

Afficher le fichier

    def run(self):

            engine = create_engine(get_connection_string("model_config.yml", 'output_database'))
            db_connection_string = database.get_connection_string('model_config.yml', 'in_db')
            sql_in_engine = create_engine(db_connection_string)

            in_query = getattr(sql, 'max_run_id')
            db_run_id = pd.read_sql(in_query, engine, index_col=None)
            # db_run_id = log.new_run(name='inc_run_log', run_id=db_run_id['max'].iloc[0])

            run_id = pd.Series([db_run_id['id'].iloc[0]])
            run_id.to_hdf('temp/data.h5', 'run_id',  mode='a')

            dem_sim_rates = extract.create_df('dem_sim_rates', 'dem_sim_rates_table',
                                              rate_id=self.dem_id, index=None)
            dem_sim_rates.to_hdf('temp/data.h5', 'dem_sim_rates', mode='a')

            econ_sim_rates = extract.create_df('econ_sim_rates', 'econ_sim_rates_table',
                                              rate_id=self.econ_id, index=None)
            econ_sim_rates.to_hdf('temp/data.h5', 'econ_sim_rates', mode='a')

            tables = util.yaml_to_dict('model_config.yml', 'db_tables')

            in_query = getattr(sql, 'inc_pop') % (tables['inc_pop_table'], run_id[0])
            in_query2 = getattr(sql, 'inc_mil_hh_pop') % (tables['population_table'], dem_sim_rates.base_population_id[0])

            pop = pd.read_sql(in_query, engine, index_col=['age', 'race_ethn', 'sex', 'mildep'])
            pop_mil = pd.read_sql(in_query2, sql_in_engine, index_col=['age', 'race_ethn', 'sex', 'mildep'])

            pop = pop.join(pop_mil)
            pop['persons'] = (pop['persons'] - pop['mil_mildep'])
            pop = pop.reset_index(drop=False)
            pop = pop[pop['age'] >= 18]

            pop['age_cat'] = ''

            pop.loc[pop['age'].isin(list(range(18, 25))), ['age_cat']] = '18_24'
            pop.loc[pop['age'].isin(list(range(25, 35))), ['age_cat']] = '25_34'
            pop.loc[pop['age'].isin(list(range(35, 45))), ['age_cat']] = '35_44'
            pop.loc[pop['age'].isin(list(range(45, 55))), ['age_cat']] = '45_54'
            pop.loc[pop['age'].isin(list(range(55, 60))), ['age_cat']] = '55_59'
            pop.loc[pop['age'].isin(list(range(60, 65))), ['age_cat']] = '60_64'
            pop.loc[pop['age'].isin(list(range(65, 75))), ['age_cat']] = '65_74'
            pop.loc[pop['age'].isin(list(range(75, 103))), ['age_cat']] = '75_99'

            pop = pd.DataFrame(pop['persons'].groupby([pop['yr'], pop['age_cat']]).sum())

            pop.to_hdf('temp/data.h5', 'pop', mode='a')

Exemple #6

0

Afficher le fichier

Fichier : emp_luigi.py Projet : figo2002/pydefm

 def run(self):
     econ_sim_rates = pd.read_hdf('temp/data.h5', 'econ_sim_rates')
     lfpr = extract.create_df('lfp_rates',
                              'lfp_rates_table',
                              rate_id=econ_sim_rates.lfpr_id[0],
                              index=['yr', 'age_cat', 'sex', 'race_ethn'])
     lfpr.to_hdf('temp/data.h5', 'lfpr', mode='a')

Exemple #7

0

Afficher le fichier

    def run(self):
        engine = create_engine(get_connection_string("model_config.yml", 'output_database'))

        pop = pd.read_hdf('temp/data.h5', 'pop')
        inc_type_rates = extract.create_df('inc_shares', 'inc_shares_table', index=['yr', 'age_cat'])
        inc_type_rates = inc_type_rates.join(pop)
        inc_type_rates['totals'] = (inc_type_rates['income'] * inc_type_rates['persons'] * inc_type_rates['share'])
        inc_type_rates = inc_type_rates.reset_index(drop=False)

        inc_type_rates = pd.DataFrame(inc_type_rates['totals'].groupby([inc_type_rates['yr'], inc_type_rates['income_type']]).sum())

        inc_type_rates = inc_type_rates.reset_index(drop=False)
        inc_type_rates = pd.pivot_table(inc_type_rates, values='totals',
                            index=['yr'],
                            columns=['income_type'])
        # inc_type_rates.to_hdf('temp/data.h5', 'inc_type_rates', mode='a')

        inc_type_rates.rename(columns={'intp': 'Interest'}, inplace=True)
        inc_type_rates.rename(columns={'oip': 'Other'}, inplace=True)
        inc_type_rates.rename(columns={'pap': 'Public_Assistance'}, inplace=True)
        inc_type_rates.rename(columns={'retp': 'Retirement'}, inplace=True)
        inc_type_rates.rename(columns={'ssip': 'Supplemental_Social_Security'}, inplace=True)
        inc_type_rates.rename(columns={'ssp': 'Social_Security'}, inplace=True)

        inc_type_rates = inc_type_rates[['Interest', 'Other', 'Public_Assistance', 'Retirement',
                                         'Supplemental_Social_Security', 'Social_Security']]

        run_table = pd.read_hdf('temp/data.h5', 'run_id')
        run_id = run_table[0]

        inc_type_rates['run_id'] = run_id
        inc_type_rates.to_sql(name='non_wage_income', con=engine, schema='defm', if_exists='append', index=True)

Exemple #8

0

Afficher le fichier

 def run(self):
     mig_rates = extract.create_df('migration', 'migration_rate_table')
     mig_rates = mig_rates[['yr', 'DIN', 'FIN']]
     mig_rates.to_hdf('temp/data.h5',
                      'in_mig_rates',
                      format='table',
                      mode='a')

Exemple #9

0

Afficher le fichier

Fichier : defm_luigi.py Projet : jugalm/pydefm

    def run(self):
        aged_pop = pd.read_hdf('temp/data.h5', 'aged_pop')
        new_pop = pd.read_hdf('temp/data.h5', 'new_pop')
        rates = pd.read_hdf('temp/data.h5', 'ins_oth_rates')

        pop = aged_pop.join(new_pop)
        pop = pop.fillna(0)

        pop.loc[pop['type'].isin(['COL', 'INS', 'MIL', 'OTH']), ['new_pop']] = 0
        pop.loc[pop['mildep'].isin(['Y']), ['new_pop']] = 0

        pop = pop.reset_index(drop=False)

        rates = rates.reset_index(drop=False)

        pop = pop.set_index(['age', 'race_ethn', 'sex', 'mildep', 'type'])
        rates = rates.set_index(['age', 'race_ethn', 'sex', 'mildep', 'type'])

        pop = pop.join(rates)
        pop = pop.reset_index(drop=False)
        pop = pop.set_index(['age', 'race_ethn', 'sex'])

        pop = cp.final_population(pop)

        householder = extract.create_df('householder', 'householder_table')
        householder = householder[(householder['yr'] == self.year)]
        householder = householder.drop(['yr'], 1)

        pop = pop.join(householder)
        pop = pop.fillna(0)
        pop['households'] = (pop['persons'] * pop['householder_rate']).round()
        pop = pop.drop(['householder_rate'], 1)

        pop.to_hdf('temp/data.h5', 'pop', format='table', mode='a')

Exemple #10

0

Afficher le fichier

Fichier : emp_luigi.py Projet : jugalm/pydefm

    def run(self):
        mil_pop = pd.read_hdf('temp/data.h5', 'mil_pop')
        mil_wages = pd.read_hdf('temp/data.h5', 'mil_pay')
        mil_wages = mil_wages.join(mil_pop)

        mil_wages['mil_gc_wages'] = (mil_wages['mil_gc_pop'] *
                                     mil_wages['annual_pay_gq']).round()
        mil_wages['mil_hh_wages'] = (mil_wages['mil_hh_pop'] *
                                     mil_wages['annual_pay_hp']).round()

        mil_wages['multiplier'] = 0

        aigrm_table = extract.create_df('aigrm', 'aigrm_table', index=None)

        mil_wages.loc[mil_wages.index.get_level_values('yr') > 2014,
                      ['multiplier']] = (
                          aigrm_table.aigrm[0] *
                          (mil_wages.index.get_level_values('yr') - 2014))
        # pow(1.01, mil_wages.index.get_level_values('yr') - 2014)

        mil_wages['mil_gc_wages'] = (
            mil_wages['mil_gc_wages'] +
            mil_wages['mil_gc_wages'] * mil_wages['multiplier'])
        mil_wages['mil_hh_wages'] = (
            mil_wages['mil_hh_wages'] +
            mil_wages['mil_hh_wages'] * mil_wages['multiplier'])

        mil_wages['military_income'] = (mil_wages['mil_gc_wages'] +
                                        mil_wages['mil_hh_wages']).round()

        mil_wages.to_hdf('temp/data.h5', 'mil_income', mode='a')

Exemple #11

0

Afficher le fichier

Fichier : defm_luigi.py Projet : jugalm/pydefm

    def run(self):
        birth_rates = pd.read_hdf('temp/data.h5', 'birth_rates')
        pop = pd.read_hdf('temp/data.h5', 'non_mig_pop')
        pop = pop[(pop['type'] == 'HHP') & (pop['mildep'] == 'N')]
        birth_rates = compute.rates_for_yr(pop, birth_rates, self.year)
        birth_rates = birth_rates[(birth_rates['yr'] == self.year)]
        random_numbers = extract.create_df('random_numbers', 'random_numbers_table')
        random_numbers = random_numbers[(random_numbers['yr'] == self.year)]
        random_numbers = random_numbers[['random_number']]
        births_per_cohort = compute.births_all(birth_rates, self.year, pop_col='non_mig_pop', rand_df=random_numbers)

        death_rates = pd.read_hdf('temp/data.h5', 'death_rates')
        death_rates = death_rates[(death_rates['yr'] == self.year)]
        # sum newborn population across cohorts
        newborn = compute.births_sum(births_per_cohort, self.year)

        newborn = newborn.join(death_rates)
        newborn['new_deaths'] = (newborn['new_born'] * newborn['death_rate']).round()
        newborn['new_born_survived'] = (newborn['new_born'] - newborn['new_deaths']).round()

        dead_pop = pd.read_hdf('temp/data.h5', 'dead_pop')
        dead_pop = dead_pop.join(newborn['new_deaths'])

        dead_pop = dead_pop.fillna(0)
        dead_pop['deaths'] = (dead_pop['deaths'] + dead_pop['new_deaths']).round()

        dead_pop = dead_pop.drop(['new_deaths'], 1)

        dead_pop.to_hdf('temp/data.h5', 'dead_pop', format='table', mode='a')

        newborn = newborn.drop(['new_deaths', 'death_rate'], 1)
        newborn.to_hdf('temp/data.h5', 'new_born', format='table', mode='a')

Exemple #12

0

Afficher le fichier

    def run(self):
        engine = create_engine(get_connection_string("model_config.yml", 'output_database'))
        econ_sim_rates = pd.read_hdf('temp/data.h5', 'econ_sim_rates')

        pop = pd.read_hdf('temp/data.h5', 'pop')
        inc_type_rates = extract.create_df('inc_shares', 'inc_shares_table', rate_id=econ_sim_rates.inc1_id[0], index=['yr', 'age_cat'])

        inc_type_rates = inc_type_rates.join(pop)
        inc_type_rates['totals'] = (inc_type_rates['income'] * inc_type_rates['persons'] * inc_type_rates['share'])
        inc_type_rates = inc_type_rates.reset_index(drop=False)

        inc_type_rates['multiplier'] = 0

        aigr_table = extract.create_df('aigr', 'aigr_table', rate_id=econ_sim_rates.aigr_id[0], index=None)

        inc_type_rates.loc[inc_type_rates['yr'] > 2014, ['multiplier']] = (aigr_table.aigr[0] * (inc_type_rates['yr'] - 2014))

        # pow(1.01, mil_wages.index.get_level_values('yr') - 2014)

        inc_type_rates['totals'] = (inc_type_rates['totals'] + inc_type_rates['totals'] * inc_type_rates['multiplier'])

        inc_type_rates = pd.DataFrame(inc_type_rates['totals'].groupby([inc_type_rates['yr'], inc_type_rates['income_type']]).sum())

        inc_type_rates = inc_type_rates.reset_index(drop=False)
        inc_type_rates = pd.pivot_table(inc_type_rates, values='totals',
                            index=['yr'],
                            columns=['income_type'])
        # inc_type_rates.to_hdf('temp/data.h5', 'inc_type_rates', mode='a')

        inc_type_rates.rename(columns={'intp': 'Interest'}, inplace=True)
        inc_type_rates.rename(columns={'oip': 'Other'}, inplace=True)
        inc_type_rates.rename(columns={'pap': 'Public_Assistance'}, inplace=True)
        inc_type_rates.rename(columns={'retp': 'Retirement'}, inplace=True)
        inc_type_rates.rename(columns={'ssip': 'Supplemental_Social_Security'}, inplace=True)
        inc_type_rates.rename(columns={'ssp': 'Social_Security'}, inplace=True)
        inc_type_rates.rename(columns={'semp': 'Selfemp_Income'}, inplace=True)

        inc_type_rates = inc_type_rates[['Interest', 'Other', 'Public_Assistance', 'Retirement',
                                         'Supplemental_Social_Security', 'Social_Security', 'Selfemp_Income']]

        inc_type_rates.to_hdf('temp/data.h5', 'ue_income')
        run_table = pd.read_hdf('temp/data.h5', 'run_id')
        run_id = run_table[0]

        inc_type_rates['run_id'] = run_id
        inc_type_rates.to_sql(name='non_wage_income', con=engine, schema='defm', if_exists='append', index=True)

Exemple #13

0

Afficher le fichier

Fichier : emp_luigi.py Projet : figo2002/pydefm

    def run(self):
        econ_sim_rates = pd.read_hdf('temp/data.h5', 'econ_sim_rates')

        mil_pay = extract.create_df('mil_pay',
                                    'mil_pay_table',
                                    rate_id=econ_sim_rates.mp_id[0],
                                    index=['yr'])
        mil_pay.to_hdf('temp/data.h5', 'mil_pay', mode='a')

Exemple #14

0

Afficher le fichier

Fichier : emp_luigi.py Projet : lethphd/pydefm

 def run(self):
     out_commuting = extract.create_df('out_commuting', 'out_commuting_table', index=['yr'])
     work_force = pd.read_hdf('temp/data.h5', 'work_force')
     work_force = work_force.reset_index(drop=False)
     work_force = pd.DataFrame(work_force[['labor_force', 'unemployed', 'work_force']].groupby([work_force['yr']]).sum())
     work_force = work_force.join(out_commuting)
     work_force['work_force_outside'] = (work_force['work_force'] * work_force['wtlh_lh']).round()
     work_force['work_force_local'] = (work_force['work_force'] - work_force['work_force_outside']).round()
     work_force.to_hdf('temp/data.h5', 'local_work_force', mode='a')

Exemple #15

0

Afficher le fichier

    def run(self):

        my_file = Path('temp/data.h5')
        if my_file.is_file():
            print 'File exists'
        else:
            db_run_id = log.new_run()
            run_id = pd.Series([db_run_id])
            run_id.to_hdf('temp/data.h5', 'run_id', mode='a')
            pop = extract.create_df('population', 'population_table')
            pop.to_hdf('temp/data.h5', 'pop', format='table', mode='a')

            pop2 = pop[(pop['type'] == 'HHP')]
            pop2 = pop2.reset_index(drop=False)

            pop2 = pd.DataFrame(pop2['persons'].groupby(
                [pop2['age'], pop2['race_ethn'], pop2['sex']]).sum())
            pop2.rename(columns={'persons': 'persons_sum'}, inplace=True)

            pop2 = pop.join(pop2)
            pop2['rates'] = np.where(pop2['type'].isin(['INS', 'OTH']),
                                     (pop2['persons'] / pop2['persons_sum']),
                                     0)

            rates = pop2[['mildep', 'type', 'rates']]
            rates.to_hdf('temp/data.h5',
                         'ins_oth_rates',
                         format='table',
                         mode='a')

            engine = create_engine(
                get_connection_string("model_config.yml", 'output_database'))
            population_summary = []
            population_summary.append({
                'Year': self.year - 1,
                'Run_id': run_id[0],
                'Population': pop['persons'].sum(),
                'mig_out': 0,
                'mig_in': 0,
                'deaths': 0,
                'new_born': 0
            })

            summary_df = pd.DataFrame(population_summary)
            summary_df.to_sql(name='population_summary',
                              con=engine,
                              schema='defm',
                              if_exists='append',
                              index=False)
            pop['yr'] = 2010
            pop['run_id'] = db_run_id
            pop.to_sql(name='population',
                       con=engine,
                       schema='defm',
                       if_exists='append',
                       index=True)

Exemple #16

0

Afficher le fichier

Fichier : combined_simulation.py Projet : figo2002/pydefm

def my_form():

    econ_sim_ids = extract.create_df('econ_sim_ids',
                                     'econ_sim_ids_table',
                                     rate_id=0,
                                     index=None)
    dem_sim_ids = extract.create_df('dem_sim_ids',
                                    'dem_sim_ids_table',
                                    rate_id=0,
                                    index=None)
    dems = zip(dem_sim_ids['demographic_simulation_id'],
               dem_sim_ids['desc_short'])
    econs = econ_sim_ids['economic_simulation_id'].tolist()
    startyear = range(2011, 2050)
    endyear = range(2012, 2051)
    return render_template("my-form.html",
                           result1=dems,
                           result2=econs,
                           startyear=startyear,
                           endyear=endyear)

Exemple #17

0

Afficher le fichier

    def run(self):
        # create file only first year, exists in subsequent years
        my_file = Path('temp/data.h5')
        if my_file.is_file():
            print'File exists'
        else: # only first year
            db_run_id = log.new_run(dem_id=self.dem_id, econ_id=self.econ_id)
            run_id = pd.Series([db_run_id])
            run_id.to_hdf('temp/data.h5', 'run_id',  mode='a')

            dem_sim_rates = extract.create_df('dem_sim_rates', 'dem_sim_rates_table',
                                              rate_id=self.dem_id, index=None)

            dem_sim_rates.to_hdf('temp/data.h5', 'dem_sim_rates',  mode='a')

            pop = extract.create_df('population', 'population_table', rate_id=dem_sim_rates.base_population_id[0])
            pop.to_hdf('temp/data.h5', 'pop', mode='a')

            # Create function here and test
            # to get ratio of INS and OTH to HHP to keep constant
            rates = cp.compute_ins_oth_rate(pop)

            rates.to_hdf('temp/data.h5', 'ins_oth_rates', mode='a')

            engine = create_engine(get_connection_string("model_config.yml", 'output_database'))
            population_summary = []
            population_summary.append({'Year': self.year - 1,
                                       'Run_id': run_id[0],
                                       'Population': pop['persons'].sum(),
                                       'mig_out': 0,
                                       'mig_in': 0,
                                       'deaths_hhp_non_mil': 0,
                                       'new_born': 0})

            summary_df = pd.DataFrame(population_summary)
            summary_df.to_sql(name='population_summary', con=engine, schema='defm', if_exists='append', index=False)
            pop['yr'] = self.year - 1
            pop['run_id'] = db_run_id
            pop.to_sql(name='population', con=engine, schema='defm', if_exists='append', index=True)

Exemple #18

0

Afficher le fichier

Fichier : emp_luigi.py Projet : figo2002/pydefm

    def run(self):
        engine = create_engine(
            get_connection_string("model_config.yml", 'output_database'))
        econ_sim_rates = pd.read_hdf('temp/data.h5', 'econ_sim_rates')

        trs_rates = extract.create_df('trs',
                                      'trs_table',
                                      rate_id=econ_sim_rates.trs_id[0],
                                      index=['yr'])

        hh_income = pd.read_hdf('temp/data.h5', 'hh_income')
        mil_income = pd.read_hdf('temp/data.h5', 'mil_income')
        ue_income = pd.read_hdf('temp/data.h5', 'ue_income')
        inc = hh_income.join(mil_income)
        inc = inc.join(ue_income)
        inc = inc.join(trs_rates)

        inc['unearned_income'] = (inc['Interest'] + inc['Other'] +
                                  inc['Public_Assistance'] +
                                  inc['Retirement'] +
                                  inc['Supplemental_Social_Security'] +
                                  inc['Social_Security']).round()

        inc['personal_income'] = (inc['jobs_local_wages'] +
                                  inc['wf_outside_wages'] +
                                  inc['unearned_income'] +
                                  inc['Selfemp_Income'] +
                                  inc['military_income']).round()

        inc['taxable_retail_sales'] = (inc['personal_income'] *
                                       inc['trs_pct']).round()

        inc = inc[[
            'labor_force', 'unemployed', 'work_force', 'work_force_outside',
            'work_force_local', 'jobs_local', 'jobs_total', 'jobs_external',
            'avg_wage', 'jobs_total_wages', 'jobs_local_wages',
            'jobs_external_wages', 'wf_outside_wages', 'military_income',
            'unearned_income', 'Selfemp_Income', 'personal_income',
            'taxable_retail_sales'
        ]]

        run_table = pd.read_hdf('temp/data.h5', 'run_id')
        run_id = run_table[0]
        inc['run_id'] = run_id

        inc.to_sql(name='emp_summary',
                   con=engine,
                   schema='defm',
                   if_exists='append',
                   index=True)

Exemple #19

0

Afficher le fichier

 def run(self):
     dem_sim_rates = pd.read_hdf('temp/data.h5', 'dem_sim_rates')
     mig_rates = extract.create_df('migration', 'migration_rate_table', rate_id=dem_sim_rates.migration_rate_id[0])
     mig_rates = mig_rates[['yr', 'DOUT', 'FOUT']]
     mig_rates.to_hdf('temp/data.h5', 'out_mig_rates', mode='a')

Exemple #20

0

Afficher le fichier

Fichier : emp_luigi.py Projet : lethphd/pydefm

 def run(self):
     lfpr = extract.create_df('lfp_rates', 'lfp_rates_table', index=['yr', 'age_cat', 'sex', 'race_ethn'])
     lfpr.to_hdf('temp/data.h5', 'lfpr', mode='a')

Exemple #21

0

Afficher le fichier

Fichier : defm_luigi.py Projet : jugalm/pydefm

 def run(self):
     birth_rates = extract.create_df('birth', 'birth_rate_table')
     birth_rates.to_hdf('temp/data.h5', 'birth_rates', format='table', mode='a')

Exemple #22

0

Afficher le fichier

Fichier : defm.py Projet : SANDAG/pydefm

os.chdir(os.path.dirname(full_path))

# set console display to show MultiIndex for every row
pd.set_option('display.multi_sparse', False)

# rate versions to result database & return primary key for table
# db_run_id = log.new_run('model_summary.db')
db_run_id = log.new_run('defm.db')

years = util.yaml_to_dict('model_config.yml', 'years')

# Load rates for all years: SQL query to pandas DataFrame
#   columns:  'age', 'race_ethn', 'sex' (cohort), 'rate', 'year'
#   pivot migration DataFrame w 4 rates: domestic in & out, foreign in & out

birth_rates = extract.create_df('birth', 'rate_table')
death_rates = extract.create_df('death', 'rate_table')
mig_rates = extract.create_df('migration', 'rate_table', pivot=True)


# Load base population: SQL query to pandas DataFrame
#   columns:  'age', 'race_ethn', 'sex' (cohort),
#   'gq.type', 'mildep', 'persons', 'households'

population = extract.create_df('population', 'population_table')

# special case ratios
ins_ratio = extract.create_df('ins', 'rate_table')

oth_ratio = extract.create_df('oth', 'rate_table')

Exemple #23

0

Afficher le fichier

os.chdir(os.path.dirname(full_path))

# set console display to show MultiIndex for every row
pd.set_option('display.multi_sparse', False)

# rate versions to result database & return primary key for table
# db_run_id = log.new_run('model_summary.db')
db_run_id = log.new_run()

years = util.yaml_to_dict('model_config.yml', 'years')

# Load rates for all years: SQL query to pandas DataFrame
#   columns:  'age', 'race_ethn', 'sex' (cohort), 'rate', 'year'
#   pivot migration DataFrame w 4 rates: domestic in & out, foreign in & out

birth_rates = extract.create_df('birth', 'rate_table')
death_rates = extract.create_df('death', 'rate_table')
mig_rates = extract.create_df('migration', 'rate_table', pivot=True)

# Load base population: SQL query to pandas DataFrame
#   columns:  'age', 'race_ethn', 'sex' (cohort),
#   'gq.type', 'mildep', 'persons', 'households'

population = extract.create_df('population', 'population_table')

# special case ratios
ins_ratio = extract.create_df('ins', 'rate_table')

oth_ratio = extract.create_df('oth', 'rate_table')

# base population to result database with year

Exemple #24

0

Afficher le fichier

Fichier : emp_luigi.py Projet : jugalm/pydefm

 def run(self):
     mil_pay = extract.create_df('mil_pay', 'mil_pay_table', index=['yr'])
     mil_pay.to_hdf('temp/data.h5', 'mil_pay', mode='a')

Exemple #25

0

Afficher le fichier

 def run(self):
     dem_sim_rates = pd.read_hdf('temp/data.h5', 'dem_sim_rates')
     birth_rates = extract.create_df('birth', 'birth_rate_table', rate_id=dem_sim_rates.birth_rate_id[0])
     birth_rates.to_hdf('temp/data.h5', 'birth_rates', mode='a')

Exemple #26

0

Afficher le fichier

Fichier : emp_luigi.py Projet : figo2002/pydefm

    def run(self):

        engine = create_engine(
            get_connection_string("model_config.yml", 'output_database'))
        db_connection_string = database.get_connection_string(
            'model_config.yml', 'in_db')
        sql_in_engine = create_engine(db_connection_string)

        in_query = getattr(sql, 'max_run_id')
        db_run_id = pd.read_sql(in_query, engine, index_col=None)
        # db_run_id = log.new_run(name='emp_run_log', run_id=db_run_id['max'].iloc[0])

        run_id = pd.Series([db_run_id['id'].iloc[0]])
        run_id.to_hdf('temp/data.h5', 'run_id', mode='a')

        tables = util.yaml_to_dict('model_config.yml', 'db_tables')

        dem_sim_rates = extract.create_df('dem_sim_rates',
                                          'dem_sim_rates_table',
                                          rate_id=self.dem_id,
                                          index=None)
        dem_sim_rates.to_hdf('temp/data.h5', 'dem_sim_rates', mode='a')

        econ_sim_rates = extract.create_df('econ_sim_rates',
                                           'econ_sim_rates_table',
                                           rate_id=self.econ_id,
                                           index=None)
        econ_sim_rates.to_hdf('temp/data.h5', 'econ_sim_rates', mode='a')

        in_query = getattr(sql,
                           'inc_pop') % (tables['inc_pop_table'], run_id[0])
        in_query2 = getattr(sql, 'inc_mil_hh_pop') % (
            tables['population_table'], dem_sim_rates.base_population_id[0])

        pop = pd.read_sql(in_query,
                          engine,
                          index_col=['age', 'race_ethn', 'sex', 'mildep'])
        pop_mil = pd.read_sql(in_query2,
                              sql_in_engine,
                              index_col=['age', 'race_ethn', 'sex', 'mildep'])

        pop = pop.join(pop_mil)
        pop['persons'] = (pop['persons'] - pop['mil_mildep'])
        pop = pop.reset_index(drop=False)

        pop['age_cat'] = ''
        pop.loc[pop['age'].isin(list(range(0, 5))), ['age_cat']] = '00_04'
        pop.loc[pop['age'].isin(list(range(5, 10))), ['age_cat']] = '05_09'
        pop.loc[pop['age'].isin(list(range(10, 15))), ['age_cat']] = '10_14'
        pop.loc[pop['age'].isin(list(range(15, 18))), ['age_cat']] = '15_17'
        pop.loc[pop['age'].isin(list(range(18, 20))), ['age_cat']] = '18_19'
        pop.loc[pop['age'].isin(list(range(20, 21))), ['age_cat']] = '20_20'
        pop.loc[pop['age'].isin(list(range(21, 22))), ['age_cat']] = '21_21'
        pop.loc[pop['age'].isin(list(range(22, 25))), ['age_cat']] = '22_24'
        pop.loc[pop['age'].isin(list(range(25, 30))), ['age_cat']] = '25_29'
        pop.loc[pop['age'].isin(list(range(30, 35))), ['age_cat']] = '30_34'
        pop.loc[pop['age'].isin(list(range(35, 40))), ['age_cat']] = '35_39'
        pop.loc[pop['age'].isin(list(range(40, 45))), ['age_cat']] = '40_44'
        pop.loc[pop['age'].isin(list(range(45, 50))), ['age_cat']] = '45_49'
        pop.loc[pop['age'].isin(list(range(50, 55))), ['age_cat']] = '50_54'
        pop.loc[pop['age'].isin(list(range(55, 60))), ['age_cat']] = '55_59'
        pop.loc[pop['age'].isin(list(range(60, 62))), ['age_cat']] = '60_61'
        pop.loc[pop['age'].isin(list(range(62, 65))), ['age_cat']] = '62_64'
        pop.loc[pop['age'].isin(list(range(65, 67))), ['age_cat']] = '65_66'
        pop.loc[pop['age'].isin(list(range(67, 70))), ['age_cat']] = '67_69'
        pop.loc[pop['age'].isin(list(range(70, 75))), ['age_cat']] = '70_74'
        pop.loc[pop['age'].isin(list(range(75, 80))), ['age_cat']] = '75_79'
        pop.loc[pop['age'].isin(list(range(80, 85))), ['age_cat']] = '80_84'
        pop.loc[pop['age'].isin(list(range(85, 103))), ['age_cat']] = '85_99'

        pop = pd.DataFrame(pop['persons'].groupby(
            [pop['yr'], pop['age_cat'], pop['sex'], pop['race_ethn']]).sum())
        pop.to_hdf('temp/data.h5', 'pop', mode='a')