예제 #1
0
파일: compute.py 프로젝트: lethphd/pydefm
def deaths(df, db_id, sim_year):
    """
    Calculate deaths by applying death rates to non-migrating population.

    Side effects: the ``deaths`` and ``survived`` columns are added to the
    DataFrame passed in, and two tables are written to 'defm.db' through
    ``log.insert_run``: 'deaths_by_cohort_by_age' (rows with non-zero
    deaths) and 'survived' (rows with non-zero non-migrating population).

    Parameters
    ----------
    df : pandas DataFrame
        population and death rates for current yr
    db_id : int
        primary key for current simulation
    sim_year : int
        year being simulated (not used by this function)

    Returns
    -------
    df : pandas DataFrame
        survived population per cohort for a given year, without the
        'deaths', 'yr', 'death_rate' and 'non_mig_pop' columns

    """
    df['deaths'] = (df['non_mig_pop'] * df['death_rate']).round()

    # report out deaths, leaving out cohorts that had none
    # NOTE: axis is given by keyword throughout; the positional form
    # drop(labels, 1) is no longer accepted by pandas >= 2.0
    deaths_out = df[df.deaths != 0].drop(['mig_in_net'], axis=1)
    log.insert_run('defm.db', db_id, deaths_out, 'deaths_by_cohort_by_age')

    # The per-cohort total of deaths is not logged by this function, so it
    # is not computed here.

    # SPECIAL CASES
    # deaths not carried over into next year
    deaths_not_applied = (
        ((df['type'] == 'HP') & (df['mildep'] == 'Y'))
        | df['type'].isin(['COL', 'INS', 'MIL', 'OTH']))
    df['survived'] = np.where(
        deaths_not_applied,
        df['non_mig_pop'],  # special case
        df['non_mig_pop'] - df['deaths'])  # else

    # report out survivors, leaving out empty cohorts
    survived_out = df[df.non_mig_pop != 0].drop(['mig_in_net'], axis=1)
    log.insert_run('defm.db', db_id, survived_out, 'survived')

    # drop columns that are no longer needed by the caller
    return df.drop(['deaths', 'yr', 'death_rate', 'non_mig_pop'], axis=1)
예제 #2
0
파일: compute.py 프로젝트: lethphd/pydefm
def net_mig(df, db_id, sim_year):
    """
    Apply migration rates to the population to get migrating persons.

    The DataFrame passed in is modified in place (rates are zeroed for the
    special cases and the ``mig_*`` columns are added) and is also the
    return value. Rows with at least one non-zero rate are written to the
    'net_migration' table of 'defm.db' through ``log.insert_run``.

    Parameters
    ----------
    df : pandas.DataFrame
        with population and migration rates for current yr
    db_id : int
        primary key for current simulation
    sim_year : int
        year being simulated (not used by this function)

    Returns
    -------
    df : pandas DataFrame
        In and out migrating population per cohort for a given year
            population x  migration rate, where rates are:
                domestic in (DIN), domestic out (DOUT),
                foreign in (FIN), and foreign out (FOUT)

    """
    rate_cols = ['DIN', 'DOUT', 'FIN', 'FOUT']

    # SPECIAL CASE: no migration, set rates to zero
    #   - group quarters = "HP" and mildep = "Y"
    #   - group quarters equal COL, INS, MIL, or OTH
    hp_mildep = (df.type == 'HP') & (df.mildep == 'Y')
    group_quarters = df['type'].isin(['COL', 'INS', 'MIL', 'OTH'])
    df.loc[hp_mildep | group_quarters, rate_cols] = 0

    # persons moving = population x rate, one column per rate
    persons = df['persons']
    for moved_col, rate_col in (('mig_Dout', 'DOUT'), ('mig_Fout', 'FOUT'),
                                ('mig_Din', 'DIN'), ('mig_Fin', 'FIN')):
        df[moved_col] = (persons * df[rate_col]).round()

    # domestic + foreign totals in each direction
    df['mig_out_net'] = df['mig_Dout'] + df['mig_Fout']
    df['mig_in_net'] = df['mig_Din'] + df['mig_Fin']

    # only rows with at least one non-zero rate are reported
    has_rate = (df[rate_cols] != 0).any(axis=1)
    net_mig_db = df[has_rate].copy()
    log.insert_run('defm.db', db_id, net_mig_db, 'net_migration')

    return df
예제 #3
0
파일: compute.py 프로젝트: SANDAG/pydefm
def net_mig(df, db_id, sim_year):
    """
    Compute in- and out-migrating persons from population and rates.

    Works directly on ``df``: the four rate columns are set to zero for the
    special cases, the ``mig_*`` columns are appended, and the same object
    is returned. Rows where any rate is non-zero are sent to the
    'net_migration' table of 'defm.db' via ``log.insert_run``.

    Parameters
    ----------
    df : pandas.DataFrame
        with population and migration rates for current yr
    db_id : int
        primary key for current simulation
    sim_year : int
        year being simulated (not used by this function)

    Returns
    -------
    df : pandas DataFrame
        In and out migrating population per cohort for a given year
            population x  migration rate, where rates are:
                domestic in (DIN), domestic out (DOUT),
                foreign in (FIN), and foreign out (FOUT)

    """
    rates = ['DIN', 'DOUT', 'FIN', 'FOUT']

    # SPECIAL CASE: no migration, set rates to zero
    # when group quarters = "HP" and mildep = "Y"
    is_hp_mildep = df.type.eq('HP') & df.mildep.eq('Y')
    df.loc[is_hp_mildep, rates] = 0

    # when group quarters equal COL, INS, MIL, or OTH
    is_group_quarters = df['type'].isin(['COL', 'INS', 'MIL', 'OTH'])
    df.loc[is_group_quarters, rates] = 0

    # calculate net migration: out-flows first, then in-flows
    population = df['persons']
    df['mig_Dout'] = population.mul(df['DOUT']).round()
    df['mig_Fout'] = population.mul(df['FOUT']).round()
    df['mig_Din'] = population.mul(df['DIN']).round()
    df['mig_Fin'] = population.mul(df['FIN']).round()
    df['mig_out_net'] = df['mig_Dout'] + df['mig_Fout']
    df['mig_in_net'] = df['mig_Din'] + df['mig_Fin']

    # log the rows that still carry a migration rate
    any_rate = df.DIN.ne(0) | df.DOUT.ne(0) | df.FIN.ne(0) | df.FOUT.ne(0)
    net_mig_db = df[any_rate].copy()
    log.insert_run('defm.db', db_id, net_mig_db, 'net_migration')

    return df
예제 #4
0
파일: compute.py 프로젝트: lethphd/pydefm
def non_mig(nm_df, db_id, sim_year):
    """
    Calculate non-migration population by subtracting net out migrating
    from population.

    Side effects: the ``non_mig_pop`` column is added to the DataFrame
    passed in, and rows with a non-zero non-migrating population are
    written to the 'non_migrating' table of 'defm.db' through
    ``log.insert_run``.

    Parameters
    ----------
    nm_df : pandas.DataFrame
        with population for current yr
        and population migrating in & out
    db_id : int
        primary key for current simulation
    sim_year : int
        year being simulated (not used by this function)

    Returns
    -------
    nm_df : pandas DataFrame
        non-migrating population per cohort for a given year, with columns
        'type', 'mildep', 'households', 'non_mig_pop' and 'mig_in_net'

    """
    nm_df['non_mig_pop'] = nm_df['persons'] - nm_df['mig_out_net']

    # keep only the columns needed downstream
    nm_df = nm_df[[
        'type', 'mildep', 'households', 'persons', 'mig_out_net',
        'non_mig_pop', 'mig_in_net', 'yr'
    ]]

    # record non migration population in result database
    # remove rows that have zero population
    # NOTE: axis is given by keyword; the positional form drop(labels, 1)
    # is no longer accepted by pandas >= 2.0
    nm_db = nm_df[nm_df.non_mig_pop != 0].drop(['mig_in_net'], axis=1)
    log.insert_run('defm.db', db_id, nm_db, 'non_migrating')

    # drop year column in order to join w birth and death rates
    return nm_df.drop(['yr', 'persons', 'mig_out_net'], axis=1)
예제 #5
0
파일: compute.py 프로젝트: SANDAG/pydefm
def non_mig(nm_df, db_id, sim_year):
    """
    Calculate non-migration population by subtracting net out migrating
    from population.

    Side effects: ``non_mig_pop`` is added as a column of the DataFrame
    passed in, and the rows whose non-migrating population is not zero are
    logged to the 'non_migrating' table of 'defm.db' via
    ``log.insert_run``.

    Parameters
    ----------
    nm_df : pandas.DataFrame
        with population for current yr
        and population migrating in & out
    db_id : int
        primary key for current simulation
    sim_year : int
        year being simulated (not used by this function)

    Returns
    -------
    nm_df : pandas DataFrame
        non-migrating population per cohort for a given year, with columns
        'type', 'mildep', 'households', 'non_mig_pop' and 'mig_in_net'

    """
    nm_df['non_mig_pop'] = nm_df['persons'] - nm_df['mig_out_net']

    # drop unnecessary columns
    keep_cols = ['type', 'mildep', 'households', 'persons', 'mig_out_net',
                 'non_mig_pop', 'mig_in_net', 'yr']
    nm_df = nm_df[keep_cols]

    # record non migration population in result database
    # remove rows that have zero population
    nm_db = nm_df[nm_df.non_mig_pop != 0].copy()
    # axis must be a keyword: drop(labels, 1) fails on pandas >= 2.0
    nm_db = nm_db.drop(['mig_in_net'], axis=1)

    log.insert_run('defm.db', db_id, nm_db, 'non_migrating')

    # drop year column in order to join w birth and death rates
    nm_df = nm_df.drop(['yr', 'persons', 'mig_out_net'], axis=1)

    return nm_df
예제 #6
0
파일: compute.py 프로젝트: SANDAG/pydefm
def deaths(df, db_id, sim_year):
    """
    Calculate deaths by applying death rates to non-migrating population.

    Side effects: the ``deaths`` and ``survived`` columns are added to the
    DataFrame passed in, and three tables are written to 'defm.db' through
    ``log.insert_run``: 'deaths_by_cohort_by_age', 'deaths_sum_by_age' and
    'survived'.

    Parameters
    ----------
    df : pandas DataFrame
        population and death rates for current yr
    db_id : int
        primary key for current simulation
    sim_year : int
        year being simulated (not used by this function)

    Returns
    -------
    df : pandas DataFrame
        survived population per cohort for a given year, without the
        'deaths', 'yr', 'death_rate' and 'non_mig_pop' columns

    """
    df['deaths'] = (df['non_mig_pop'] * df['death_rate']).round()

    # report out deaths, leaving out cohorts that had none
    # NOTE: axis is given by keyword throughout; the positional form
    # drop(labels, 1) is no longer accepted by pandas >= 2.0
    deaths_out = df[df.deaths != 0].copy()
    deaths_out = deaths_out.drop(['mig_in_net'], axis=1)

    log.insert_run('defm.db', db_id, deaths_out, 'deaths_by_cohort_by_age')

    # turn the index levels into columns so they can be grouped on
    deaths_out = deaths_out.reset_index(drop=False)

    deaths_out = deaths_out.drop(
        ['non_mig_pop', 'death_rate', 'age', 'households'], axis=1)

    # sum the remaining columns over age for each cohort
    deaths_grouped = deaths_out.groupby(
        ['yr', 'race_ethn', 'mildep', 'sex', 'type'], as_index=False).sum()

    log.insert_run('defm.db', db_id, deaths_grouped, 'deaths_sum_by_age')

    # SPECIAL CASES
    # deaths not carried over into next year
    deaths_not_applied = (
        ((df['type'] == 'HP') & (df['mildep'] == 'Y'))
        | df['type'].isin(['COL', 'INS', 'MIL', 'OTH']))
    df['survived'] = np.where(
        deaths_not_applied,
        df['non_mig_pop'],  # special case
        df['non_mig_pop'] - df['deaths'])  # else

    # report out survivors, leaving out empty cohorts
    survived_out = df[df.non_mig_pop != 0].copy()
    survived_out = survived_out.drop(['mig_in_net'], axis=1)
    log.insert_run('defm.db', db_id, survived_out, 'survived')

    # drop other unnecessary columns
    df = df.drop(['deaths', 'yr', 'death_rate', 'non_mig_pop'], axis=1)
    return df
예제 #7
0
파일: defm.py 프로젝트: SANDAG/pydefm
#   columns:  'age', 'race_ethn', 'sex' (cohort),
#   'gq.type', 'mildep', 'persons', 'households'

population = extract.create_df('population', 'population_table')

# special case ratios
ins_ratio = extract.create_df('ins', 'rate_table')

oth_ratio = extract.create_df('oth', 'rate_table')


# base population to result database with year
# (tagged with the year before the first simulated year)
y0 = years['y1'] - 1
population['yr'] = y0
log.insert_run('defm.db', db_run_id, population, 'population')
# the year tag is only needed for the log entry above
# NOTE: axis is given by keyword; the positional form drop(labels, 1)
# is no longer accepted by pandas >= 2.0
population = population.drop(['yr'], axis=1)


# years to be used in model

population_summary = []  # initialize list for population by year

# iterate over all years (first and final year both included)
for index, yr in enumerate(range(years['y1'], years['yf'] + 1)):

    print('{} {}'.format(index, yr))

    # MIGRATION

    # rates for simulated yr joined with population DataFrame
예제 #8
0
# Load base population: SQL query to pandas DataFrame
#   columns:  'age', 'race_ethn', 'sex' (cohort),
#   'gq.type', 'mildep', 'persons', 'households'

population = extract.create_df('population', 'population_table')

# special case ratios
ins_ratio = extract.create_df('ins', 'rate_table')

oth_ratio = extract.create_df('oth', 'rate_table')

# base population to result database with year
# (tagged with the year before the first simulated year)
y0 = years['y1'] - 1
population['yr'] = y0
log.insert_run('defm.db', db_run_id, population, 'population')
# the year tag is only needed for the log entry above
# NOTE: axis is given by keyword; the positional form drop(labels, 1)
# is no longer accepted by pandas >= 2.0
population = population.drop(['yr'], axis=1)

# years to be used in model

population_summary = []  # initialize list for population by year

# iterate over all years (first and final year both included)
for index, yr in enumerate(range(years['y1'], years['yf'] + 1)):

    print('{} {}'.format(index, yr))

    # MIGRATION

    # rates for simulated yr joined with population DataFrame
    yr_mig = compute.rates_for_yr(population, mig_rates, yr)
예제 #9
0
파일: compute.py 프로젝트: SANDAG/pydefm
def births_sum(df, db_id, sim_year):
    """
    Sum births over all the ages in a given cohort.
    Set birth age to zero and reset DataFrame index.

    Side effect: rows with a non-zero number of births are written to the
    'newborns' table of 'defm.db' through ``log.insert_run``.

    Parameters
    ----------
    df : pandas DataFrame
        male and female births for each cohort and non-migrating population
    db_id : int
        primary key for current simulation
    sim_year : int
        year being simulated (not used by this function)

    Returns
    -------
    births_age0 : pandas DataFrame
        births summed across age for each cohort, indexed by
        ('age', 'race_ethn', 'sex') with age set to 0; columns are
        'mildep', 'type', 'persons' and 'households'

    """
    df = df.reset_index(drop=False)

    df = df[['yr', 'race_ethn', 'mildep', 'type', 'births_m', 'births_f']]

    births_grouped = df.groupby(
        ['yr', 'race_ethn', 'mildep', 'type'], as_index=False).sum()

    # Build one age-0 frame per sex: that sex's births become 'persons'
    # and the other sex's column is discarded.
    # NOTE: axis is given by keyword throughout; the positional form
    # drop(labels, 1) is no longer accepted by pandas >= 2.0
    frames_by_sex = []
    for sex, births_col, other_col in (('M', 'births_m', 'births_f'),
                                       ('F', 'births_f', 'births_m')):
        births = births_grouped.rename(columns={births_col: 'persons'})
        births['sex'] = sex
        births['age'] = 0
        births = births.set_index(['age', 'race_ethn', 'sex'])
        frames_by_sex.append(births.drop(other_col, axis=1))

    births_mf = pd.concat(frames_by_sex, axis=0)

    births_mf['households'] = 0  # temp ignore households

    # no births for this special case
    births_mf = births_mf[
        ~births_mf['type'].isin(['COL', 'MIL', 'INS', 'OTH'])]

    # report out newborns, leaving out cohorts without births
    newborns = births_mf[births_mf.persons != 0].copy()
    newborns = newborns.rename(columns={'persons': 'newborns'})
    newborns = newborns.drop('households', axis=1)

    log.insert_run('defm.db', db_id, newborns, 'newborns')

    births_mf = births_mf.drop('yr', axis=1)

    # SPECIAL CASE:
    # Births are estimated & reported out, but are not carried over into the
    # next year ( base_population.type="HP" and base_population.mildep="Y")
    # keep rows in which either type != 'HP' OR mildep != 'Y'
    # which results in dropping rows  where type = 'HP' AND mildep = 'Y'
    births_mf = births_mf[
        (births_mf['type'] != 'HP') | (births_mf['mildep'] != 'Y')]

    return births_mf
예제 #10
0
파일: compute.py 프로젝트: SANDAG/pydefm
def births_all(b_df, db_id, sim_year):
    """
    Calculate births for given year based on rates.
    Predict male births as 51% of all births & female births as the rest.
    Male births are the nearest integer (floor) after +0 or +0.5
    (randomly generated).

    Side effects: in the DataFrame passed in, ``birth_rate`` is set to zero
    for the special-case group quarters and a ``births_rounded`` column is
    added; numpy's global random seed is reset to 2010; rows with at least
    one birth are written to the 'mothers_n_births' table of 'defm.db'
    through ``log.insert_run``.

    Parameters
    ----------
    b_df : pandas.DataFrame
        with population for current yr and birth rates
    db_id : int
        primary key for current simulation
    sim_year : int
        year being simulated (not used by this function)

    Returns
    -------
    b_df : pandas DataFrame
        male and female births by cohort (race_ethn and age), restricted
        to rows whose 'yr' is not null

    """
    # SPECIAL CASE: no births, set rates to zero

    # when group quarters in ("COL","INS","MIL","OTH")
    b_df.loc[b_df['type'].isin(['COL', 'INS', 'MIL', 'OTH']),
             ['birth_rate']] = 0

    # total births =  population * birth rate (fill blanks w zero)
    b_df['births_rounded'] = (b_df['non_mig_pop'] *
                              b_df['birth_rate']).fillna(0.0)
    b_df = b_df.round({'births_rounded': 0})

    # male births 51%
    b_df['births_m_float'] = b_df['births_rounded'] * 0.51

    # 0 or 0.5 generated randomly by multiplying 0 or 1 by 0.5
    # (the seed is fixed, so every call draws the same sequence)
    np.random.seed(2010)
    b_df['randomNumCol'] = 0.5 * np.random.randint(2, size=b_df.shape[0])

    # Add random 0 or 0.5
    # Convert to int which truncates float (floor)
    b_df['births_m'] = (
        b_df['births_m_float'] + b_df['randomNumCol']).astype(int)

    # female births
    b_df['births_f'] = b_df['births_rounded'] - b_df['births_m']

    # remove rows w no birth rate
    # use yr column since yr column in original birth rates DataFrame
    b_df_notnull = b_df[b_df.yr.notnull()].copy()

    # Remove zero rows: keep a row when it has male OR female births.
    # (Testing births_m twice would drop rows that only have female births.)
    has_births = ((b_df_notnull.births_m != 0) |
                  (b_df_notnull.births_f != 0))
    births_db = b_df_notnull[has_births].reset_index(drop=False)

    # NOTE: axis is given by keyword; the positional form drop(labels, 1)
    # is no longer accepted by pandas >= 2.0
    births_db = births_db.drop(['mig_in_net', 'sex'], axis=1)

    # descriptive column names for the result database
    births_db = births_db.rename(columns={
        'births_m': 'male births',
        'births_f': 'female births',
        'randomNumCol': 'add (random) then floor',
        'age': 'mother age',
        'race_ethn': 'mother race_ethn',
        'births_rounded': 'total births',
        'births_m_float': 'male births (float)',
    })

    log.insert_run('defm.db', db_id, births_db, 'mothers_n_births')

    return b_df_notnull