Example #1
0
def test_read_file_from_egg():
    '''
    Check the utils.read_file() code path that the test name labels as
    "reading from an .egg": for the default parameters JSON located
    under CUR_PATH, the returned object must be an io.StringIO.
    '''
    json_name = 'default_parameters.json'
    search_dir = os.path.join(CUR_PATH)
    handle = utils.read_file(search_dir, json_name)

    assert isinstance(handle, io.StringIO)
Example #2
0
def test_read_file():
    '''
    Check utils.read_file() on a file inside the test_io_data folder:
    the returned object must be an io.TextIOWrapper (i.e. an ordinary
    text-mode file handle).
    '''
    pickle_name = 'SS_fsolve_inputs.pkl'
    data_dir = os.path.join(CUR_PATH, 'test_io_data')
    handle = utils.read_file(data_dir, pickle_name)

    assert isinstance(handle, io.TextIOWrapper)
Example #3
0
def ss_profiles(base_ss, base_params, reform_ss=None,
                reform_params=None, by_j=True, var='nssmat',
                plot_data=False,
                plot_title=None, path=None):
    '''
    Plot lifecycle profiles of given variable in the SS.

    Args:
        base_ss (dictionary): SS output from baseline run
        base_params (OG-USA Specifications class): baseline parameters
            object
        reform_ss (dictionary): SS output from reform run
        reform_params (OG-USA Specifications class): reform parameters
            object
        by_j (bool): if True, plot one profile per type j (columns of
            the variable); if False, plot the lambdas-weighted sum
            across the J types
        var (string): name of variable to plot
        plot_data (bool): whether to plot data values for given
            variable; only used when by_j is False and requires
            var == 'nssmat'
        plot_title (string): title for plot
        path (string): path to save figure to

    Returns:
        fig (Matplotlib plot object): plot of lifecycle profiles.
            Returned only when path is not given; when path is given
            the figure is saved there, closed, and None is returned.

    '''
    if reform_ss:
        assert (base_params.S == reform_params.S)
        assert (base_params.starting_age == reform_params.starting_age)
        assert (base_params.ending_age == reform_params.ending_age)
    age_vec = np.arange(base_params.starting_age,
                        base_params.starting_age + base_params.S)
    fig1, ax1 = plt.subplots()
    if by_j:
        cm = plt.get_cmap('coolwarm')
        ax1.set_prop_cycle(color=[cm(1. * i / 7) for i in range(7)])
        for j in range(base_params.J):
            plt.plot(age_vec, base_ss[var][:, j],
                     label='Baseline, j = ' + str(j))
            if reform_ss:
                plt.plot(age_vec, reform_ss[var][:, j],
                         label='Reform, j = ' + str(j), linestyle='--')
    else:
        base_var = (
            base_ss[var][:, :] *
            base_params.lambdas.reshape(1, base_params.J)).sum(axis=1)
        plt.plot(age_vec, base_var, label='Baseline')
        if reform_ss:
            reform_var = (
                reform_ss[var][:, :] *
                reform_params.lambdas.reshape(1, reform_params.J)).sum(axis=1)
            plt.plot(age_vec, reform_var, label='Reform', linestyle='--')
        if plot_data:
            assert var == 'nssmat'
            # NOTE(review): cur_path is not defined in this function; it
            # is expected to exist at module level -- confirm.
            # read_file hands back an open file-like object, so close it
            # once pandas has consumed it.
            with utils.read_file(
                    cur_path,
                    "data/labor/cps_hours_by_age_hourspct.txt") as labor_file:
                data = pd.read_csv(labor_file, header=0, delimiter='\t')
            piv = data.pivot(index='age', columns='hours_pct',
                             values='mean_hrs')
            lab_mat_basic = np.array(piv)
            lab_mat_basic /= np.nanmax(lab_mat_basic)
            piv2 = data.pivot(index='age', columns='hours_pct',
                              values='num_obs')
            weights = np.array(piv2)
            # Normalize weights within each age row; -1 lets NumPy infer
            # the number of ages instead of hard-coding it
            weights /= np.nansum(weights, axis=1).reshape(-1, 1)
            weighted = np.nansum((lab_mat_basic * weights), axis=1)
            # Pad the data series with NaN so it has one entry per model
            # age; ages without data are simply not drawn
            n_data_ages = weighted.shape[0]
            n_missing = age_vec.shape[0] - n_data_ages
            weighted = np.append(weighted, np.zeros(n_missing))
            weighted[n_data_ages:] = np.nan
            plt.plot(age_vec, weighted, linewidth=2.0, label='Data',
                     linestyle=':')
    plt.xlabel(r'Age')
    plt.ylabel(VAR_LABELS[var])
    plt.legend(loc=9, bbox_to_anchor=(0.5, -0.15), ncol=2)
    if plot_title:
        plt.title(plot_title, fontsize=15)
    if not path:
        # No output path: hand the open figure back to the caller
        return fig1
    fig_path1 = os.path.join(path)
    plt.savefig(fig_path1, bbox_inches="tight")
    plt.close()
Example #4
0
def get_imm_resid(totpers, min_yr, max_yr, graph=True):
    '''
    --------------------------------------------------------------------
    Calculate immigration rates by age as a residual given population
    levels in different periods, then output average calculated
    immigration rate. We have to replace the first mortality rate in
    this function in order to adjust the first implied immigration rate
    (Source: Population data come from Annual Estimates of the Resident
    Population by Single Year of Age and Sex: April 1, 2010 to July 1,
    2013 (Both sexes) National Characteristics, Vintage 2013, US Census
    Bureau,
    http://www.census.gov/popest/data/national/asrh/2013/index.html)
    --------------------------------------------------------------------
    INPUTS:
    totpers = integer >= 3, number of agent life periods (E+S)
    min_yr  = integer >= 0, age in years at which agents are born,
              minimum age
    max_yr  = integer >= 4, age in years at which agents die with
              certainty, maximum age
    graph   = boolean, =True if want graphical output

    OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION:
        utils.read_file()
        pop_rebin()
        get_fert()
        get_mort()
        pop_data.csv

    OBJECTS CREATED WITHIN FUNCTION:
    cur_path      = string, path in which this file resides
    pop_file      = open file-like object for the population data csv
    pop_data      = DataFrame read from pop_data.csv, with an 'Age'
                    column and columns '2010' through '2013'
    pop_data_samp = DataFrame, rows of pop_data with
                    min_yr - 1 <= Age <= max_yr - 1
    pop_2010_EpS  = (totpers,) vector, 2010 population rebinned by
                    pop_rebin() into totpers model periods
    pop_2011_EpS  = (totpers,) vector, same for 2011
    pop_2012_EpS  = (totpers,) vector, same for 2012
    pop_2013_EpS  = (totpers,) vector, same for 2013
    imm_mat       = (3, totpers) matrix, immigration rates computed as
                    residuals for each age in three successive pairs of
                    years
    pop11vec      = (3,) vector, youngest-period population in first
                    three years
    pop21vec      = (3,) vector, youngest-period population in last
                    three years
    fert_rates    = (totpers,) vector, fertility rates by model age
    mort_rates    = (totpers,) vector, mortality rates by model age
    infmort_rate  = scalar, infant mortality rate returned by get_mort()
    newbornvec    = (3,) vector, total births in first three years
    pop11mat      = (3, totpers-1) matrix, population of all but the
                    last period for first three years
    pop12mat      = (3, totpers-1) matrix, population of all but the
                    first period for first three years
    pop22mat      = (3, totpers-1) matrix, population of all but the
                    first period for last three years
    mort_mat      = (3, totpers-1) matrix, all but the last mortality
                    rate copied into 3 rows
    age_per       = (totpers,) vector, period numbers 1 through totpers
    imm_rates     = (totpers,) vector, average over the three years of
                    the residual immigration rates

    RETURNS: imm_rates
    --------------------------------------------------------------------
    '''
    cur_path = os.path.split(os.path.abspath(__file__))[0]
    # read_file hands back an open file-like object; close it as soon
    # as pandas has parsed the data
    with utils.read_file(cur_path,
                         "data/demographic/pop_data.csv") as pop_file:
        pop_data = pd.read_csv(pop_file, sep=',', thousands=',')
    pop_data_samp = pop_data[(pop_data['Age'] >= min_yr - 1) &
                             (pop_data['Age'] <= max_yr - 1)]
    # Rebin each year's population into totpers model periods
    pop_2010_EpS, pop_2011_EpS, pop_2012_EpS, pop_2013_EpS = (
        pop_rebin(np.array(pop_data_samp[year], dtype='f'), totpers)
        for year in ('2010', '2011', '2012', '2013'))
    # Create three years of estimated immigration rates for youngest age
    # individuals
    imm_mat = np.zeros((3, totpers))
    pop11vec = np.array([pop_2010_EpS[0], pop_2011_EpS[0],
                         pop_2012_EpS[0]])
    pop21vec = np.array([pop_2011_EpS[0], pop_2012_EpS[0],
                         pop_2013_EpS[0]])
    fert_rates = get_fert(totpers, min_yr, max_yr, False)
    mort_rates, infmort_rate = get_mort(totpers, min_yr, max_yr, False)
    newbornvec = np.dot(fert_rates, np.vstack((pop_2010_EpS,
                                               pop_2011_EpS,
                                               pop_2012_EpS)).T)
    imm_mat[:, 0] = ((pop21vec - (1 - infmort_rate) * newbornvec) /
                     pop11vec)
    # Estimate 3 years of immigration rates for all other-aged
    # individuals
    pop11mat = np.vstack((pop_2010_EpS[:-1], pop_2011_EpS[:-1],
                          pop_2012_EpS[:-1]))
    pop12mat = np.vstack((pop_2010_EpS[1:], pop_2011_EpS[1:],
                          pop_2012_EpS[1:]))
    pop22mat = np.vstack((pop_2011_EpS[1:], pop_2012_EpS[1:],
                          pop_2013_EpS[1:]))
    mort_mat = np.tile(mort_rates[:-1], (3, 1))
    imm_mat[:, 1:] = (pop22mat - (1 - mort_mat) * pop11mat) / pop12mat
    # Final estimated immigration rates are the averages over 3 years
    imm_rates = imm_mat.mean(axis=0)
    age_per = np.linspace(1, totpers, totpers)

    if graph:
        # --------------------------------------------------------------
        # output_fldr = string, path of the OUTPUT folder from cur_path
        # output_dir  = string, total path of OUTPUT folder
        # output_path = string, path of file name of figure to be saved
        # --------------------------------------------------------------
        fig, ax = plt.subplots()
        plt.scatter(age_per, imm_rates, s=40, c='red', marker='d')
        plt.plot(age_per, imm_rates)
        # for the minor ticks, use no labels; default NullFormatter
        minorLocator = MultipleLocator(1)
        ax.xaxis.set_minor_locator(minorLocator)
        # Pass the on/off flag positionally: the keyword was renamed
        # from `b` to `visible` across Matplotlib versions
        plt.grid(True, which='major', color='0.65', linestyle='-')
        plt.xlabel(r'Age $s$ (model periods)')
        plt.ylabel(r'Imm. rate $i_{s}$')
        plt.xlim((0, totpers + 1))
        # Create directory if OUTPUT directory does not already exist
        output_fldr = "OUTPUT/Demographics"
        output_dir = os.path.join(cur_path, output_fldr)
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, "imm_rates_orig")
        plt.savefig(output_path)

    return imm_rates
Example #5
0
def get_pop_objs(E, S, T, min_yr, max_yr, curr_year, GraphDiag=True):
    '''
    --------------------------------------------------------------------
    This function produces the demographics objects to be used in the
    OG-USA model package.
    --------------------------------------------------------------------
    INPUTS:
    E         = integer >= 1, number of model periods in which agent is
                not economically active
    S         = integer >= 3, number of model periods in which agent is
                economically active
    T         = integer > 2*S, number of periods to be simulated in TPI
    min_yr    = integer >= 0, age in years at which agents are born,
                minimum age
    max_yr    = integer >= 4, age in years at which agents die with
                certainty, maximum age
    curr_year = integer >= 2016, current year for which analysis will
                begin
    GraphDiag = boolean, =True if want graphical output and printed
                diagnostics

    OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION:
        get_fert()
        get_mort()
        get_imm_resid()
        utils.read_file()
        pop_rebin()
        immsolve()
        pop_data.csv

    OBJECTS CREATED WITHIN FUNCTION:
    fert_rates      = (E+S,) vector, fertility rates that correspond to
                      each model period of life
    mort_rates      = (E+S,) vector, mortality rates that correspond to
                      each model period of life
    infmort_rate    = scalar, infant mortality rate from get_mort()
    mort_rates_S    = (S,) vector, mortality rates that correspond to
                      each economically active model period of life
    imm_rates_orig  = (E+S,) vector, immigration rates by age estimated
                      as residuals from get_imm_resid()
    OMEGA_orig      = (E+S, E+S) matrix, transition matrix for
                      population distribution law of motion
    eigvalues       = (E+S,) vector, eigenvalues of OMEGA matrix
    eigvectors      = (E+S, E+S) matrix, matrix of eigenvectors of OMEGA
                      where each column is the eigenvector that goes
                      with the corresponding eigenvalue in eigvalues
    is_real         = (E+S,) boolean vector, True where the eigenvalue
                      has zero imaginary part
    real_eigvalues  = vector, real parts of the real eigenvalues
    g_n_SS          = scalar, steady-state population growth rate, which
                      is the largest real eigenvalue minus 1
    eigvec_raw      = (E+S,) vector, nonnormalized eigenvector
                      corresponding to the largest real eigenvalue
    omega_SS_orig   = (E+S,) vector, steady-state population
                      distribution which is normalized eigvec_raw
    omega_path_lev  = (E+S, T+S) matrix, time path of the population
                      distribution in levels
    cur_path        = string, path in which this file resides
    pop_file        = open file-like object for the population data csv
    pop_data        = DataFrame read from pop_data.csv
    pop_data_samp   = DataFrame, rows of pop_data with
                      min_yr - 1 <= Age <= max_yr - 1
    pop_2013        = vector, 2013 population by age from the data
    age_per_EpS     = (E+S,) vector, period numbers 1 through E+S
    pop_2013_EpS    = (E+S,) vector, population distribution by model
                      periods E + S in levels
    pop_2013_pct    = (E+S,) vector, 2013 population distribution in
                      percentages
    pop_curr        = (E+S,) vector, current-period population
                      distribution in levels
    data_year       = integer, most recent year in data
    per             = integer, index for period
    pop_next        = (E+S,) vector, next-period population distribution
    pop_past        = (E+S,) vector, population distribution one period
                      before pop_curr
    g_n_curr        = scalar, growth rate of the population in the last
                      S periods between pop_curr and pop_next
    imm_tol         = scalar > 0, tolerance for fsolve in immsolve()
    fixper          = integer, int(1.5 * S), period at which the
                      population distribution is fixed
    omega_SSfx      = (E+S,) vector, normalized population distribution
                      at period fixper
    imm_objs        = length-5 tuple, extra arguments passed to
                      immsolve() by fsolve
    imm_fulloutput  = tuple, full output of opt.fsolve
    imm_rates_adj   = (E+S,) vector, solution of fsolve, adjusted
                      immigration rates
    imm_diagdict    = dictionary, fsolve diagnostics (includes 'fvec')
    omega_path_S    = (S, T+S) matrix, time path of the normalized
                      distribution over the last S periods of life,
                      held fixed from period fixper onward
    g_n_path        = (T+S,) vector, time path of population growth
                      rates, set to g_n_SS after period fixper
    omega_S_preTP   = (S,) vector, normalized distribution over the last
                      S periods of life implied by pop_past
    imm_rates_mat   = (S, T+S) matrix, imm_rates_orig[E:] for the first
                      fixper periods and imm_rates_adj[E:] afterward

    RETURNS: omega_path_S.T, g_n_SS,
        omega_SSfx[-S:] / omega_SSfx[-S:].sum(), 1-mort_rates_S,
        mort_rates_S, g_n_path, imm_rates_mat.T, omega_S_preTP
    --------------------------------------------------------------------
    '''
    fert_rates = get_fert(E + S, min_yr, max_yr, graph=False)
    mort_rates, infmort_rate = get_mort(E + S, min_yr, max_yr,
                                        graph=False)
    mort_rates_S = mort_rates[-S:]
    imm_rates_orig = get_imm_resid(E + S, min_yr, max_yr, graph=False)
    OMEGA_orig = np.zeros((E + S, E + S))
    OMEGA_orig[0, :] = ((1 - infmort_rate) * fert_rates +
                        np.hstack((imm_rates_orig[0], np.zeros(E+S-1))))
    OMEGA_orig[1:, :-1] += np.diag(1 - mort_rates[:-1])
    OMEGA_orig[1:, 1:] += np.diag(imm_rates_orig[1:])

    # Solve for steady-state population growth rate and steady-state
    # population distribution by age using eigenvalue and eigenvector
    # decomposition
    eigvalues, eigvectors = np.linalg.eig(OMEGA_orig)
    is_real = np.isreal(eigvalues)
    real_eigvalues = eigvalues[is_real].real
    g_n_SS = real_eigvalues.max() - 1
    # The argmax is a position among the *real* eigenvalues only, so
    # apply the same mask to the eigenvector columns before indexing;
    # indexing the unfiltered matrix picks the wrong column whenever a
    # complex eigenvalue precedes the dominant real one
    eigvec_raw = eigvectors[:, is_real][:, real_eigvalues.argmax()].real
    omega_SS_orig = eigvec_raw / eigvec_raw.sum()

    # Generate time path of the nonstationary population distribution
    omega_path_lev = np.zeros((E + S, T + S))
    cur_path = os.path.split(os.path.abspath(__file__))[0]
    # read_file hands back an open file-like object; close it as soon
    # as pandas has parsed the data
    with utils.read_file(cur_path,
                         "data/demographic/pop_data.csv") as pop_file:
        pop_data = pd.read_csv(pop_file, sep=',', thousands=',')
    pop_data_samp = pop_data[(pop_data['Age'] >= min_yr - 1) &
                             (pop_data['Age'] <= max_yr - 1)]
    pop_2013 = np.array(pop_data_samp['2013'], dtype='f')
    # Generate the current population distribution given that E+S might
    # be less than max_yr-min_yr+1
    age_per_EpS = np.arange(1, E + S + 1)
    pop_2013_EpS = pop_rebin(pop_2013, E + S)
    pop_2013_pct = pop_2013_EpS / pop_2013_EpS.sum()
    # Age most recent population data to the current year of analysis
    pop_curr = pop_2013_EpS.copy()
    data_year = 2013
    pop_next = np.dot(OMEGA_orig, pop_curr)
    g_n_curr = ((pop_next[-S:].sum() - pop_curr[-S:].sum()) /
                pop_curr[-S:].sum())  # g_n in 2013
    pop_past = pop_curr  # assume 2012-2013 pop
    # Age the data to the current year
    for per in range(curr_year - data_year):
        pop_next = np.dot(OMEGA_orig, pop_curr)
        g_n_curr = ((pop_next[-S:].sum() - pop_curr[-S:].sum()) /
                    pop_curr[-S:].sum())
        pop_past = pop_curr
        pop_curr = pop_next

    # Generate time path of the population distribution
    omega_path_lev[:, 0] = pop_curr.copy()
    for per in range(1, T + S):
        pop_next = np.dot(OMEGA_orig, pop_curr)
        omega_path_lev[:, per] = pop_next.copy()
        pop_curr = pop_next.copy()

    # Force the population distribution after 1.5*S periods to be the
    # steady-state distribution by adjusting immigration rates, holding
    # constant mortality, fertility, and SS growth rates
    imm_tol = 1e-14
    fixper = int(1.5 * S)
    omega_SSfx = (omega_path_lev[:, fixper] /
                  omega_path_lev[:, fixper].sum())
    imm_objs = (fert_rates, mort_rates, infmort_rate,
                omega_path_lev[:, fixper], g_n_SS)
    # imm_objs is already a tuple, so fsolve unpacks it into five extra
    # positional arguments for immsolve
    imm_fulloutput = opt.fsolve(immsolve, imm_rates_orig,
                                args=imm_objs, full_output=True,
                                xtol=imm_tol)
    imm_rates_adj = imm_fulloutput[0]
    imm_diagdict = imm_fulloutput[1]
    omega_path_S = (omega_path_lev[-S:, :] /
                    np.tile(omega_path_lev[-S:, :].sum(axis=0), (S, 1)))
    omega_path_S[:, fixper:] = \
        np.tile(omega_path_S[:, fixper].reshape((S, 1)),
                (1, T + S - fixper))
    g_n_path = np.zeros(T + S)
    g_n_path[0] = g_n_curr
    g_n_path[1:] = ((omega_path_lev[-S:, 1:].sum(axis=0) -
                    omega_path_lev[-S:, :-1].sum(axis=0)) /
                    omega_path_lev[-S:, :-1].sum(axis=0))
    g_n_path[fixper + 1:] = g_n_SS
    omega_S_preTP = pop_past[-S:] / pop_past[-S:].sum()
    imm_rates_mat = np.hstack((
        np.tile(np.reshape(imm_rates_orig[E:], (S, 1)), (1, fixper)),
        np.tile(np.reshape(imm_rates_adj[E:], (S, 1)), (1, T + S - fixper))))

    if GraphDiag:
        # Check whether original SS population distribution is close to
        # the period-T population distribution
        omegaSSmaxdif = np.absolute(omega_SS_orig -
                                    (omega_path_lev[:, T] /
                                     omega_path_lev[:, T].sum())).max()
        if omegaSSmaxdif > 0.0003:
            print("POP. WARNING: Max. abs. dist. between original SS " +
                  "pop. dist'n and period-T pop. dist'n is greater than" +
                  " 0.0003. It is " + str(omegaSSmaxdif) + ".")
        else:
            print("POP. SUCCESS: orig. SS pop. dist is very close to " +
                  "period-T pop. dist'n. The maximum absolute " +
                  "difference is " + str(omegaSSmaxdif) + ".")

        # Plot the adjusted steady-state population distribution versus
        # the original population distribution. The difference should be
        # small
        omegaSSvTmaxdiff = np.absolute(omega_SS_orig - omega_SSfx).max()
        if omegaSSvTmaxdiff > 0.0003:
            print("POP. WARNING: The maximimum absolute difference " +
                  "between any two corresponding points in the original"
                  + " and adjusted steady-state population " +
                  "distributions is " + str(omegaSSvTmaxdiff) + ", " +
                  "which is greater than 0.0003.")
        else:
            print("POP. SUCCESS: The maximum absolute difference " +
                  "between any two corresponding points in the original"
                  + " and adjusted steady-state population " +
                  "distributions is " + str(omegaSSvTmaxdiff))
        fig, ax = plt.subplots()
        plt.plot(age_per_EpS, omega_SS_orig, label="Original Dist'n")
        plt.plot(age_per_EpS, omega_SSfx, label="Fixed Dist'n")
        # for the minor ticks, use no labels; default NullFormatter
        minorLocator = MultipleLocator(1)
        ax.xaxis.set_minor_locator(minorLocator)
        # Pass the on/off flag positionally: the keyword was renamed
        # from `b` to `visible` across Matplotlib versions
        plt.grid(True, which='major', color='0.65', linestyle='-')
        plt.title(
            'Original steady-state population distribution vs. fixed',
            fontsize=20)
        plt.xlabel(r'Age $s$')
        plt.ylabel(r"Pop. dist'n $\omega_{s}$")
        plt.xlim((0, E + S + 1))
        plt.legend(loc='upper right')
        # Create directory if OUTPUT directory does not already exist
        # --------------------------------------------------------------
        # output_fldr = string, path of the OUTPUT folder from cur_path
        # output_dir  = string, total path of OUTPUT folder
        # output_path = string, path of file name of figure to be saved
        # --------------------------------------------------------------
        output_fldr = "OUTPUT/Demographics"
        output_dir = os.path.join(cur_path, output_fldr)
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, "OrigVsFixSSpop")
        plt.savefig(output_path)
        plt.show()

        # Print whether or not the adjusted immigration rates solved the
        # zero condition. Use the largest residual in absolute value so
        # that large negative residuals are not ignored
        imm_max_err = np.absolute(imm_diagdict['fvec']).max()
        immtol_solved = imm_max_err < imm_tol
        if immtol_solved:
            print("POP. SUCCESS: Adjusted immigration rates solved " +
                  "with maximum absolute error of " +
                  str(imm_max_err) +
                  ", which is less than the tolerance of " +
                  str(imm_tol))
        else:
            print("POP. WARNING: Adjusted immigration rates did not " +
                  "solve. Maximum absolute error of " +
                  str(imm_max_err) +
                  " is greater than the tolerance of " + str(imm_tol))

        # Test whether the steady-state growth rates implied by the
        # adjusted OMEGA matrix equals the steady-state growth rate of
        # the original OMEGA matrix
        OMEGA2 = np.zeros((E + S, E + S))
        OMEGA2[0, :] = ((1 - infmort_rate) * fert_rates +
                        np.hstack((imm_rates_adj[0], np.zeros(E+S-1))))
        OMEGA2[1:, :-1] += np.diag(1 - mort_rates[:-1])
        OMEGA2[1:, 1:] += np.diag(imm_rates_adj[1:])
        eigvalues2, eigvectors2 = np.linalg.eig(OMEGA2)
        # Growth rate must come from the eigenvalues of the *adjusted*
        # matrix, not from those of OMEGA_orig
        g_n_SS_adj = (eigvalues2[np.isreal(eigvalues2)].real).max() - 1
        if np.max(np.absolute(g_n_SS_adj - g_n_SS)) > 10 ** (-8):
            print("FAILURE: The steady-state population growth rate" +
                  " from adjusted OMEGA is different (diff is " +
                  str(g_n_SS_adj - g_n_SS) + ") than the steady-" +
                  "state population growth rate from the original" +
                  " OMEGA.")
        elif np.max(np.absolute(g_n_SS_adj - g_n_SS)) <= 10 ** (-8):
            print("SUCCESS: The steady-state population growth rate" +
                  " from adjusted OMEGA is close to (diff is " +
                  str(g_n_SS_adj - g_n_SS) + ") the steady-" +
                  "state population growth rate from the original" +
                  " OMEGA.")

        # Do another test of the adjusted immigration rates. Create the
        # new OMEGA matrix implied by the new immigration rates. Plug in
        # the adjusted steady-state population distribution. Hit it with
        # the new OMEGA transition matrix and it should return the new
        # steady-state population distribution
        omega_new = np.dot(OMEGA2, omega_SSfx)
        omega_errs = np.absolute(omega_new - omega_SSfx)
        print("The maximum absolute difference between the adjusted " +
              "steady-state population distribution and the " +
              "distribution generated by hitting the adjusted OMEGA " +
              "transition matrix is " + str(omega_errs.max()))

        # Plot the original immigration rates versus the adjusted
        # immigration rates
        immratesmaxdiff = \
            np.absolute(imm_rates_orig - imm_rates_adj).max()
        print("The maximum absolute distance between any two points " +
              "of the original immigration rates and adjusted " +
              "immigration rates is " + str(immratesmaxdiff))
        fig, ax = plt.subplots()
        plt.plot(age_per_EpS, imm_rates_orig, label="Original Imm. Rates")
        plt.plot(age_per_EpS, imm_rates_adj, label="Adj. Imm. Rates")
        # for the minor ticks, use no labels; default NullFormatter
        minorLocator = MultipleLocator(1)
        ax.xaxis.set_minor_locator(minorLocator)
        plt.grid(True, which='major', color='0.65', linestyle='-')
        plt.title(
            'Original immigration rates vs. adjusted',
            fontsize=20)
        plt.xlabel(r'Age $s$')
        plt.ylabel(r"Imm. rates $i_{s}$")
        plt.xlim((0, E + S + 1))
        plt.legend(loc='upper center')
        output_path = os.path.join(output_dir, "OrigVsAdjImm")
        plt.savefig(output_path)
        plt.show()

        # Plot population distributions for data_year, curr_year,
        # periods int(0.5 * S) and S of the time path, and omega_SSfx
        fig, ax = plt.subplots()
        plt.plot(age_per_EpS, pop_2013_pct, label="2013 pop.")
        plt.plot(age_per_EpS, (omega_path_lev[:, 0] /
                               omega_path_lev[:, 0].sum()),
                 label=str(curr_year) + " pop.")
        plt.plot(age_per_EpS, (omega_path_lev[:, int(0.5 * S)] /
                               omega_path_lev[:, int(0.5 * S)].sum()),
                 label="T=" + str(int(0.5 * S)) + " pop.")
        plt.plot(age_per_EpS, (omega_path_lev[:, int(S)] /
                               omega_path_lev[:, int(S)].sum()),
                 label="T=" + str(int(S)) + " pop.")
        plt.plot(age_per_EpS, omega_SSfx, label="Adj. SS pop.")
        # for the minor ticks, use no labels; default NullFormatter
        minorLocator = MultipleLocator(1)
        ax.xaxis.set_minor_locator(minorLocator)
        plt.grid(True, which='major', color='0.65', linestyle='-')
        plt.title(
            'Population distribution at points in time path',
            fontsize=20)
        plt.xlabel(r'Age $s$')
        plt.ylabel(r"Pop. dist'n $\omega_{s}$")
        plt.xlim((0, E+S+1))
        plt.legend(loc='lower left')
        output_path = os.path.join(output_dir, "PopDistPath")
        plt.savefig(output_path)
        plt.show()

    # return omega_path_S, g_n_SS, omega_SSfx, survival rates,
    # mort_rates_S, g_n_path, imm_rates_mat, and omega_S_preTP
    return (omega_path_S.T, g_n_SS, omega_SSfx[-S:] /
            omega_SSfx[-S:].sum(), 1-mort_rates_S, mort_rates_S,
            g_n_path, imm_rates_mat.T, omega_S_preTP)
Example #6
0
def get_mort(totpers, min_yr, max_yr, graph=False):
    '''
    --------------------------------------------------------------------
    Generate a vector of mortality rates by model period age.
    (Source: Male and Female death probabilities Actuarial Life table,
    2011 Social Security Administration,
    http://www.ssa.gov/oact/STATS/table4c6.html)
    --------------------------------------------------------------------
    INPUTS:
    totpers = integer >= 3, total number of agent life periods (E+S)
    min_yr  = integer >= 0, age in years at which agents are born,
              minimum age
    max_yr  = integer >= 4, age in years at which agents die with
              certainty, maximum age
    graph   = boolean, =True if want graphical output

    OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION:
        utils.read_file()
        mort_rates2011.csv

    OBJECTS CREATED WITHIN FUNCTION:
    infmort_rate    = scalar > 0, infant mortality rate from 2015 U.S.
                      CIA World Factbook
    cur_path        = string, directory in which this file resides
    mort_file       = file-like object returned by utils.read_file()
                      for the mortality rate data (.csv)
    mort_data       = DataFrame, 2011 mortality rate data for men and
                      women
    age_year_all    = Series, ages by year (data age + 1) for which the
                      combined mortality rate is finite
    mort_rates_all  = Series, population-weighted average of male and
                      female mortality rates, finite entries only
    mort_rates_mxyr = (max_yr,) vector, first max_yr mortality rates
    num_sub_bins    = integer, number of sub-bins that each data year
                      is broken up into
    len_subbins     = scalar, length of a model period in data sub-bins
    mort_rates_sub  = (num_sub_bins*max_yr,) vector, mortality rates by
                      sub-bin implied by mort_rates_mxyr
    mort_rates      = (totpers,) vector, mortality rates that correspond
                      to each period of life
    beg_sub_bin     = integer >= 0, index of beginning sub-bin for
                      calculation of cumulative mortality rate of given
                      model period
    end_sub_bin     = integer >= 0, index of ending sub-bin + 1 for
                      calculation of cumulative mortality rate of given
                      model period

    FILES CREATED BY THIS FUNCTION:
        mort_rates.png (only if graph=True)

    RAISES: ValueError if the data hold fewer than max_yr finite
        mortality rates

    RETURNS: mort_rates, infmort_rate
    --------------------------------------------------------------------
    '''
    # Get mortality rate by age data
    infmort_rate = 0.00587  # taken from 2015 U.S. infant mortality rate
    cur_path = os.path.split(os.path.abspath(__file__))[0]
    mort_file = utils.read_file(
        cur_path, 'data/demographic/mort_rates2011.csv')
    mort_data = pd.read_csv(mort_file, sep=',', thousands=',')
    age_year_all = mort_data['Age'] + 1
    male_lives = mort_data['Num. Male Lives']
    female_lives = mort_data['Num. Female Lives']
    # Weight the male and female rates by the number of lives of each
    mort_rates_all = (
        ((mort_data['Male Mort. Rate'] * male_lives) +
         (mort_data['Female Mort. Rate'] * female_lives)) /
        (male_lives + female_lives))
    # Ages with no lives give 0/0; keep only finite rates
    finite = np.isfinite(mort_rates_all)
    age_year_all = age_year_all[finite]
    mort_rates_all = mort_rates_all[finite]

    # Calculate implied mortality rates in sub-bins of mort_rates_all.
    # Work on a plain array so that indexing is positional and does not
    # depend on the labels that survive the finite filter above.
    mort_rates_mxyr = np.asarray(mort_rates_all, dtype=float)[:max_yr]
    if mort_rates_mxyr.size < max_yr:
        raise ValueError(
            'Mortality data contain only {} finite rates but max_yr is '
            '{}.'.format(mort_rates_mxyr.size, max_yr))
    num_sub_bins = 100
    len_subbins = (np.float64((max_yr - min_yr + 1) * num_sub_bins) /
                   totpers)
    # Constant per-sub-bin rate that compounds to the annual rate
    mort_rates_sub = np.repeat(
        1 - (1 - mort_rates_mxyr) ** (1.0 / num_sub_bins), num_sub_bins)

    # Cumulative mortality over the sub-bins that make up each model
    # period
    mort_rates = np.zeros(totpers)
    end_sub_bin = 0
    for i in range(totpers):
        beg_sub_bin = end_sub_bin
        end_sub_bin = int(np.rint((i + 1) * len_subbins))
        survive = 1 - mort_rates_sub[beg_sub_bin:end_sub_bin]
        mort_rates[i] = 1 - survive.prod()
    mort_rates[-1] = 1  # Mortality rate in last period is set to 1

    if graph:
        '''
        ----------------------------------------------------------------
        age_mid_new = (totpers,) vector, midpoint age of each model
                      period age bin
        output_fldr = string, folder in current path to save files
        output_dir  = string, total path of OUTPUT folder
        output_path = string, path of file name of figure to be saved
        ----------------------------------------------------------------
        '''
        # Built-in float replaces the np.float alias removed from NumPy
        period_len = float(max_yr) / totpers
        age_mid_new = (np.linspace(period_len, max_yr, totpers) -
                       0.5 * period_len)
        fig, ax = plt.subplots()
        plt.scatter(np.hstack([0, age_year_all]),
                    np.hstack([infmort_rate, mort_rates_all]),
                    s=20, c='blue', marker='o', label='Data')
        plt.scatter(np.hstack([0, age_mid_new]),
                    np.hstack([infmort_rate, mort_rates]),
                    s=40, c='red', marker='d',
                    label='Model period (cumulative)')
        plt.plot(np.hstack([0, age_year_all[min_yr - 1:max_yr]]),
                 np.hstack([infmort_rate,
                            mort_rates_all[min_yr - 1:max_yr]]))
        plt.axvline(x=max_yr, color='red', linestyle='-', linewidth=1)
        # for the minor ticks, use no labels; default NullFormatter
        ax.xaxis.set_minor_locator(MultipleLocator(1))
        # Visibility flag is passed positionally: the `b` keyword was
        # renamed in newer Matplotlib releases
        plt.grid(True, which='major', color='0.65', linestyle='-')
        plt.xlabel(r'Age $s$')
        plt.ylabel(r'Mortality rate $\rho_{s}$')
        plt.xlim((min_yr - 2, age_year_all.max() + 2))
        plt.ylim((-0.05, 1.05))
        plt.legend(loc='upper left')
        plt.text(-5, -0.2,
                 "Source: Actuarial Life table, 2011 Social Security " +
                 "Administration.", fontsize=9)
        plt.tight_layout(rect=(0, 0.03, 1, 1))
        # Create directory if OUTPUT directory does not already exist
        output_fldr = "OUTPUT/Demographics"
        output_dir = os.path.join(cur_path, output_fldr)
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, "mort_rates")
        plt.savefig(output_path)

    return mort_rates, infmort_rate
Example #7
0
def get_fert(totpers, min_yr, max_yr, graph=False):
    '''
    --------------------------------------------------------------------
    Generate a vector of fertility rates by model period age that
    corresponds to the fertility rate data by age in years
    (Source: National Vital Statistics Reports, Volume 64, Number 1,
    January 15, 2015, Table 3, final 2013 data
    http://www.cdc.gov/nchs/data/nvsr/nvsr64/nvsr64_01.pdf)
    --------------------------------------------------------------------
    INPUTS:
    totpers = integer >= 3, total number of agent life periods (E+S)
    min_yr  = integer >= 0, age in years at which agents are born,
              minimum age
    max_yr  = integer >= 4, age in years at which agents die with
              certainty, maximum age
    graph   = boolean, =True if want graphical output

    OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION:
        utils.read_file()
        pop_data.csv

    OBJECTS CREATED WITHIN FUNCTION:
    cur_path       = string, directory in which this file resides
    pop_file       = file-like object returned by utils.read_file() for
                     the population data (.csv)
    pop_data       = DataFrame of population by age and year
    pop_data_samp  = DataFrame, rows of pop_data with Age between
                     min_yr-1 and max_yr-1
    curr_pop       = vector, 2013 population for the sampled ages
    curr_pop_pct   = vector, curr_pop as shares of its total
    fert_data      = (13,) vector, fertility rates for given age bins.
                     We divide numbers by 2,000 because original data is
                     in births per 1000 women. We assume an equal number
                     of men. Added two zeros on the front and on the
                     back to make spline interpolation work right
    age_midp       = (13,) vector, midpoint age of age bins ranges from
                     original data (9, 10, 10-14, 15-17, 18-19, 20-24,
                     25-29, 30-34, 35-39, 40-44, 45-49, 55, 56). The
                     first two and last two are not data
    fert_func      = function, generated by interp1d function, takes
                     ages and returns the interpolated fertility rates
    binsize        = scalar > 0, size of each model period bin in data
                     years
    num_sub_bins   = integer, an arbitrarily and deliberately large
                     number of sub-bins that each data year is broken
                     up into
    len_subbins    = scalar, length of a model period in data sub-bins
    age_sub        = (num_sub_bins*max_yr,) vector, ages assigned to
                     each data sub-bin
    curr_pop_sub   = vector, population share of each sub-bin (each
                     year's share split evenly over its sub-bins)
    fert_rates_sub = vector, fertility rates by sub-bin interpolated
                     from fert_func(), zero outside the spline support
    pred_ind       = boolean vector, =True if the sub-bin age lies
                     strictly inside the spline support
    fert_rates     = (totpers,) vector, fertility rates for each model
                     period of life
    beg_sub_bin    = integer >= 0, index of beginning sub-bin for
                     calculation of average fertility rate of given
                     model period
    end_sub_bin    = integer >= 0, index of ending sub-bin + 1 for
                     calculation of average fertility rate of given
                     model period

    FILES CREATED BY THIS FUNCTION:
        fert_rates.png (only if graph=True)

    RETURNS: fert_rates
    --------------------------------------------------------------------
    '''
    # Get current population data (2013) for weighting
    cur_path = os.path.split(os.path.abspath(__file__))[0]
    pop_file = utils.read_file(cur_path,
                               "data/demographic/pop_data.csv")
    pop_data = pd.read_csv(pop_file, sep=',', thousands=',')
    pop_data_samp = pop_data[(pop_data['Age'] >= min_yr - 1) &
                             (pop_data['Age'] <= max_yr - 1)]
    curr_pop = np.array(pop_data_samp['2013'], dtype='f')
    curr_pop_pct = curr_pop / curr_pop.sum()
    # Get fertility rate by age-bin data
    fert_data = (np.array([0.0, 0.0, 0.3, 12.3, 47.1, 80.7, 105.5, 98.0,
                           49.3, 10.4, 0.8, 0.0, 0.0]) / 2000)
    age_midp = np.array([9, 10, 12, 16, 18.5, 22, 27, 32, 37, 42, 47,
                         55, 56])
    # Generate interpolation functions for fertility rates
    fert_func = si.interp1d(age_midp, fert_data, kind='cubic')
    # Calculate average fertility rate in each age bin using a large
    # number of points in each bin.
    num_years = max_yr - min_yr + 1
    binsize = num_years / totpers
    # Must be an integer: it is used as the repeat count in np.repeat,
    # which rejects float counts
    num_sub_bins = 10000
    # Same definition as in get_mort(); equals 100 * num_sub_bins /
    # totpers for the usual min_yr=1, max_yr=100
    len_subbins = np.float64(num_years * num_sub_bins) / totpers
    sub_width = np.float64(binsize) / num_sub_bins
    age_sub = (np.linspace(sub_width, np.float64(max_yr),
                           int(num_sub_bins * max_yr)) -
               0.5 * sub_width)
    curr_pop_sub = np.repeat(np.float64(curr_pop_pct) / num_sub_bins,
                             num_sub_bins)
    # NOTE(review): age_sub has num_sub_bins*max_yr entries while
    # curr_pop_sub has num_sub_bins entries per sampled data row; the
    # mask below only lines up when those counts agree (e.g. min_yr=1
    # with one data row per age) -- confirm before using other ranges.
    fert_rates_sub = np.zeros(curr_pop_sub.shape)
    pred_ind = (age_sub > age_midp[0]) & (age_sub < age_midp[-1])
    age_pred = age_sub[pred_ind]
    fert_rates_sub[pred_ind] = np.float64(fert_func(age_pred))

    # Population-weighted average fertility rate over the sub-bins that
    # make up each model period
    fert_rates = np.zeros(totpers)
    end_sub_bin = 0
    for i in range(totpers):
        beg_sub_bin = end_sub_bin
        end_sub_bin = int(np.rint((i + 1) * len_subbins))
        pop_wgts = curr_pop_sub[beg_sub_bin:end_sub_bin]
        fert_rates[i] = (
            (pop_wgts * fert_rates_sub[beg_sub_bin:end_sub_bin]).sum() /
            pop_wgts.sum())

    if graph:
        '''
        ----------------------------------------------------------------
        age_fine_pred  = (300,) vector, equally spaced support of ages
                         between the minimum and maximum interpolating
                         ages
        fert_fine_pred = (300,) vector, interpolated fertility rates
                         based on age_fine_pred
        age_fine       = (302,) vector of ages with min_yr and max_yr
                         added at the ends
        fert_fine      = (302,) vector of fertility rates with a zero
                         added at each end
        age_mid_new    = (totpers,) vector, midpoint age of each model
                         period age bin
        output_fldr    = string, folder in current path to save files
        output_dir     = string, total path of OUTPUT folder
        output_path    = string, path of file name of figure to be saved
        ----------------------------------------------------------------
        '''
        # Generate finer age vector and fertility rate vector for
        # graphing cubic spline interpolating function
        age_fine_pred = np.linspace(age_midp[0], age_midp[-1], 300)
        fert_fine_pred = fert_func(age_fine_pred)
        age_fine = np.hstack((min_yr, age_fine_pred, max_yr))
        fert_fine = np.hstack((0, fert_fine_pred, 0))
        # Built-in float replaces the np.float alias removed from NumPy
        period_len = float(max_yr) / totpers
        age_mid_new = (np.linspace(period_len, max_yr, totpers) -
                       0.5 * period_len)

        fig, ax = plt.subplots()
        plt.scatter(age_midp, fert_data, s=70, c='blue', marker='o',
                    label='Data')
        plt.scatter(age_mid_new, fert_rates, s=40, c='red', marker='d',
                    label='Model period (integrated)')
        plt.plot(age_fine, fert_fine, label='Cubic spline')
        # for the minor ticks, use no labels; default NullFormatter
        ax.xaxis.set_minor_locator(MultipleLocator(1))
        # Visibility flag is passed positionally: the `b` keyword was
        # renamed in newer Matplotlib releases
        plt.grid(True, which='major', color='0.65', linestyle='-')
        plt.xlabel(r'Age $s$')
        plt.ylabel(r'Fertility rate $f_{s}$')
        plt.xlim((min_yr - 1, max_yr + 1))
        plt.ylim((-0.15 * (fert_fine_pred.max()),
                  1.15 * (fert_fine_pred.max())))
        plt.legend(loc='upper right')
        plt.text(-5, -0.018,
                 "Source: National Vital Statistics Reports, " +
                 "Volume 64, Number 1, January 15, 2015.", fontsize=9)
        plt.tight_layout(rect=(0, 0.03, 1, 1))
        # Create directory if OUTPUT directory does not already exist
        output_fldr = "OUTPUT/Demographics"
        output_dir = os.path.join(cur_path, output_fldr)
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, "fert_rates")
        plt.savefig(output_path)

    return fert_rates
Example #8
0
def _build_omega(fert_rates, mort_rates, infmort_rate, imm_rates):
    '''
    Assemble the square transition matrix of the population law of
    motion from fertility, mortality and immigration rates by age.
    '''
    totpers = len(fert_rates)
    omega = np.zeros((totpers, totpers))
    # First row: fertility net of infant mortality for every age, plus
    # the immigration rate of the first age in the first column
    omega[0, :] = ((1 - infmort_rate) * fert_rates +
                   np.hstack((imm_rates[0], np.zeros(totpers - 1))))
    # Sub-diagonal: survival rates; diagonal (rows 1+): immigration rates
    omega[1:, :-1] += np.diag(1 - mort_rates[:-1])
    omega[1:, 1:] += np.diag(imm_rates[1:])
    return omega


def _largest_real_eig(matrix):
    '''
    Return the largest purely real eigenvalue of matrix together with
    the real part of the eigenvector that belongs to it.
    '''
    eigvalues, eigvectors = np.linalg.eig(matrix)
    # Positions of the real eigenvalues in the FULL eigenvalue array, so
    # that the chosen position also indexes the eigenvector columns
    real_idx = np.flatnonzero(np.isreal(eigvalues))
    top_idx = real_idx[eigvalues[real_idx].real.argmax()]
    return eigvalues[top_idx].real, eigvectors[:, top_idx].real


def _save_pop_plot(series, title, ylabel, xmax, legend_loc, output_path):
    '''
    Draw the (x, y, label) line series on a new figure with the common
    demographics formatting, save it to output_path and show it.
    '''
    fig, ax = plt.subplots()
    for xvals, yvals, label in series:
        plt.plot(xvals, yvals, label=label)
    # for the minor ticks, use no labels; default NullFormatter
    ax.xaxis.set_minor_locator(MultipleLocator(1))
    # Visibility flag is passed positionally: the `b` keyword was
    # renamed in newer Matplotlib releases
    plt.grid(True, which='major', color='0.65', linestyle='-')
    plt.title(title, fontsize=20)
    plt.xlabel(r'Age $s$')
    plt.ylabel(ylabel)
    plt.xlim((0, xmax))
    plt.legend(loc=legend_loc)
    plt.savefig(output_path)
    plt.show()


def get_pop_objs(E, S, T, min_yr, max_yr, curr_year, GraphDiag=True):
    '''
    --------------------------------------------------------------------
    This function produces the demographics objects to be used in the
    OG-USA model package.
    --------------------------------------------------------------------
    INPUTS:
    E         = integer >= 1, number of model periods in which agent is
                not economically active
    S         = integer >= 3, number of model periods in which agent is
                economically active
    T         = integer > 2*S, number of periods to be simulated in TPI
    min_yr    = integer >= 0, age in years at which agents are born,
                minimum age
    max_yr    = integer >= 4, age in years at which agents die with
                certainty, maximum age
    curr_year = integer >= 2016, current year for which analysis will
                begin
    GraphDiag = boolean, =True if want graphical output and printed
                diagnostics

    OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION:
        get_fert()
        get_mort()
        get_imm_resid()
        utils.read_file()
        pop_rebin()
        immsolve()
        pop_data.csv

    OBJECTS CREATED WITHIN FUNCTION:
    fert_rates      = (E+S,) vector, fertility rates that correspond to
                      each model period of life
    mort_rates      = (E+S,) vector, mortality rates that correspond to
                      each model period of life
    infmort_rate    = scalar > 0, infant mortality rate
    mort_rates_S    = (S,) vector, mortality rates that correspond to
                      each economically active model period of life
    imm_rates_orig  = (E+S,) vector, immigration rates by age from
                      get_imm_resid()
    OMEGA_orig      = (E+S, E+S) matrix, transition matrix for
                      population distribution law of motion
    g_n_SS          = scalar, steady-state population growth rate:
                      largest real eigenvalue of OMEGA_orig minus 1
    omega_SS_orig   = (E+S,) vector, eigenvector of that eigenvalue
                      normalized to sum to one (diagnostics only)
    omega_path_lev  = (E+S, T+S) matrix, time path of the population
                      in levels starting in curr_year
    pop_2013_EpS    = (E+S,) vector, 2013 population rebinned to model
                      periods by pop_rebin()
    pop_2013_pct    = (E+S,) vector, pop_2013_EpS as shares
    age_per_EpS     = (E+S,) vector, period numbers 1 through E+S
    data_year       = integer, most recent year in data
    g_n_curr        = scalar, growth rate of the last S age groups in
                      the final step of aging the data to curr_year
    pop_past        = (E+S,) vector, population one period before the
                      start of the time path
    imm_tol         = scalar > 0, tolerance for fsolve in immsolve()
    fixper          = integer, int(1.5*S), period from which the
                      distribution is held fixed
    omega_SSfx      = (E+S,) vector, population shares in period fixper
    imm_objs        = tuple of extra arguments passed to immsolve()
    imm_fulloutput  = tuple returned by fsolve with full_output=True
    imm_rates_adj   = (E+S,) vector, immigration rates solved by fsolve
    imm_diagdict    = dictionary of fsolve diagnostics (uses 'fvec')
    omega_path_S    = (S, T+S) matrix, shares of the last S age groups
                      along the time path, constant from fixper onward

    RETURNS: omega_path_S.T (T+S, S), g_n_SS,
        omega_SSfx[-S:] / omega_SSfx[-S:].sum() (S,),
        1-mort_rates_S (S,), mort_rates_S (S,), g_n_path (T+S,),
        imm_rates_mat.T (T+S, S), omega_S_preTP (S,)
    --------------------------------------------------------------------
    '''
    totpers = E + S
    fert_rates = get_fert(totpers, min_yr, max_yr, graph=False)
    mort_rates, infmort_rate = get_mort(totpers, min_yr, max_yr,
                                        graph=False)
    mort_rates_S = mort_rates[-S:]
    imm_rates_orig = get_imm_resid(totpers, min_yr, max_yr, graph=False)
    OMEGA_orig = _build_omega(fert_rates, mort_rates, infmort_rate,
                              imm_rates_orig)

    # Solve for steady-state population growth rate and steady-state
    # population distribution by age using eigenvalue and eigenvector
    # decomposition
    eig_max, eigvec_raw = _largest_real_eig(OMEGA_orig)
    g_n_SS = eig_max - 1
    omega_SS_orig = eigvec_raw / eigvec_raw.sum()

    # Read the population data and rebin it to model periods, given that
    # E+S might be less than max_yr-min_yr+1
    cur_path = os.path.split(os.path.abspath(__file__))[0]
    pop_file = utils.read_file(cur_path,
                               "data/demographic/pop_data.csv")
    pop_data = pd.read_csv(pop_file, sep=',', thousands=',')
    pop_data_samp = pop_data[(pop_data['Age'] >= min_yr - 1) &
                             (pop_data['Age'] <= max_yr - 1)]
    pop_2013 = np.array(pop_data_samp['2013'], dtype='f')
    age_per_EpS = np.arange(1, totpers + 1)
    pop_2013_EpS = pop_rebin(pop_2013, totpers)
    pop_2013_pct = pop_2013_EpS / pop_2013_EpS.sum()

    # Age most recent population data to the current year of analysis
    pop_curr = pop_2013_EpS.copy()
    data_year = 2013
    pop_next = np.dot(OMEGA_orig, pop_curr)
    g_n_curr = ((pop_next[-S:].sum() - pop_curr[-S:].sum()) /
                pop_curr[-S:].sum())  # g_n in 2013
    pop_past = pop_curr  # assume 2012-2013 pop
    for per in range(curr_year - data_year):
        pop_next = np.dot(OMEGA_orig, pop_curr)
        g_n_curr = ((pop_next[-S:].sum() - pop_curr[-S:].sum()) /
                    pop_curr[-S:].sum())
        pop_past = pop_curr
        pop_curr = pop_next

    # Generate time path of the nonstationary population distribution
    omega_path_lev = np.zeros((totpers, T + S))
    omega_path_lev[:, 0] = pop_curr
    for per in range(1, T + S):
        omega_path_lev[:, per] = np.dot(OMEGA_orig,
                                        omega_path_lev[:, per - 1])

    # Force the population distribution after 1.5*S periods to be the
    # steady-state distribution by adjusting immigration rates, holding
    # constant mortality, fertility, and SS growth rates
    imm_tol = 1e-14
    fixper = int(1.5 * S)
    omega_SSfx = (omega_path_lev[:, fixper] /
                  omega_path_lev[:, fixper].sum())
    imm_objs = (fert_rates, mort_rates, infmort_rate,
                omega_path_lev[:, fixper], g_n_SS)
    imm_fulloutput = opt.fsolve(immsolve, imm_rates_orig,
                                args=imm_objs, full_output=True,
                                xtol=imm_tol)
    imm_rates_adj = imm_fulloutput[0]
    imm_diagdict = imm_fulloutput[1]

    # Shares of the last S age groups in each period, held constant from
    # fixper onward
    pop_S_lev = omega_path_lev[-S:, :]
    pop_S_tot = pop_S_lev.sum(axis=0)
    omega_path_S = pop_S_lev / np.tile(pop_S_tot, (S, 1))
    omega_path_S[:, fixper:] = np.tile(
        omega_path_S[:, fixper].reshape((S, 1)), (1, T + S - fixper))

    # Growth rate of the last S age groups along the path, set to the
    # steady-state rate after fixper
    g_n_path = np.zeros(T + S)
    g_n_path[0] = g_n_curr
    g_n_path[1:] = (pop_S_tot[1:] - pop_S_tot[:-1]) / pop_S_tot[:-1]
    g_n_path[fixper + 1:] = g_n_SS
    omega_S_preTP = pop_past[-S:] / pop_past[-S:].sum()

    # Original immigration rates before fixper, adjusted rates afterwards
    imm_rates_mat = np.hstack((
        np.tile(np.reshape(imm_rates_orig[E:], (S, 1)), (1, fixper)),
        np.tile(np.reshape(imm_rates_adj[E:], (S, 1)),
                (1, T + S - fixper))))

    if GraphDiag:
        # Check whether original SS population distribution is close to
        # the period-T population distribution
        omegaSSmaxdif = np.absolute(
            omega_SS_orig -
            (omega_path_lev[:, T] / omega_path_lev[:, T].sum())).max()
        if omegaSSmaxdif > 0.0003:
            print("POP. WARNING: Max. abs. dist. between original SS " +
                  "pop. dist'n and period-T pop. dist'n is greater than" +
                  " 0.0003. It is " + str(omegaSSmaxdif) + ".")
        else:
            print("POP. SUCCESS: orig. SS pop. dist is very close to " +
                  "period-T pop. dist'n. The maximum absolute " +
                  "difference is " + str(omegaSSmaxdif) + ".")

        # Plot the adjusted steady-state population distribution versus
        # the original population distribution. The difference should be
        # small
        omegaSSvTmaxdiff = np.absolute(omega_SS_orig - omega_SSfx).max()
        if omegaSSvTmaxdiff > 0.0003:
            print("POP. WARNING: The maximimum absolute difference " +
                  "between any two corresponding points in the original"
                  + " and adjusted steady-state population " +
                  "distributions is " + str(omegaSSvTmaxdiff) + ", " +
                  "which is greater than 0.0003.")
        else:
            print("POP. SUCCESS: The maximum absolute difference " +
                  "between any two corresponding points in the original"
                  + " and adjusted steady-state population " +
                  "distributions is " + str(omegaSSvTmaxdiff))

        # Create directory if OUTPUT directory does not already exist
        output_fldr = "OUTPUT/Demographics"
        output_dir = os.path.join(cur_path, output_fldr)
        os.makedirs(output_dir, exist_ok=True)
        _save_pop_plot(
            [(age_per_EpS, omega_SS_orig, "Original Dist'n"),
             (age_per_EpS, omega_SSfx, "Fixed Dist'n")],
            'Original steady-state population distribution vs. fixed',
            r"Pop. dist'n $\omega_{s}$", totpers + 1, 'upper right',
            os.path.join(output_dir, "OrigVsFixSSpop"))

        # Print whether or not the adjusted immigration rates solved the
        # zero condition. The error is the largest residual in absolute
        # value, so large negative residuals are not overlooked.
        imm_maxerr = np.absolute(imm_diagdict['fvec']).max()
        if imm_maxerr < imm_tol:
            print("POP. SUCCESS: Adjusted immigration rates solved " +
                  "with maximum absolute error of " +
                  str(imm_maxerr) +
                  ", which is less than the tolerance of " +
                  str(imm_tol))
        else:
            print("POP. WARNING: Adjusted immigration rates did not " +
                  "solve. Maximum absolute error of " +
                  str(imm_maxerr) +
                  " is greater than the tolerance of " + str(imm_tol))

        # Test whether the steady-state growth rates implied by the
        # adjusted OMEGA matrix equals the steady-state growth rate of
        # the original OMEGA matrix
        OMEGA2 = _build_omega(fert_rates, mort_rates, infmort_rate,
                              imm_rates_adj)
        g_n_SS_adj = _largest_real_eig(OMEGA2)[0] - 1
        g_n_diff = g_n_SS_adj - g_n_SS
        if np.absolute(g_n_diff) <= 10 ** (-8):
            print("SUCCESS: The steady-state population growth rate" +
                  " from adjusted OMEGA is close to (diff is " +
                  str(g_n_diff) + ") the steady-" +
                  "state population growth rate from the original" +
                  " OMEGA.")
        else:
            print("FAILURE: The steady-state population growth rate" +
                  " from adjusted OMEGA is different (diff is " +
                  str(g_n_diff) + ") than the steady-" +
                  "state population growth rate from the original" +
                  " OMEGA.")

        # Do another test of the adjusted immigration rates. Create the
        # new OMEGA matrix implied by the new immigration rates. Plug in
        # the adjusted steady-state population distribution. Hit it with
        # the new OMEGA transition matrix and it should return the new
        # steady-state population distribution
        # NOTE(review): omega_new is not rescaled by (1 + g_n_SS) before
        # the comparison -- confirm this matches what immsolve() solves.
        omega_new = np.dot(OMEGA2, omega_SSfx)
        omega_errs = np.absolute(omega_new - omega_SSfx)
        print("The maximum absolute difference between the adjusted " +
              "steady-state population distribution and the " +
              "distribution generated by hitting the adjusted OMEGA " +
              "transition matrix is " + str(omega_errs.max()))

        # Plot the original immigration rates versus the adjusted
        # immigration rates
        immratesmaxdiff = \
            np.absolute(imm_rates_orig - imm_rates_adj).max()
        print("The maximum absolute distance between any two points " +
              "of the original immigration rates and adjusted " +
              "immigration rates is " + str(immratesmaxdiff))
        _save_pop_plot(
            [(age_per_EpS, imm_rates_orig, "Original Imm. Rates"),
             (age_per_EpS, imm_rates_adj, "Adj. Imm. Rates")],
            'Original immigration rates vs. adjusted',
            r"Imm. rates $i_{s}$", totpers + 1, 'upper center',
            os.path.join(output_dir, "OrigVsAdjImm"))

        # Plot population distributions for data_year, curr_year,
        # periods 0.5*S and S of the time path, and omega_SSfx
        path_series = [(age_per_EpS, pop_2013_pct, "2013 pop."),
                       (age_per_EpS,
                        omega_path_lev[:, 0] / omega_path_lev[:, 0].sum(),
                        str(curr_year) + " pop.")]
        for per in (int(0.5 * S), int(S)):
            path_series.append(
                (age_per_EpS,
                 omega_path_lev[:, per] / omega_path_lev[:, per].sum(),
                 "T=" + str(per) + " pop."))
        path_series.append((age_per_EpS, omega_SSfx, "Adj. SS pop."))
        _save_pop_plot(
            path_series,
            'Population distribution at points in time path',
            r"Pop. dist'n $\omega_{s}$", totpers + 1, 'lower left',
            os.path.join(output_dir, "PopDistPath"))

    # return omega_path_S, g_n_SS, omega_SSfx, survival rates,
    # mort_rates_S, g_n_path, imm_rates_mat, and omega_S_preTP
    return (omega_path_S.T, g_n_SS,
            omega_SSfx[-S:] / omega_SSfx[-S:].sum(), 1 - mort_rates_S,
            mort_rates_S, g_n_path, imm_rates_mat.T, omega_S_preTP)
Example #9
0
def get_imm_resid(totpers, min_yr, max_yr, graph=True):
    '''
    Calculate immigration rates by model-period age as a residual.

    Population levels for 2010 through 2013 are rebinned into totpers
    model periods. For each of the three pairs of adjacent years, the
    immigration rate at each age is the part of next year's population
    that is not explained by survivors (or, for the youngest age, by
    surviving newborns). The three estimates are then averaged.
    (Source: Population data come from Annual Estimates of the Resident
    Population by Single Year of Age and Sex: April 1, 2010 to July 1,
    2013 (Both sexes) National Characteristics, Vintage 2013, US Census
    Bureau,
    http://www.census.gov/popest/data/national/asrh/2013/index.html)

    Args:
        totpers (int): number of agent life periods (E+S), >= 3
        min_yr (int): age in years at which agents are born, >= 0
        max_yr (int): age in years at which agents die with certainty,
            >= 4
        graph (bool): if True, save a plot of the estimated rates as
            OUTPUT/Demographics/imm_rates_orig in the directory of this
            file

    Returns:
        imm_rates (Numpy array): average residual immigration rate for
            each model period of life, shape (totpers,)

    '''
    cur_path = os.path.split(os.path.abspath(__file__))[0]
    pop_file = utils.read_file(cur_path,
                               "data/demographic/pop_data.csv")
    pop_data = pd.read_table(pop_file, sep=',', thousands=',')
    # Keep the rows whose data age lies in [min_yr - 1, max_yr - 1]
    pop_data_samp = pop_data[(pop_data['Age'] >= min_yr - 1) &
                             (pop_data['Age'] <= max_yr - 1)]
    # Rebin each year's population from data ages to model periods
    pop_2010_EpS, pop_2011_EpS, pop_2012_EpS, pop_2013_EpS = [
        pop_rebin(np.array(pop_data_samp[year], dtype='f'), totpers)
        for year in ('2010', '2011', '2012', '2013')]
    fert_rates = get_fert(totpers, min_yr, max_yr, False)
    mort_rates, infmort_rate = get_mort(totpers, min_yr, max_yr, False)

    # One row of residual immigration rates per pair of adjacent years
    imm_mat = np.zeros((3, totpers))

    # Youngest age: next year's youngest cohort minus the newborns that
    # survive infancy, relative to this year's youngest cohort. The
    # infant mortality rate is used in place of an age-specific rate.
    pop11vec = np.array([pop_2010_EpS[0], pop_2011_EpS[0],
                         pop_2012_EpS[0]])
    pop21vec = np.array([pop_2011_EpS[0], pop_2012_EpS[0],
                         pop_2013_EpS[0]])
    newbornvec = np.dot(
        fert_rates,
        np.vstack((pop_2010_EpS, pop_2011_EpS, pop_2012_EpS)).T)
    imm_mat[:, 0] = ((pop21vec - (1 - infmort_rate) * newbornvec) /
                     pop11vec)

    # All other ages: next year's cohort minus the survivors of the
    # cohort one period younger, relative to this year's cohort of the
    # same age.
    pop11mat = np.vstack((pop_2010_EpS[:-1], pop_2011_EpS[:-1],
                          pop_2012_EpS[:-1]))
    pop12mat = np.vstack((pop_2010_EpS[1:], pop_2011_EpS[1:],
                          pop_2012_EpS[1:]))
    pop22mat = np.vstack((pop_2011_EpS[1:], pop_2012_EpS[1:],
                          pop_2013_EpS[1:]))
    mort_mat = np.tile(mort_rates[:-1], (3, 1))
    imm_mat[:, 1:] = (pop22mat - (1 - mort_mat) * pop11mat) / pop12mat

    # Final estimated immigration rates are the averages over 3 years
    imm_rates = imm_mat.mean(axis=0)

    if graph:
        # Model-period ages 1, 2, ..., totpers for the horizontal axis
        age_per = np.linspace(1, totpers, totpers)
        fig, ax = plt.subplots()
        plt.scatter(age_per, imm_rates, s=40, c='red', marker='d')
        plt.plot(age_per, imm_rates)
        # for the minor ticks, use no labels; default NullFormatter
        ax.xaxis.set_minor_locator(MultipleLocator(1))
        # Visibility is passed positionally because the keyword was
        # renamed from `b` to `visible` across Matplotlib releases.
        plt.grid(True, which='major', color='0.65', linestyle='-')
        plt.xlabel(r'Age $s$ (model periods)')
        plt.ylabel(r'Imm. rate $i_{s}$')
        plt.xlim((0, totpers + 1))
        # Create the OUTPUT directory if it does not already exist
        output_dir = os.path.join(cur_path, "OUTPUT/Demographics")
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, "imm_rates_orig")
        plt.savefig(output_path)

    return imm_rates
Example #10
0
def get_fert(totpers, min_yr, max_yr, graph=False):
    '''
    Generate a vector of fertility rates by model-period age.

    Fertility rates by age bin are interpolated with a cubic spline,
    evaluated on a fine grid of sub-bins, and averaged within each
    model period using the 2013 population distribution as weights.
    (Source: National Vital Statistics Reports, Volume 64, Number 1,
    January 15, 2015, Table 3, final 2013 data
    http://www.cdc.gov/nchs/data/nvsr/nvsr64/nvsr64_01.pdf)

    Args:
        totpers (int): total number of agent life periods (E+S), >= 3
        min_yr (int): age in years at which agents are born, >= 0
        max_yr (int): age in years at which agents die with certainty,
            >= 4
        graph (bool): if True, save a plot of the data, the spline and
            the model-period rates as OUTPUT/Demographics/fert_rates in
            the directory of this file

    Other functions and files used:
        utils.read_file()
        data/demographic/pop_data.csv

    Returns:
        fert_rates (Numpy array): population-weighted average fertility
            rate for each model period of life, shape (totpers,)

    '''
    # Get current population data (2013) for weighting
    cur_path = os.path.split(os.path.abspath(__file__))[0]
    pop_file = utils.read_file(cur_path,
                               "data/demographic/pop_data.csv")
    pop_data = pd.read_table(pop_file, sep=',', thousands=',')
    pop_data_samp = pop_data[(pop_data['Age'] >= min_yr - 1) &
                             (pop_data['Age'] <= max_yr - 1)]
    curr_pop = np.array(pop_data_samp['2013'], dtype='f')
    curr_pop_pct = curr_pop / curr_pop.sum()

    # Fertility rate by age-bin data. Values are births per 1000 women;
    # dividing by 2000 gives births per person under the assumption of
    # an equal number of men. The two leading and two trailing zeros
    # are not data: they pin the spline to zero at both ends.
    fert_data = (np.array([0.0, 0.0, 0.3, 12.3, 47.1, 80.7, 105.5, 98.0,
                           49.3, 10.4, 0.8, 0.0, 0.0]) / 2000)
    # Midpoint ages of the bins 10-14, 15-17, 18-19, 20-24, ..., 45-49,
    # padded with 9, 10 in front and 55, 56 at the back
    age_midp = np.array([9, 10, 12, 16, 18.5, 22, 27, 32, 37, 42, 47,
                         55, 56])
    # Generate interpolation function for fertility rates
    fert_func = si.interp1d(age_midp, fert_data, kind='cubic')

    # Calculate the average fertility rate in each model period from a
    # large number of sub-bins per data year.
    binsize = (max_yr - min_yr + 1) / totpers
    # Must be an int: it is the repeat count handed to np.repeat below
    num_sub_bins = 10000
    # Number of sub-bins per model period.
    # NOTE(review): the literal 100 assumes the data spans 100 years
    # (max_yr - min_yr + 1 == 100) -- confirm before using other ranges.
    len_subbins = (np.float64(100 * num_sub_bins)) / totpers
    age_sub = (np.linspace(np.float64(binsize) / num_sub_bins,
                           np.float64(max_yr),
                           int(num_sub_bins * max_yr)) -
               0.5 * np.float64(binsize) / num_sub_bins)
    # Spread each data year's population share evenly over its sub-bins
    curr_pop_sub = np.repeat(np.float64(curr_pop_pct) / num_sub_bins,
                             num_sub_bins)
    fert_rates_sub = np.zeros(curr_pop_sub.shape)
    # Only ages strictly inside the spline's support are interpolated;
    # fertility stays zero everywhere else
    pred_ind = (age_sub > age_midp[0]) * (age_sub < age_midp[-1])
    age_pred = age_sub[pred_ind]
    fert_rates_sub[pred_ind] = np.float64(fert_func(age_pred))

    fert_rates = np.zeros(totpers)
    end_sub_bin = 0
    for i in range(totpers):
        beg_sub_bin = int(end_sub_bin)
        end_sub_bin = int(np.rint((i + 1) * len_subbins))
        pop_slice = curr_pop_sub[beg_sub_bin:end_sub_bin]
        fert_slice = fert_rates_sub[beg_sub_bin:end_sub_bin]
        # Population-weighted mean fertility within the model period
        fert_rates[i] = (pop_slice * fert_slice).sum() / pop_slice.sum()

    if graph:
        # Generate finer age vector and fertility rate vector for
        # graphing cubic spline interpolating function
        age_fine_pred = np.linspace(age_midp[0], age_midp[-1], 300)
        fert_fine_pred = fert_func(age_fine_pred)
        # Extend the curve with zeros out to min_yr and max_yr
        age_fine = np.hstack((min_yr, age_fine_pred, max_yr))
        fert_fine = np.hstack((0, fert_fine_pred, 0))
        # Midpoint age of each model period. The builtin float is used
        # because the np.float alias no longer exists in current NumPy.
        age_mid_new = (np.linspace(float(max_yr) / totpers, max_yr,
                                   totpers) -
                       (0.5 * float(max_yr) / totpers))

        fig, ax = plt.subplots()
        plt.scatter(age_midp, fert_data, s=70, c='blue', marker='o',
                    label='Data')
        plt.scatter(age_mid_new, fert_rates, s=40, c='red', marker='d',
                    label='Model period (integrated)')
        plt.plot(age_fine, fert_fine, label='Cubic spline')
        # for the minor ticks, use no labels; default NullFormatter
        ax.xaxis.set_minor_locator(MultipleLocator(1))
        # Visibility is passed positionally because the keyword was
        # renamed from `b` to `visible` across Matplotlib releases.
        plt.grid(True, which='major', color='0.65', linestyle='-')
        plt.xlabel(r'Age $s$')
        plt.ylabel(r'Fertility rate $f_{s}$')
        plt.xlim((min_yr - 1, max_yr + 1))
        plt.ylim((-0.15 * (fert_fine_pred.max()),
                  1.15 * (fert_fine_pred.max())))
        plt.legend(loc='upper right')
        plt.text(-5, -0.018,
            "Source: National Vital Statistics Reports, Volume 64, Number 1, January 15, 2015.",
            fontsize=9)
        plt.tight_layout(rect=(0, 0.03, 1, 1))
        # Create the OUTPUT directory if it does not already exist
        output_dir = os.path.join(cur_path, "OUTPUT/Demographics")
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, "fert_rates")
        plt.savefig(output_path)

    return fert_rates
Example #11
0
def get_mort(totpers, min_yr, max_yr, graph=False):
    '''
    Generate a vector of mortality rates by model-period age.

    Male and female death probabilities are combined into a single rate
    per data year, weighted by the number of lives of each sex. Each
    yearly rate is split into equal per-sub-bin rates that compound to
    the yearly rate, and the sub-bin rates are then compounded over
    each model period. The last model period's rate is set to 1.
    (Source: Male and Female death probabilities Actuarial Life table,
    2011 Social Security Administration,
    http://www.ssa.gov/oact/STATS/table4c6.html)

    Args:
        totpers (int): total number of agent life periods (E+S), >= 3
        min_yr (int): age in years at which agents are born, >= 0
        max_yr (int): age in years at which agents die with certainty,
            >= 4
        graph (bool): if True, save a plot of the data and model-period
            rates as OUTPUT/Demographics/mort_rates in the directory of
            this file

    Other functions and files used:
        utils.read_file()
        data/demographic/mort_rates2011.csv

    Returns:
        mort_rates (Numpy array): cumulative mortality rate for each
            model period of life, shape (totpers,)
        infmort_rate (scalar): infant mortality rate, fixed at 0.00587

    '''
    # Get mortality rate by age data
    infmort_rate = 0.00587  # taken from 2015 U.S. infant mortality rate
    cur_path = os.path.split(os.path.abspath(__file__))[0]
    mort_file = utils.read_file(cur_path,
                                'data/demographic/mort_rates2011.csv')
    mort_data = pd.read_table(mort_file, sep=',', thousands=',')
    age_year_all = mort_data['Age'] + 1
    # Combine male and female rates, weighted by number of lives
    male_lives = mort_data['Num. Male Lives']
    female_lives = mort_data['Num. Female Lives']
    mort_rates_all = (
        ((mort_data['Male Mort. Rate'] * male_lives) +
         (mort_data['Female Mort. Rate'] * female_lives)) /
        (male_lives + female_lives))
    # Drop ages with a non-finite combined rate, e.g. where both
    # numbers of lives are zero
    finite_rows = np.isfinite(mort_rates_all)
    age_year_all = age_year_all[finite_rows]
    mort_rates_all = mort_rates_all[finite_rows]

    # Calculate implied mortality rates in sub-bins of mort_rates_all.
    # A plain array is used so that both the slice and the per-year
    # lookup below are positional, independent of the Series' labels.
    mort_rates_mxyr = np.asarray(mort_rates_all)[0:max_yr]
    num_sub_bins = 100
    # Number of sub-bins per model period
    len_subbins = ((np.float64((max_yr - min_yr + 1) * num_sub_bins)) /
                   totpers)
    mort_rates_sub = np.zeros(num_sub_bins * max_yr, dtype=float)
    for i in range(max_yr):
        # Per-sub-bin rate r such that (1 - r) ** num_sub_bins equals
        # the yearly survival rate
        mort_rates_sub[i * num_sub_bins:(i + 1) * num_sub_bins] = (
            1 - ((1 - mort_rates_mxyr[i]) ** (1.0 / num_sub_bins)))

    mort_rates = np.zeros(totpers)
    end_sub_bin = 0
    for i in range(totpers):
        beg_sub_bin = int(end_sub_bin)
        end_sub_bin = int(np.rint((i + 1) * len_subbins))
        # One minus the probability of surviving every sub-bin in the
        # model period
        survival = (1 - mort_rates_sub[beg_sub_bin:end_sub_bin]).prod()
        mort_rates[i] = 1 - survival
    mort_rates[-1] = 1  # Mortality rate in last period is set to 1

    if graph:
        # Midpoint age of each model period. The builtin float is used
        # because the np.float alias no longer exists in current NumPy.
        age_mid_new = (np.linspace(float(max_yr) / totpers, max_yr,
                                   totpers) -
                       (0.5 * float(max_yr) / totpers))
        fig, ax = plt.subplots()
        # The infant mortality rate is prepended at age 0 in each series
        plt.scatter(np.hstack([0, age_year_all]),
                    np.hstack([infmort_rate, mort_rates_all]),
                    s=20, c='blue', marker='o', label='Data')
        plt.scatter(np.hstack([0, age_mid_new]),
                    np.hstack([infmort_rate, mort_rates]),
                    s=40, c='red', marker='d',
                    label='Model period (cumulative)')
        plt.plot(np.hstack([0, age_year_all[min_yr-1:max_yr]]),
                 np.hstack([infmort_rate,
                            mort_rates_all[min_yr-1:max_yr]]))
        plt.axvline(x=max_yr, color='red', linestyle='-', linewidth=1)
        # for the minor ticks, use no labels; default NullFormatter
        ax.xaxis.set_minor_locator(MultipleLocator(1))
        # Visibility is passed positionally because the keyword was
        # renamed from `b` to `visible` across Matplotlib releases.
        plt.grid(True, which='major', color='0.65', linestyle='-')
        plt.xlabel(r'Age $s$')
        plt.ylabel(r'Mortality rate $\rho_{s}$')
        plt.xlim((min_yr - 2, age_year_all.max() + 2))
        plt.ylim((-0.05, 1.05))
        plt.legend(loc='upper left')
        plt.text(-5, -0.2,
            "Source: Actuarial Life table, 2011 Social Security Administration.",
            fontsize=9)
        plt.tight_layout(rect=(0, 0.03, 1, 1))
        # Create the OUTPUT directory if it does not already exist
        output_dir = os.path.join(cur_path, "OUTPUT/Demographics")
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, "mort_rates")
        plt.savefig(output_path)

    return mort_rates, infmort_rate
Example #12
0
def get_fert(totpers, min_yr, max_yr, graph=False):
    '''
    Generate a vector of fertility rates by model-period age.

    Age-specific fertility rates for ages 14 through 50 are averaged
    over the years 1990 through 2014, halved, and fitted with a spline.
    The spline is evaluated on a fine grid of sub-bins and averaged
    within each model period, weighted by a spline-smoothed 2014
    population distribution.

    Args:
        totpers (int): total number of agent life periods (E+S), >= 3
        min_yr (int): age in years at which agents are born, >= 0
        max_yr (int): age in years at which agents die with certainty,
            >= 4
        graph (bool): if True, save a plot of the data, the spline and
            the model-period rates as OUTPUT/Demographics/fert_rates
            under cur_path

    Other functions and files used:
        utils.read_file()
        select_pop_data()
        select_fert_data()
        the population file at pop_dir and fertility file at fert_dir

    Returns:
        fert_rates (Numpy array): population-weighted average fertility
            rate for each model period of life, shape (totpers,); NaN
            entries are replaced by np.nan_to_num

    '''
    # NOTE(review): cur_path, pop_dir and fert_dir are not defined in
    # this function; presumably module-level names -- confirm.

    # Get current population data (2014) for weighting
    pop_file = utils.read_file(cur_path, pop_dir)
    pop_data = pd.read_csv(pop_file,
                           sep=r'\s+',
                           usecols=['Year', 'Age', 'Total'])
    pop_data = select_pop_data(pop_data)
    pop_data_samp = pop_data[(pop_data['Age'] >= min_yr - 1)
                             & (pop_data['Age'] <= max_yr - 1)]
    curr_pop = np.array(
        pop_data_samp[pop_data_samp['Year'] == 2014]['Total'],
        dtype='f')
    # Share of each age group in the total population of that year
    curr_pop_pct = curr_pop / curr_pop.sum()

    # Get fertility rate by age data. The separator is a regular
    # expression, so it is written as a raw string.
    fert_data = pd.read_csv(fert_dir, sep=r',\s*',
                            usecols=['Year1', 'Age', 'ASFR', 'AgeDef',
                                     'Collection', 'RefCode'])
    fert_data = select_fert_data(fert_data)
    # Mean rate over 1990-2014 for each age from 14 through 50
    fert_list = []
    for age in range(14, 51):
        age_rows = fert_data[fert_data['Age'] == age]
        in_years = age_rows[age_rows['Year'].isin(range(1990, 2015))]
        fert_list.append(in_years['Values'].mean())
    # The 1995 rows serve as the container for the averaged values;
    # this relies on 1995 having exactly one row per age 14-50.
    # Copy so the column assignment does not target a slice of the
    # original frame.
    fert_data = fert_data[fert_data['Year'] == 1995].copy()
    # NOTE(review): the halving presumably converts per-woman rates to
    # per-person rates assuming an equal number of men -- confirm.
    fert_data['Values'] = np.array(fert_list) / 2

    # Generate spline representation (tck tuple) of fertility rates
    fert_func = si.splrep(fert_data['Age'], fert_data['Values'])

    # Calculate the average fertility rate in each model period from a
    # large number of sub-bins per data year.
    binsize = (max_yr - min_yr + 1) / totpers
    # Must be an int: it determines the sample count given to
    # np.linspace below
    num_sub_bins = 10000
    # Number of sub-bins per model period (not the width of a sub-bin).
    # NOTE(review): the literal 100 assumes a 100-year data span.
    len_subbins = (np.float64(100 * num_sub_bins)) / totpers
    # Grid of sub-bin ages up to max_yr, shifted back by half a step
    age_sub = (np.linspace(np.float64(binsize) / num_sub_bins,
                           np.float64(max_yr),
                           int(num_sub_bins * max_yr)) -
               0.5 * np.float64(binsize) / num_sub_bins)

    # Smooth the population shares with a spline and resample them on
    # the sub-bin grid, then renormalize so they sum to one
    ages = np.linspace(min_yr, max_yr, curr_pop_pct.shape[0])
    pop_func = si.splrep(ages, curr_pop_pct)
    new_bins = np.linspace(min_yr, max_yr, num_sub_bins * max_yr)
    curr_pop_sub = si.splev(new_bins, pop_func)
    curr_pop_sub = curr_pop_sub / curr_pop_sub.sum()

    fert_rates_sub = np.zeros(curr_pop_sub.shape)
    # Only ages strictly inside the fertility data range are
    # interpolated; fertility stays zero everywhere else
    pred_ind = ((age_sub > fert_data['Age'].iloc[0]) *
                (age_sub < fert_data['Age'].iloc[-1]))
    age_pred = age_sub[pred_ind]
    fert_rates_sub[pred_ind] = np.float64(si.splev(age_pred, fert_func))
    # The spline can dip below zero; clip those values
    fert_rates_sub[fert_rates_sub < 0] = 0

    fert_rates = np.zeros(totpers)
    end_sub_bin = 0
    for i in range(totpers):
        beg_sub_bin = int(end_sub_bin)
        end_sub_bin = int(np.rint((i + 1) * len_subbins))
        pop_slice = curr_pop_sub[beg_sub_bin:end_sub_bin]
        fert_slice = fert_rates_sub[beg_sub_bin:end_sub_bin]
        # Population-weighted mean fertility within the model period
        fert_rates[i] = (pop_slice * fert_slice).sum() / pop_slice.sum()
    fert_rates = np.nan_to_num(fert_rates)

    if graph:
        data_ages = fert_data['Age'].values
        data_rates = fert_data['Values'].values
        # Generate finer age vector and fertility rate vector for
        # graphing the spline; fert_func is a tck tuple, so it is
        # evaluated through si.splev
        age_fine_pred = np.linspace(data_ages[0], data_ages[-1], 300)
        fert_fine_pred = si.splev(age_fine_pred, fert_func)
        # Extend the curve with zeros out to min_yr and max_yr
        age_fine = np.hstack((min_yr, age_fine_pred, max_yr))
        fert_fine = np.hstack((0, fert_fine_pred, 0))
        # Midpoint age of each model period. The builtin float is used
        # because the np.float alias no longer exists in current NumPy.
        age_mid_new = (
            np.linspace(float(max_yr) / totpers, max_yr, totpers) -
            (0.5 * float(max_yr) / totpers))

        fig, ax = plt.subplots()
        plt.scatter(data_ages,
                    data_rates,
                    s=70,
                    c='blue',
                    marker='o',
                    label='Data')
        plt.scatter(age_mid_new,
                    fert_rates,
                    s=40,
                    c='red',
                    marker='d',
                    label='Model period (integrated)')
        plt.plot(age_fine, fert_fine, label='Cubic spline')
        # for the minor ticks, use no labels; default NullFormatter
        ax.xaxis.set_minor_locator(MultipleLocator(1))
        # Visibility is passed positionally because the keyword was
        # renamed from `b` to `visible` across Matplotlib releases.
        plt.grid(True, which='major', color='0.65', linestyle='-')
        plt.xlabel(r'Age $s$')
        plt.ylabel(r'Fertility rate $f_{s}$')
        plt.xlim((min_yr - 1, max_yr + 1))
        plt.ylim(
            (-0.15 * (fert_fine_pred.max()), 1.15 * (fert_fine_pred.max())))
        plt.legend(loc='upper right')
        # NOTE(review): this caption may not match the data file read
        # from fert_dir -- confirm the source.
        plt.text(-5,
                 -0.018,
                 'Source: National Vital Statistics Reports, ' +
                 'Volume 64, Number 1, January 15, 2015.',
                 fontsize=9)
        plt.tight_layout(rect=(0, 0.03, 1, 1))
        # Create the OUTPUT directory if it does not already exist
        output_dir = os.path.join(cur_path, 'OUTPUT/Demographics')
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, 'fert_rates')
        plt.savefig(output_path)

    return fert_rates