def test_read_file_from_egg():
    '''
    Exercise utils.read_file() for the case of reading a file from the
    .egg: the object handed back is expected to be an io.StringIO.
    '''
    file_name = 'default_parameters.json'
    search_dir = os.path.join(CUR_PATH)
    result = utils.read_file(search_dir, file_name)
    assert isinstance(result, io.StringIO)
def test_read_file():
    '''
    Exercise utils.read_file() on a file located in the test_io_data
    directory: the object handed back is expected to be an
    io.TextIOWrapper.
    '''
    file_name = 'SS_fsolve_inputs.pkl'
    data_dir = os.path.join(CUR_PATH, 'test_io_data')
    result = utils.read_file(data_dir, file_name)
    assert isinstance(result, io.TextIOWrapper)
def ss_profiles(base_ss, base_params, reform_ss=None,
                reform_params=None, by_j=True, var='nssmat',
                plot_data=False, plot_title=None, path=None):
    '''
    Draw steady-state lifecycle (by-age) profiles of a single variable.

    Args:
        base_ss (dictionary): SS output from baseline run
        base_params (OG-USA Specifications class): baseline parameters
            object
        reform_ss (dictionary): SS output from reform run
        reform_params (OG-USA Specifications class): reform parameters
            object
        by_j (bool): if True, draw one line per index j; otherwise draw
            a single line that sums across j using lambdas as weights
        var (string): name of variable to plot
        plot_data (bool): whether to plot data values for given
            variable (only consulted when by_j is False, and only
            valid for var == 'nssmat')
        plot_title (string): title for plot
        path (string): path to save figure to

    Returns:
        fig (Matplotlib plot object): plot of lifecycle profiles.
            Returned only when no path is given; with a path the
            figure is saved and closed and nothing is returned.

    '''
    have_reform = bool(reform_ss)
    if have_reform:
        # Baseline and reform must share the same age grid
        assert (base_params.S == reform_params.S)
        assert (base_params.starting_age == reform_params.starting_age)
        assert (base_params.ending_age == reform_params.ending_age)

    first_age = base_params.starting_age
    ages = np.arange(first_age, first_age + base_params.S)
    fig1, ax1 = plt.subplots()

    if by_j:
        cmap = plt.get_cmap('coolwarm')
        palette = [cmap(1. * i / 7) for i in range(7)]
        ax1.set_prop_cycle(color=palette)
        for j in range(base_params.J):
            plt.plot(ages, base_ss[var][:, j],
                     label='Baseline, j = ' + str(j))
            if have_reform:
                plt.plot(ages, reform_ss[var][:, j],
                         label='Reform, j = ' + str(j), linestyle='--')
    else:
        # Collapse the j dimension using the lambdas weights
        base_weights = base_params.lambdas.reshape(1, base_params.J)
        base_profile = (base_ss[var][:, :] * base_weights).sum(axis=1)
        plt.plot(ages, base_profile, label='Baseline')
        if have_reform:
            reform_weights = reform_params.lambdas.reshape(
                1, reform_params.J)
            reform_profile = (
                reform_ss[var][:, :] * reform_weights).sum(axis=1)
            plt.plot(ages, reform_profile, label='Reform',
                     linestyle='--')
        if plot_data:
            assert var == 'nssmat'
            labor_file = utils.read_file(
                cur_path, "data/labor/cps_hours_by_age_hourspct.txt")
            cps = pd.read_csv(labor_file, header=0, delimiter='\t')
            # Mean hours by age (rows) and hours percentile (columns),
            # scaled by the largest non-NaN entry
            hours = np.array(cps.pivot(index='age', columns='hours_pct',
                                       values='mean_hrs'))
            hours /= np.nanmax(hours)
            # Observation counts turned into within-age weights
            obs_wgt = np.array(cps.pivot(index='age',
                                         columns='hours_pct',
                                         values='num_obs'))
            obs_wgt /= np.nansum(obs_wgt, axis=1).reshape(60, 1)
            data_profile = np.nansum((hours * obs_wgt), axis=1)
            # Pad with 20 extra entries and blank them out as NaN
            data_profile = np.append(data_profile, np.zeros(20))
            data_profile[60:] = np.nan
            plt.plot(ages, data_profile, linewidth=2.0, label='Data',
                     linestyle=':')

    plt.xlabel(r'Age')
    plt.ylabel(VAR_LABELS[var])
    plt.legend(loc=9, bbox_to_anchor=(0.5, -0.15), ncol=2)
    if plot_title:
        plt.title(plot_title, fontsize=15)

    if not path:
        return fig1
    plt.savefig(os.path.join(path), bbox_inches="tight")
    plt.close()
def get_imm_resid(totpers, min_yr, max_yr, graph=True):
    '''
    --------------------------------------------------------------------
    Calculate immigration rates by age as a residual given population
    levels in different periods, then output average calculated
    immigration rate. The first age bin has no younger cohort to age
    into it, so its rate is computed from fertility-implied newborns
    and the infant mortality rate instead of the previous bin's
    survivors.
    (Source: Population data come from Annual Estimates of the Resident
    Population by Single Year of Age and Sex: April 1, 2010 to July 1,
    2013 (Both sexes) National Characteristics, Vintage 2013, US Census
    Bureau, http://www.census.gov/popest/data/national/asrh/2013/index.html)
    --------------------------------------------------------------------
    INPUTS:
    totpers = integer >= 3, number of agent life periods (E+S)
    min_yr  = integer >= 0, age in years at which agents are born,
              minimum age
    max_yr  = integer >= 4, age in years at which agents die with
              certainty, maximum age
    graph   = boolean, =True if want graphical output

    OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION:
        utils.read_file()
        pop_rebin()
        get_fert()
        get_mort()
        pop_data.csv

    OBJECTS CREATED WITHIN FUNCTION:
    cur_path      = string, path in which this file resides
    pop_file      = object returned by utils.read_file() for the
                    population csv, passed directly to pd.read_csv
    pop_data      = DataFrame read from pop_data.csv (uses columns
                    'Age', '2010', '2011', '2012', '2013')
    pop_data_samp = DataFrame, rows of pop_data with
                    min_yr - 1 <= Age <= max_yr - 1
    pop_2010, ..., pop_2013
                  = float32 vectors, population by data age in each
                    year
    pop_2010_EpS, ..., pop_2013_EpS
                  = (totpers,) vectors, output of pop_rebin() for each
                    year
    imm_mat       = (3, totpers) matrix, immigration rates computed as
                    residuals for each age in three successive pairs
                    of years
    pop11vec      = (3,) vector, first-bin population in first three
                    years
    pop21vec      = (3,) vector, first-bin population in last three
                    years
    fert_rates    = (totpers,) vector, fertility rates by model age
    mort_rates    = (totpers,) vector, mortality rates by model age
    infmort_rate  = scalar, infant mortality rate returned by
                    get_mort()
    newbornvec    = (3,) vector, total births in first three years
    pop11mat      = (3, totpers-1) matrix, population of all bins but
                    the last for first three years
    pop12mat      = (3, totpers-1) matrix, population of all bins but
                    the first for first three years
    pop22mat      = (3, totpers-1) matrix, population of all bins but
                    the first for last three years
    mort_mat      = (3, totpers-1) matrix, all but the last mortality
                    rate copied into 3 rows
    imm_rates     = (totpers,) vector, average over the three year
                    pairs of the residual immigration rates
    age_per       = (totpers,) vector, model period numbers 1 through
                    totpers (x-axis of the optional plot)

    FILES CREATED BY THIS FUNCTION (only if graph=True):
        OUTPUT/Demographics/imm_rates_orig (image, extension chosen by
        Matplotlib's default savefig format)

    RETURNS: imm_rates
    --------------------------------------------------------------------
    '''
    cur_path = os.path.split(os.path.abspath(__file__))[0]
    pop_file = utils.read_file(cur_path,
                               "data/demographic/pop_data.csv")
    pop_data = pd.read_csv(pop_file, sep=',', thousands=',')
    pop_data_samp = pop_data[(pop_data['Age'] >= min_yr - 1) &
                             (pop_data['Age'] <= max_yr - 1)]
    pop_2010, pop_2011, pop_2012, pop_2013 = (
        np.array(pop_data_samp['2010'], dtype='f'),
        np.array(pop_data_samp['2011'], dtype='f'),
        np.array(pop_data_samp['2012'], dtype='f'),
        np.array(pop_data_samp['2013'], dtype='f'))
    pop_2010_EpS = pop_rebin(pop_2010, totpers)
    pop_2011_EpS = pop_rebin(pop_2011, totpers)
    pop_2012_EpS = pop_rebin(pop_2012, totpers)
    pop_2013_EpS = pop_rebin(pop_2013, totpers)

    # Create three years of estimated immigration rates for youngest age
    # individuals
    imm_mat = np.zeros((3, totpers))
    pop11vec = np.array([pop_2010_EpS[0], pop_2011_EpS[0],
                         pop_2012_EpS[0]])
    pop21vec = np.array([pop_2011_EpS[0], pop_2012_EpS[0],
                         pop_2013_EpS[0]])
    fert_rates = get_fert(totpers, min_yr, max_yr, False)
    mort_rates, infmort_rate = get_mort(totpers, min_yr, max_yr, False)
    newbornvec = np.dot(fert_rates, np.vstack((pop_2010_EpS,
                                               pop_2011_EpS,
                                               pop_2012_EpS)).T)
    imm_mat[:, 0] = ((pop21vec - (1 - infmort_rate) * newbornvec) /
                     pop11vec)

    # Estimate 3 years of immigration rates for all other-aged
    # individuals
    pop11mat = np.vstack((pop_2010_EpS[:-1], pop_2011_EpS[:-1],
                          pop_2012_EpS[:-1]))
    pop12mat = np.vstack((pop_2010_EpS[1:], pop_2011_EpS[1:],
                          pop_2012_EpS[1:]))
    pop22mat = np.vstack((pop_2011_EpS[1:], pop_2012_EpS[1:],
                          pop_2013_EpS[1:]))
    mort_mat = np.tile(mort_rates[:-1], (3, 1))
    imm_mat[:, 1:] = (pop22mat - (1 - mort_mat) * pop11mat) / pop12mat

    # Final estimated immigration rates are the averages over 3 years
    imm_rates = imm_mat.mean(axis=0)
    age_per = np.linspace(1, totpers, totpers)

    if graph:
        # output_fldr = string, path of the OUTPUT folder from cur_path
        # output_dir  = string, total path of OUTPUT folder
        # output_path = string, path of file name of figure to be saved
        fig, ax = plt.subplots()
        plt.scatter(age_per, imm_rates, s=40, c='red', marker='d')
        plt.plot(age_per, imm_rates)
        # for the minor ticks, use no labels; default NullFormatter
        minorLocator = MultipleLocator(1)
        ax.xaxis.set_minor_locator(minorLocator)
        # The on/off flag is passed positionally: its keyword was
        # renamed from `b` to `visible` across Matplotlib releases, so
        # the positional form is the one that works on all of them.
        plt.grid(True, which='major', color='0.65', linestyle='-')
        # plt.title('Fitted immigration rates by age ($i_{s}$), residual',
        #     fontsize=20)
        plt.xlabel(r'Age $s$ (model periods)')
        plt.ylabel(r'Imm. rate $i_{s}$')
        plt.xlim((0, totpers + 1))
        # Create OUTPUT directory if it does not already exist
        output_fldr = "OUTPUT/Demographics"
        output_dir = os.path.join(cur_path, output_fldr)
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, "imm_rates_orig")
        plt.savefig(output_path)
        # plt.show()

    return imm_rates
def _pop_transition_matrix(n, fert_rates, mort_rates, infmort_rate,
                           imm_rates):
    '''
    Build the (n, n) population transition matrix OMEGA: first row is
    surviving newborns plus first-age immigration, the sub-diagonal is
    survival (1 - mortality), and the remaining diagonal is immigration.
    '''
    omega = np.zeros((n, n))
    omega[0, :] = ((1 - infmort_rate) * fert_rates +
                   np.hstack((imm_rates[0], np.zeros(n - 1))))
    omega[1:, :-1] += np.diag(1 - mort_rates[:-1])
    omega[1:, 1:] += np.diag(imm_rates[1:])
    return omega


def _dominant_real_eig(matrix):
    '''
    Return (eigenvalue, eigenvector) for the largest purely real
    eigenvalue of matrix, both as real-valued objects.
    '''
    eigvalues, eigvectors = np.linalg.eig(matrix)
    # Keep positions in the *unfiltered* eigenvalue array so the chosen
    # index addresses the matching column of eigvectors.
    real_idx = np.flatnonzero(np.isreal(eigvalues))
    dom_idx = real_idx[eigvalues[real_idx].real.argmax()]
    return eigvalues[dom_idx].real, eigvectors[:, dom_idx].real


def get_pop_objs(E, S, T, min_yr, max_yr, curr_year, GraphDiag=True):
    '''
    --------------------------------------------------------------------
    This function produces the demographics objects to be used in the
    OG-USA model package.
    --------------------------------------------------------------------
    INPUTS:
    E         = integer >= 1, number of model periods in which agent is
                not economically active
    S         = integer >= 3, number of model periods in which agent is
                economically active
    T         = integer > 2*S, number of periods to be simulated in TPI
    min_yr    = integer >= 0, age in years at which agents are born,
                minimum age
    max_yr    = integer >= 4, age in years at which agents die with
                certainty, maximum age
    curr_year = integer >= 2016, current year for which analysis will
                begin
    GraphDiag = boolean, =True if want graphical output and printed
                diagnostics

    OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION:
        get_fert()
        get_mort()
        get_imm_resid()
        utils.read_file()
        pop_rebin()
        immsolve()
        pop_data.csv

    OBJECTS CREATED WITHIN FUNCTION:
    fert_rates     = (E+S,) vector, fertility rates by model period
    mort_rates     = (E+S,) vector, mortality rates by model period
    infmort_rate   = scalar, infant mortality rate from get_mort()
    mort_rates_S   = (S,) vector, mortality rates for the last S model
                     periods
    imm_rates_orig = (E+S,) vector, immigration rates by age estimated
                     as residuals from get_imm_resid()
    OMEGA_orig     = (E+S, E+S) matrix, transition matrix for
                     population distribution law of motion
    g_n_SS         = scalar, steady-state population growth rate:
                     largest purely real eigenvalue of OMEGA_orig
                     minus 1
    eigvec_raw     = (E+S,) vector, nonnormalized eigenvector that
                     belongs to that eigenvalue
    omega_SS_orig  = (E+S,) vector, eigvec_raw normalized to sum to 1
    omega_path_lev = (E+S, T+S) matrix, time path of the population
                     in levels starting at curr_year
    cur_path       = string, path in which this file resides
    pop_file       = object returned by utils.read_file() for the
                     population csv, passed directly to pd.read_csv
    pop_data       = DataFrame read from pop_data.csv
    pop_data_samp  = DataFrame, rows of pop_data with
                     min_yr - 1 <= Age <= max_yr - 1
    pop_2013       = float32 vector, 2013 population by data age
    age_per_EpS    = (E+S,) vector, period numbers 1 through E+S
    pop_2013_EpS   = (E+S,) vector, 2013 population by model period
                     in levels
    pop_2013_pct   = (E+S,) vector, pop_2013_EpS normalized to sum to 1
    pop_curr       = (E+S,) vector, current-period population in
                     levels
    data_year      = integer, most recent year in data (2013)
    per            = integer, index for period
    pop_next       = (E+S,) vector, next-period population in levels
    pop_past       = (E+S,) vector, population one period before
                     pop_curr (equal to the 2013 data when
                     curr_year <= data_year)
    g_n_curr       = scalar, growth rate of the last-S-periods
                     population between pop_curr and pop_next
    imm_tol        = scalar > 0, tolerance for fsolve on immsolve()
    fixper         = integer, int(1.5 * S), period from which the
                     population distribution is held fixed
    omega_SSfx     = (E+S,) vector, period-fixper population
                     normalized to sum to 1
    imm_objs       = length 5 tuple, extra arguments handed to
                     immsolve() through fsolve
    imm_fulloutput = tuple, full output of opt.fsolve
    imm_rates_adj  = (E+S,) vector, fsolve solution for immigration
                     rates
    imm_diagdict   = dict, fsolve info dictionary (its 'fvec' entry is
                     used in the diagnostics)
    omega_path_S   = (S, T+S) matrix, time path of the distribution
                     over the last S model periods, each column
                     summing to 1, constant from period fixper on
    g_n_path       = (T+S,) vector, time path of the growth rate of
                     the last-S-periods population, equal to g_n_SS
                     after period fixper
    omega_S_preTP  = (S,) vector, last S entries of pop_past
                     normalized to sum to 1
    imm_rates_mat  = (S, T+S) matrix, imm_rates_orig[E:] in the first
                     fixper columns and imm_rates_adj[E:] afterwards

    FILES CREATED BY THIS FUNCTION (only if GraphDiag=True):
        OUTPUT/Demographics/OrigVsFixSSpop
        OUTPUT/Demographics/OrigVsAdjImm
        OUTPUT/Demographics/PopDistPath

    RETURNS: omega_path_S.T, g_n_SS,
        omega_SSfx[-S:] / omega_SSfx[-S:].sum(), 1 - mort_rates_S,
        mort_rates_S, g_n_path, imm_rates_mat.T, omega_S_preTP
    --------------------------------------------------------------------
    '''
    # age_per = np.linspace(min_yr, max_yr, E+S)
    fert_rates = get_fert(E + S, min_yr, max_yr, graph=False)
    mort_rates, infmort_rate = get_mort(E + S, min_yr, max_yr,
                                        graph=False)
    mort_rates_S = mort_rates[-S:]
    imm_rates_orig = get_imm_resid(E + S, min_yr, max_yr, graph=False)
    OMEGA_orig = _pop_transition_matrix(E + S, fert_rates, mort_rates,
                                        infmort_rate, imm_rates_orig)

    # Solve for steady-state population growth rate and steady-state
    # population distribution by age using eigenvalue and eigenvector
    # decomposition
    eig_max, eigvec_raw = _dominant_real_eig(OMEGA_orig)
    g_n_SS = eig_max - 1
    omega_SS_orig = eigvec_raw / eigvec_raw.sum()

    # Generate time path of the nonstationary population distribution
    omega_path_lev = np.zeros((E + S, T + S))
    cur_path = os.path.split(os.path.abspath(__file__))[0]
    pop_file = utils.read_file(cur_path,
                               "data/demographic/pop_data.csv")
    pop_data = pd.read_csv(pop_file, sep=',', thousands=',')
    pop_data_samp = pop_data[(pop_data['Age'] >= min_yr - 1) &
                             (pop_data['Age'] <= max_yr - 1)]
    pop_2013 = np.array(pop_data_samp['2013'], dtype='f')
    # Generate the current population distribution given that E+S might
    # be less than max_yr-min_yr+1
    age_per_EpS = np.arange(1, E + S + 1)
    pop_2013_EpS = pop_rebin(pop_2013, E + S)
    pop_2013_pct = pop_2013_EpS / pop_2013_EpS.sum()
    # Age most recent population data to the current year of analysis
    pop_curr = pop_2013_EpS.copy()
    data_year = 2013
    pop_next = np.dot(OMEGA_orig, pop_curr)
    g_n_curr = ((pop_next[-S:].sum() - pop_curr[-S:].sum()) /
                pop_curr[-S:].sum())  # g_n in 2013
    pop_past = pop_curr  # assume 2012-2013 pop
    # Age the data to the current year
    for per in range(curr_year - data_year):
        pop_next = np.dot(OMEGA_orig, pop_curr)
        g_n_curr = ((pop_next[-S:].sum() - pop_curr[-S:].sum()) /
                    pop_curr[-S:].sum())
        pop_past = pop_curr
        pop_curr = pop_next

    # Generate time path of the population distribution
    omega_path_lev[:, 0] = pop_curr.copy()
    for per in range(1, T + S):
        pop_next = np.dot(OMEGA_orig, pop_curr)
        omega_path_lev[:, per] = pop_next.copy()
        pop_curr = pop_next.copy()

    # Force the population distribution after 1.5*S periods to be the
    # steady-state distribution by adjusting immigration rates, holding
    # constant mortality, fertility, and SS growth rates
    imm_tol = 1e-14
    fixper = int(1.5 * S)
    omega_SSfx = (omega_path_lev[:, fixper] /
                  omega_path_lev[:, fixper].sum())
    imm_objs = (fert_rates, mort_rates, infmort_rate,
                omega_path_lev[:, fixper], g_n_SS)
    # imm_objs is itself the args tuple, so immsolve receives its five
    # entries as separate positional arguments after the guess.
    imm_fulloutput = opt.fsolve(immsolve, imm_rates_orig,
                                args=imm_objs, full_output=True,
                                xtol=imm_tol)
    imm_rates_adj = imm_fulloutput[0]
    imm_diagdict = imm_fulloutput[1]
    omega_path_S = (omega_path_lev[-S:, :] /
                    np.tile(omega_path_lev[-S:, :].sum(axis=0), (S, 1)))
    omega_path_S[:, fixper:] = \
        np.tile(omega_path_S[:, fixper].reshape((S, 1)),
                (1, T + S - fixper))
    g_n_path = np.zeros(T + S)
    g_n_path[0] = g_n_curr.copy()
    g_n_path[1:] = ((omega_path_lev[-S:, 1:].sum(axis=0) -
                     omega_path_lev[-S:, :-1].sum(axis=0)) /
                    omega_path_lev[-S:, :-1].sum(axis=0))
    g_n_path[fixper + 1:] = g_n_SS
    omega_S_preTP = (pop_past.copy()[-S:]) / (pop_past.copy()[-S:].sum())
    imm_rates_mat = np.hstack((
        np.tile(np.reshape(imm_rates_orig[E:], (S, 1)), (1, fixper)),
        np.tile(np.reshape(imm_rates_adj[E:], (S, 1)),
                (1, T + S - fixper))))

    if GraphDiag:
        # Check whether original SS population distribution is close to
        # the period-T population distribution
        omegaSSmaxdif = np.absolute(
            omega_SS_orig - (omega_path_lev[:, T] /
                             omega_path_lev[:, T].sum())).max()
        if omegaSSmaxdif > 0.0003:
            print("POP. WARNING: Max. abs. dist. between original SS " +
                  "pop. dist'n and period-T pop. dist'n is greater than" +
                  " 0.0003. It is " + str(omegaSSmaxdif) + ".")
        else:
            print("POP. SUCCESS: orig. SS pop. dist is very close to " +
                  "period-T pop. dist'n. The maximum absolute " +
                  "difference is " + str(omegaSSmaxdif) + ".")

        # Plot the adjusted steady-state population distribution versus
        # the original population distribution. The difference should be
        # small
        omegaSSvTmaxdiff = np.absolute(omega_SS_orig - omega_SSfx).max()
        if omegaSSvTmaxdiff > 0.0003:
            print("POP. WARNING: The maximimum absolute difference " +
                  "between any two corresponding points in the original" +
                  " and adjusted steady-state population " +
                  "distributions is " + str(omegaSSvTmaxdiff) + ", " +
                  "which is greater than 0.0003.")
        else:
            print("POP. SUCCESS: The maximum absolute difference " +
                  "between any two corresponding points in the original" +
                  " and adjusted steady-state population " +
                  "distributions is " + str(omegaSSvTmaxdiff))
        fig, ax = plt.subplots()
        plt.plot(age_per_EpS, omega_SS_orig, label="Original Dist'n")
        plt.plot(age_per_EpS, omega_SSfx, label="Fixed Dist'n")
        # for the minor ticks, use no labels; default NullFormatter
        minorLocator = MultipleLocator(1)
        ax.xaxis.set_minor_locator(minorLocator)
        # The on/off flag is passed positionally: its keyword was
        # renamed from `b` to `visible` across Matplotlib releases.
        plt.grid(True, which='major', color='0.65', linestyle='-')
        plt.title(
            'Original steady-state population distribution vs. fixed',
            fontsize=20)
        plt.xlabel(r'Age $s$')
        plt.ylabel(r"Pop. dist'n $\omega_{s}$")
        plt.xlim((0, E + S + 1))
        plt.legend(loc='upper right')
        # Create OUTPUT directory if it does not already exist
        # output_fldr = string, path of the OUTPUT folder from cur_path
        # output_dir  = string, total path of OUTPUT folder
        # output_path = string, path of file name of figure to be saved
        output_fldr = "OUTPUT/Demographics"
        output_dir = os.path.join(cur_path, output_fldr)
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, "OrigVsFixSSpop")
        plt.savefig(output_path)
        plt.show()

        # Print whether or not the adjusted immigration rates solved the
        # zero condition. The absolute value is taken element-wise
        # before the max so large negative residuals are not missed.
        imm_max_abs_err = np.absolute(imm_diagdict['fvec']).max()
        immtol_solved = imm_max_abs_err < imm_tol
        if immtol_solved:
            print("POP. SUCCESS: Adjusted immigration rates solved " +
                  "with maximum absolute error of " +
                  str(imm_max_abs_err) +
                  ", which is less than the tolerance of " +
                  str(imm_tol))
        else:
            print("POP. WARNING: Adjusted immigration rates did not " +
                  "solve. Maximum absolute error of " +
                  str(imm_max_abs_err) +
                  " is greater than the tolerance of " + str(imm_tol))

        # Test whether the steady-state growth rates implied by the
        # adjusted OMEGA matrix equals the steady-state growth rate of
        # the original OMEGA matrix
        OMEGA2 = _pop_transition_matrix(E + S, fert_rates, mort_rates,
                                        infmort_rate, imm_rates_adj)
        g_n_SS_adj = _dominant_real_eig(OMEGA2)[0] - 1
        g_n_diff = g_n_SS_adj - g_n_SS
        if np.absolute(g_n_diff) > 10 ** (-8):
            print("FAILURE: The steady-state population growth rate" +
                  " from adjusted OMEGA is different (diff is " +
                  str(g_n_diff) + ") than the steady-" +
                  "state population growth rate from the original" +
                  " OMEGA.")
        elif np.absolute(g_n_diff) <= 10 ** (-8):
            print("SUCCESS: The steady-state population growth rate" +
                  " from adjusted OMEGA is close to (diff is " +
                  str(g_n_diff) + ") the steady-" +
                  "state population growth rate from the original" +
                  " OMEGA.")

        # Do another test of the adjusted immigration rates. Create the
        # new OMEGA matrix implied by the new immigration rates. Plug in
        # the adjusted steady-state population distribution. Hit it with
        # the new OMEGA transition matrix and it should return the new
        # steady-state population distribution
        omega_new = np.dot(OMEGA2, omega_SSfx)
        omega_errs = np.absolute(omega_new - omega_SSfx)
        print("The maximum absolute difference between the adjusted " +
              "steady-state population distribution and the " +
              "distribution generated by hitting the adjusted OMEGA " +
              "transition matrix is " + str(omega_errs.max()))

        # Plot the original immigration rates versus the adjusted
        # immigration rates
        immratesmaxdiff = \
            np.absolute(imm_rates_orig - imm_rates_adj).max()
        print("The maximum absolute distance between any two points " +
              "of the original immigration rates and adjusted " +
              "immigration rates is " + str(immratesmaxdiff))
        fig, ax = plt.subplots()
        plt.plot(age_per_EpS, imm_rates_orig,
                 label="Original Imm. Rates")
        plt.plot(age_per_EpS, imm_rates_adj, label="Adj. Imm. Rates")
        # for the minor ticks, use no labels; default NullFormatter
        minorLocator = MultipleLocator(1)
        ax.xaxis.set_minor_locator(minorLocator)
        plt.grid(True, which='major', color='0.65', linestyle='-')
        plt.title(
            'Original immigration rates vs. adjusted',
            fontsize=20)
        plt.xlabel(r'Age $s$')
        plt.ylabel(r"Imm. rates $i_{s}$")
        plt.xlim((0, E + S + 1))
        plt.legend(loc='upper center')
        output_path = os.path.join(output_dir, "OrigVsAdjImm")
        plt.savefig(output_path)
        plt.show()

        # Plot population distributions for data_year, curr_year,
        # curr_year+20, omega_SSfx, and omega_SS_orig
        fig, ax = plt.subplots()
        plt.plot(age_per_EpS, pop_2013_pct, label="2013 pop.")
        plt.plot(age_per_EpS, (omega_path_lev[:, 0] /
                               omega_path_lev[:, 0].sum()),
                 label=str(curr_year) + " pop.")
        plt.plot(age_per_EpS, (omega_path_lev[:, int(0.5 * S)] /
                               omega_path_lev[:, int(0.5 * S)].sum()),
                 label="T=" + str(int(0.5 * S)) + " pop.")
        plt.plot(age_per_EpS, (omega_path_lev[:, int(S)] /
                               omega_path_lev[:, int(S)].sum()),
                 label="T=" + str(int(S)) + " pop.")
        plt.plot(age_per_EpS, omega_SSfx, label="Adj. SS pop.")
        # for the minor ticks, use no labels; default NullFormatter
        minorLocator = MultipleLocator(1)
        ax.xaxis.set_minor_locator(minorLocator)
        plt.grid(True, which='major', color='0.65', linestyle='-')
        plt.title(
            'Population distribution at points in time path',
            fontsize=20)
        plt.xlabel(r'Age $s$')
        plt.ylabel(r"Pop. dist'n $\omega_{s}$")
        plt.xlim((0, E+S+1))
        plt.legend(loc='lower left')
        output_path = os.path.join(output_dir, "PopDistPath")
        plt.savefig(output_path)
        plt.show()

    # return omega_path_S, g_n_SS, omega_SSfx, survival rates,
    # mort_rates_S, g_n_path, imm_rates_mat, and omega_S_preTP
    return (omega_path_S.T, g_n_SS, omega_SSfx[-S:] /
            omega_SSfx[-S:].sum(), 1-mort_rates_S, mort_rates_S,
            g_n_path, imm_rates_mat.T, omega_S_preTP)
def get_mort(totpers, min_yr, max_yr, graph=False):
    '''
    --------------------------------------------------------------------
    This function generates a vector of mortality rates by model period
    age.
    (Source: Male and Female death probabilities Actuarial Life table,
    2011 Social Security Administration,
    http://www.ssa.gov/oact/STATS/table4c6.html)
    --------------------------------------------------------------------
    INPUTS:
    totpers = integer >= 3, total number of agent life periods (E+S)
    min_yr  = integer >= 0, age in years at which agents are born,
              minimum age
    max_yr  = integer >= 4, age in years at which agents die with
              certainty, maximum age
    graph   = boolean, =True if want graphical output

    OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION:
        utils.read_file()
        mort_rates2011.csv

    OBJECTS CREATED WITHIN FUNCTION:
    infmort_rate    = scalar > 0, infant mortality rate (hard-coded,
                      2015 U.S. value)
    cur_path        = string, path in which this file resides
    mort_file       = object returned by utils.read_file() for the
                      mortality csv, passed directly to pd.read_csv
    mort_data       = DataFrame read from mort_rates2011.csv (uses
                      columns 'Age', 'Male Mort. Rate',
                      'Female Mort. Rate', 'Num. Male Lives',
                      'Num. Female Lives')
    age_year_all    = Series, data age + 1, restricted to ages whose
                      combined mortality rate is finite
    mort_rates_all  = Series, male and female mortality rates averaged
                      with the number of lives as weights, finite
                      entries only
    mort_rates_mxyr = Series, first max_yr entries of mort_rates_all
    num_sub_bins    = integer, number of sub-bins that each data year
                      is broken up into (100)
    len_subbins     = scalar, length of a model period in data
                      sub-bins
    mort_rates_sub  = (num_sub_bins*max_yr,) vector, per-sub-bin
                      mortality rates that compound to the yearly
                      rates in mort_rates_mxyr
    mort_rates      = (totpers,) vector, mortality rates that
                      correspond to each period of life; last entry
                      is set to 1
    i               = integer >= 0, index of data year / model period
                      being computed
    beg_sub_bin     = integer >= 0, index of beginning sub-bin for
                      calculation of cumulative mortality rate of
                      given model period
    end_sub_bin     = integer >= 0, index of ending sub-bin + 1 for
                      calculation of cumulative mortality rate of
                      given model period

    FILES CREATED BY THIS FUNCTION (only if graph=True):
        OUTPUT/Demographics/mort_rates (image, extension chosen by
        Matplotlib's default savefig format)

    RETURNS: mort_rates, infmort_rate
    --------------------------------------------------------------------
    '''
    # Get mortality rate by age data
    infmort_rate = 0.00587  # taken from 2015 U.S. infant mortality rate
    cur_path = os.path.split(os.path.abspath(__file__))[0]
    mort_file = utils.read_file(
        cur_path, 'data/demographic/mort_rates2011.csv')
    mort_data = pd.read_csv(mort_file, sep=',', thousands=',')
    age_year_all = mort_data['Age'] + 1
    mort_rates_all = (
        ((mort_data['Male Mort. Rate'] * mort_data['Num. Male Lives']) +
         (mort_data['Female Mort. Rate'] *
          mort_data['Num. Female Lives'])) /
        (mort_data['Num. Male Lives'] + mort_data['Num. Female Lives']))
    # Drop ages where the weighted average is not finite (e.g. zero
    # lives in both groups gives 0/0)
    age_year_all = age_year_all[np.isfinite(mort_rates_all)]
    mort_rates_all = mort_rates_all[np.isfinite(mort_rates_all)]
    # Calculate implied mortality rates in sub-bins of mort_rates_all.
    mort_rates_mxyr = mort_rates_all[0:max_yr]
    num_sub_bins = int(100)
    len_subbins = ((np.float64((max_yr - min_yr + 1) * num_sub_bins)) /
                   totpers)
    mort_rates_sub = np.zeros(num_sub_bins * max_yr, dtype=float)
    for i in range(max_yr):
        # Constant sub-bin rate whose survival compounds over
        # num_sub_bins sub-bins to the yearly survival rate
        mort_rates_sub[i * num_sub_bins:(i + 1) * num_sub_bins] =\
            (1 - ((1 - mort_rates_mxyr[i]) ** (1.0 / num_sub_bins)))
    mort_rates = np.zeros(totpers)
    end_sub_bin = 0
    for i in range(totpers):
        beg_sub_bin = int(end_sub_bin)
        end_sub_bin = int(np.rint((i + 1) * len_subbins))
        # Cumulative mortality over the sub-bins of model period i
        mort_rates[i] = (
            1 - (1 - (mort_rates_sub[beg_sub_bin:end_sub_bin])).prod())
    mort_rates[-1] = 1  # Mortality rate in last period is set to 1

    if graph:
        # age_mid_new = (totpers,) vector, midpoint age of each model
        #               period age bin
        # output_fldr = string, folder in current path to save files
        # output_dir  = string, total path of OUTPUT folder
        # output_path = string, path of file name of figure to be saved
        #
        # The builtin float is used here; the np.float alias no longer
        # exists in current NumPy releases.
        age_mid_new = (np.linspace(float(max_yr) / totpers, max_yr,
                                   totpers) -
                       (0.5 * float(max_yr) / totpers))
        fig, ax = plt.subplots()
        plt.scatter(np.hstack([0, age_year_all]),
                    np.hstack([infmort_rate, mort_rates_all]),
                    s=20, c='blue', marker='o', label='Data')
        plt.scatter(np.hstack([0, age_mid_new]),
                    np.hstack([infmort_rate, mort_rates]),
                    s=40, c='red', marker='d',
                    label='Model period (cumulative)')
        plt.plot(np.hstack([0, age_year_all[min_yr - 1:max_yr]]),
                 np.hstack([infmort_rate,
                            mort_rates_all[min_yr - 1:max_yr]]))
        plt.axvline(x=max_yr, color='red', linestyle='-', linewidth=1)
        # for the minor ticks, use no labels; default NullFormatter
        minorLocator = MultipleLocator(1)
        ax.xaxis.set_minor_locator(minorLocator)
        # The on/off flag is passed positionally: its keyword was
        # renamed from `b` to `visible` across Matplotlib releases.
        plt.grid(True, which='major', color='0.65', linestyle='-')
        # plt.title('Fitted mortality rate function by age ($rho_{s}$)',
        #     fontsize=20)
        plt.xlabel(r'Age $s$')
        plt.ylabel(r'Mortality rate $\rho_{s}$')
        plt.xlim((min_yr-2, age_year_all.max()+2))
        plt.ylim((-0.05, 1.05))
        plt.legend(loc='upper left')
        plt.text(-5, -0.2,
                 "Source: Actuarial Life table, 2011 Social Security " +
                 "Administration.", fontsize=9)
        plt.tight_layout(rect=(0, 0.03, 1, 1))
        # Create OUTPUT directory if it does not already exist
        output_fldr = "OUTPUT/Demographics"
        output_dir = os.path.join(cur_path, output_fldr)
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, "mort_rates")
        plt.savefig(output_path)
        # plt.show()

    return mort_rates, infmort_rate
def get_fert(totpers, min_yr, max_yr, graph=False):
    '''
    Generate a vector of fertility rates by model period age from
    fertility rate data by age in years.

    The age-bin fertility data are hard-coded below (Source: National
    Vital Statistics Reports, Volume 64, Number 1, January 15, 2015,
    Table 3, final 2013 data,
    http://www.cdc.gov/nchs/data/nvsr/nvsr64/nvsr64_01.pdf). A cubic
    interpolant is fit through the bin midpoints, evaluated on a fine
    grid of sub-bins, and averaged within each model period using the
    2013 population shares from pop_data.csv as weights.

    Args:
        totpers (int): total number of agent life periods (E+S), >= 3
        min_yr (int): age in years at which agents are born, >= 0
        max_yr (int): age in years at which agents die with certainty,
            >= 4
        graph (bool): =True to save a plot of the data, the cubic
            interpolant and the model-period rates to
            OUTPUT/Demographics/fert_rates under this file's directory

    Returns:
        fert_rates (Numpy array): (totpers,) vector, fertility rates
            for each model period of life

    '''
    # Get current population data (2013) for weighting
    cur_path = os.path.split(os.path.abspath(__file__))[0]
    pop_file = utils.read_file(cur_path, "data/demographic/pop_data.csv")
    pop_data = pd.read_csv(pop_file, sep=',', thousands=',')
    in_sample = ((pop_data['Age'] >= min_yr - 1) &
                 (pop_data['Age'] <= max_yr - 1))
    pop_data_samp = pop_data[in_sample]
    curr_pop = np.array(pop_data_samp['2013'], dtype='f')
    curr_pop_pct = curr_pop / curr_pop.sum()

    # Fertility rate by age-bin data. The raw numbers are births per
    # 1,000 women; dividing by 2,000 assumes an equal number of men.
    # The two leading and two trailing zeros are not data: they pin the
    # cubic interpolant to zero at both ends.
    fert_data = (np.array([0.0, 0.0, 0.3, 12.3, 47.1, 80.7, 105.5, 98.0,
                           49.3, 10.4, 0.8, 0.0, 0.0]) / 2000)
    age_midp = np.array([9, 10, 12, 16, 18.5, 22, 27, 32, 37, 42, 47,
                         55, 56])
    # Generate interpolation function for fertility rates
    fert_func = si.interp1d(age_midp, fert_data, kind='cubic')

    # Average the interpolated fertility rate within each model period,
    # weighting a large number of sub-bins by their population share.
    binsize = (max_yr - min_yr + 1) / totpers
    # Must be an integer: it is used as the repeat count in np.repeat,
    # which rejects float counts in current NumPy.
    num_sub_bins = 10000
    # NOTE(review): the 100 below hard-codes 100 years of data, and
    # age_sub is sized by max_yr while curr_pop_sub is sized by the
    # number of sampled data rows. Presumably these only line up for
    # min_yr=1, max_yr=100 -- confirm before using other age bounds.
    len_subbins = (np.float64(100 * num_sub_bins)) / totpers
    half_sub_bin = 0.5 * np.float64(binsize) / num_sub_bins
    age_sub = (np.linspace(np.float64(binsize) / num_sub_bins,
                           np.float64(max_yr),
                           int(num_sub_bins * max_yr)) - half_sub_bin)
    curr_pop_sub = np.repeat(np.float64(curr_pop_pct) / num_sub_bins,
                             num_sub_bins)
    # Fertility is zero outside the support of the interpolant
    fert_rates_sub = np.zeros(curr_pop_sub.shape)
    pred_ind = (age_sub > age_midp[0]) & (age_sub < age_midp[-1])
    fert_rates_sub[pred_ind] = np.float64(fert_func(age_sub[pred_ind]))

    fert_rates = np.zeros(totpers)
    end_sub_bin = 0
    for i in range(totpers):
        beg_sub_bin = int(end_sub_bin)
        end_sub_bin = int(np.rint((i + 1) * len_subbins))
        pop_slice = curr_pop_sub[beg_sub_bin:end_sub_bin]
        fert_slice = fert_rates_sub[beg_sub_bin:end_sub_bin]
        fert_rates[i] = (pop_slice * fert_slice).sum() / pop_slice.sum()

    if graph:
        # Generate finer age vector and fertility rate vector for
        # graphing cubic spline interpolating function
        age_fine_pred = np.linspace(age_midp[0], age_midp[-1], 300)
        fert_fine_pred = fert_func(age_fine_pred)
        age_fine = np.hstack((min_yr, age_fine_pred, max_yr))
        fert_fine = np.hstack((0, fert_fine_pred, 0))
        # Midpoint age of each model period age bin. The builtin float
        # replaces the np.float alias, which newer NumPy no longer has.
        age_mid_new = (np.linspace(float(max_yr) / totpers, max_yr,
                                   totpers) -
                       (0.5 * float(max_yr) / totpers))

        fig, ax = plt.subplots()
        plt.scatter(age_midp, fert_data, s=70, c='blue', marker='o',
                    label='Data')
        plt.scatter(age_mid_new, fert_rates, s=40, c='red', marker='d',
                    label='Model period (integrated)')
        plt.plot(age_fine, fert_fine, label='Cubic spline')
        # for the minor ticks, use no labels; default NullFormatter
        minorLocator = MultipleLocator(1)
        ax.xaxis.set_minor_locator(minorLocator)
        # Visibility is passed positionally: the keyword was renamed
        # from `b` to `visible` across Matplotlib versions.
        plt.grid(True, which='major', color='0.65', linestyle='-')
        plt.xlabel(r'Age $s$')
        plt.ylabel(r'Fertility rate $f_{s}$')
        plt.xlim((min_yr - 1, max_yr + 1))
        plt.ylim((-0.15 * (fert_fine_pred.max()),
                  1.15 * (fert_fine_pred.max())))
        plt.legend(loc='upper right')
        plt.text(-5, -0.018,
                 "Source: National Vital Statistics Reports, " +
                 "Volume 64, Number 1, January 15, 2015.", fontsize=9)
        plt.tight_layout(rect=(0, 0.03, 1, 1))
        # Create directory if OUTPUT directory does not already exist
        output_fldr = "OUTPUT/Demographics"
        output_dir = os.path.join(cur_path, output_fldr)
        if not os.access(output_dir, os.F_OK):
            os.makedirs(output_dir)
        output_path = os.path.join(output_dir, "fert_rates")
        plt.savefig(output_path)

    return fert_rates
def _build_omega(fert_rates, mort_rates, infmort_rate, imm_rates):
    '''
    Build the population transition matrix from fertility, mortality
    and immigration rates by model period.

    Args:
        fert_rates (Numpy array): (E+S,) fertility rates
        mort_rates (Numpy array): (E+S,) mortality rates
        infmort_rate (scalar): infant mortality rate
        imm_rates (Numpy array): (E+S,) immigration rates

    Returns:
        omega (Numpy array): (E+S, E+S) transition matrix

    '''
    totpers = len(fert_rates)
    omega = np.zeros((totpers, totpers))
    # First row: surviving newborns plus immigration into the first age
    omega[0, :] = ((1 - infmort_rate) * fert_rates +
                   np.hstack((imm_rates[0], np.zeros(totpers - 1))))
    # Sub-diagonal: survival from one age to the next
    omega[1:, :-1] += np.diag(1 - mort_rates[:-1])
    # Diagonal (ages after the first): immigration into each age
    omega[1:, 1:] += np.diag(imm_rates[1:])
    return omega


def get_pop_objs(E, S, T, min_yr, max_yr, curr_year, GraphDiag=True):
    '''
    Produce the demographics objects to be used in the OG-USA model
    package.

    Fertility, mortality and residual immigration rates are combined
    into a population transition matrix. Its dominant real eigenvalue
    gives the steady-state population growth rate. The 2013 population
    data are aged forward to curr_year and then simulated for T+S
    periods. From period int(1.5*S) on, the population distribution is
    held fixed and immigration rates are re-solved (via immsolve) to be
    consistent with that fixed distribution.

    Args:
        E (int): number of model periods in which agent is not
            economically active, >= 1
        S (int): number of model periods in which agent is economically
            active, >= 3
        T (int): number of periods to be simulated in TPI, > 2*S
        min_yr (int): age in years at which agents are born, >= 0
        max_yr (int): age in years at which agents die with certainty,
            >= 4
        curr_year (int): current year for which analysis will begin;
            the 2013 data are aged forward curr_year - 2013 periods
        GraphDiag (bool): =True to print diagnostics and to save and
            show diagnostic plots under OUTPUT/Demographics

    Returns:
        tuple: eight objects, in this order:

            * omega_path_S.T (Numpy array): (T+S, S) time path of the
              population distribution over the S active periods
            * g_n_SS (scalar): steady-state population growth rate
            * omega_SS (Numpy array): (S,) fixed steady-state population
              distribution over the S active periods, normalized to sum
              to one
            * surv_rates (Numpy array): (S,) survival rates,
              1 - mort_rates_S
            * mort_rates_S (Numpy array): (S,) mortality rates
            * g_n_path (Numpy array): (T+S,) time path of the population
              growth rate
            * imm_rates_mat.T (Numpy array): (T+S, S) time path of
              immigration rates
            * omega_S_preTP (Numpy array): (S,) population distribution
              in the period before the time path begins

    '''
    fert_rates = get_fert(E+S, min_yr, max_yr, graph=False)
    mort_rates, infmort_rate = get_mort(E+S, min_yr, max_yr, graph=False)
    mort_rates_S = mort_rates[-S:]
    imm_rates_orig = get_imm_resid(E+S, min_yr, max_yr, graph=False)
    OMEGA_orig = _build_omega(fert_rates, mort_rates, infmort_rate,
                              imm_rates_orig)

    # Solve for steady-state population growth rate and steady-state
    # population distribution by age using eigenvalue and eigenvector
    # decomposition. The position of the dominant real eigenvalue is
    # tracked in the full eigenvalue vector so that it selects the
    # matching column of eigvectors even when complex eigenvalues
    # precede it.
    eigvalues, eigvectors = np.linalg.eig(OMEGA_orig)
    real_idx = np.flatnonzero(np.isreal(eigvalues))
    dom_idx = real_idx[eigvalues[real_idx].real.argmax()]
    g_n_SS = eigvalues[dom_idx].real - 1
    eigvec_raw = eigvectors[:, dom_idx].real
    omega_SS_orig = eigvec_raw / eigvec_raw.sum()

    # Generate time path of the nonstationary population distribution
    omega_path_lev = np.zeros((E+S, T+S))
    cur_path = os.path.split(os.path.abspath(__file__))[0]
    pop_file = utils.read_file(cur_path, "data/demographic/pop_data.csv")
    pop_data = pd.read_table(pop_file, sep=',', thousands=',')
    pop_data_samp = pop_data[(pop_data['Age'] >= min_yr - 1) &
                             (pop_data['Age'] <= max_yr - 1)]
    pop_2013 = np.array(pop_data_samp['2013'], dtype='f')
    # Generate the current population distribution given that E+S might
    # be less than max_yr-min_yr+1
    age_per_EpS = np.arange(1, E+S+1)
    pop_2013_EpS = pop_rebin(pop_2013, E+S)
    pop_2013_pct = pop_2013_EpS / pop_2013_EpS.sum()

    # Age most recent population data to the current year of analysis
    pop_curr = pop_2013_EpS.copy()
    data_year = 2013
    # Growth rate and prior-period population used when curr_year equals
    # data_year (the loop below then does not run)
    pop_next = np.dot(OMEGA_orig, pop_curr)
    g_n_curr = ((pop_next[-S:].sum() - pop_curr[-S:].sum()) /
                pop_curr[-S:].sum())
    pop_past = pop_curr
    for per in range(curr_year - data_year):
        pop_next = np.dot(OMEGA_orig, pop_curr)
        g_n_curr = ((pop_next[-S:].sum() - pop_curr[-S:].sum()) /
                    pop_curr[-S:].sum())
        pop_past = pop_curr
        pop_curr = pop_next

    # Generate time path of the population distribution
    omega_path_lev[:, 0] = pop_curr
    for per in range(1, T+S):
        pop_curr = np.dot(OMEGA_orig, pop_curr)
        omega_path_lev[:, per] = pop_curr

    # Force the population distribution after 1.5*S periods to be the
    # steady-state distribution by adjusting immigration rates, holding
    # constant mortality, fertility, and SS growth rates
    imm_tol = 1e-14
    fixper = int(1.5*S)
    omega_SSfx = (omega_path_lev[:, fixper] /
                  omega_path_lev[:, fixper].sum())
    # fsolve unpacks this tuple into the extra positional arguments of
    # immsolve
    imm_objs = (fert_rates, mort_rates, infmort_rate,
                omega_path_lev[:, fixper], g_n_SS)
    imm_fulloutput = opt.fsolve(immsolve, imm_rates_orig, args=imm_objs,
                                full_output=True, xtol=imm_tol)
    imm_rates_adj = imm_fulloutput[0]
    imm_diagdict = imm_fulloutput[1]

    pop_S_lev = omega_path_lev[-S:, :]
    pop_S_tot = pop_S_lev.sum(axis=0)
    omega_path_S = pop_S_lev / np.tile(pop_S_tot, (S, 1))
    # Hold the distribution fixed from period fixper onward
    omega_path_S[:, fixper:] = np.tile(
        omega_path_S[:, fixper].reshape((S, 1)), (1, T+S-fixper))
    g_n_path = np.zeros(T+S)
    g_n_path[0] = g_n_curr
    g_n_path[1:] = (pop_S_tot[1:] - pop_S_tot[:-1]) / pop_S_tot[:-1]
    g_n_path[fixper+1:] = g_n_SS
    omega_S_preTP = pop_past[-S:] / pop_past[-S:].sum()
    # Original immigration rates before fixper, adjusted rates after
    imm_rates_mat = np.hstack((
        np.tile(np.reshape(imm_rates_orig[E:], (S, 1)), (1, fixper)),
        np.tile(np.reshape(imm_rates_adj[E:], (S, 1)),
                (1, T+S-fixper))))

    if GraphDiag:
        # Check whether original SS population distribution is close to
        # the period-T population distribution
        omegaSSmaxdif = np.absolute(
            omega_SS_orig - (omega_path_lev[:, T] /
                             omega_path_lev[:, T].sum())).max()
        if omegaSSmaxdif > 0.0003:
            print("POP. WARNING: Max. abs. dist. between original SS " +
                  "pop. dist'n and period-T pop. dist'n is greater than" +
                  " 0.0003. It is " + str(omegaSSmaxdif) + ".")
        else:
            print("POP. SUCCESS: orig. SS pop. dist is very close to " +
                  "period-T pop. dist'n. The maximum absolute " +
                  "difference is " + str(omegaSSmaxdif) + ".")

        # Plot the adjusted steady-state population distribution versus
        # the original population distribution. The difference should be
        # small
        omegaSSvTmaxdiff = np.absolute(omega_SS_orig - omega_SSfx).max()
        if omegaSSvTmaxdiff > 0.0003:
            print("POP. WARNING: The maximimum absolute difference " +
                  "between any two corresponding points in the original" +
                  " and adjusted steady-state population " +
                  "distributions is " + str(omegaSSvTmaxdiff) + ", " +
                  "which is greater than 0.0003.")
        else:
            print("POP. SUCCESS: The maximum absolute difference " +
                  "between any two corresponding points in the original" +
                  " and adjusted steady-state population " +
                  "distributions is " + str(omegaSSvTmaxdiff))
        fig, ax = plt.subplots()
        plt.plot(age_per_EpS, omega_SS_orig, label="Original Dist'n")
        plt.plot(age_per_EpS, omega_SSfx, label="Fixed Dist'n")
        # for the minor ticks, use no labels; default NullFormatter
        minorLocator = MultipleLocator(1)
        ax.xaxis.set_minor_locator(minorLocator)
        # Visibility is passed positionally: the keyword was renamed
        # from `b` to `visible` across Matplotlib versions.
        plt.grid(True, which='major', color='0.65', linestyle='-')
        plt.title(
            'Original steady-state population distribution vs. fixed',
            fontsize=20)
        plt.xlabel(r'Age $s$')
        plt.ylabel(r"Pop. dist'n $\omega_{s}$")
        plt.xlim((0, E+S+1))
        plt.legend(loc='upper right')
        # Create directory if OUTPUT directory does not already exist
        output_fldr = "OUTPUT/Demographics"
        output_dir = os.path.join(cur_path, output_fldr)
        if not os.access(output_dir, os.F_OK):
            os.makedirs(output_dir)
        output_path = os.path.join(output_dir, "OrigVsFixSSpop")
        plt.savefig(output_path)
        plt.show()

        # Print whether or not the adjusted immigration rates solved the
        # zero condition. The error is the largest residual in absolute
        # value, so a large negative residual also counts.
        imm_max_err = np.absolute(imm_diagdict['fvec']).max()
        if imm_max_err < imm_tol:
            print("POP. SUCCESS: Adjusted immigration rates solved " +
                  "with maximum absolute error of " +
                  str(imm_max_err) +
                  ", which is less than the tolerance of " +
                  str(imm_tol))
        else:
            print("POP. WARNING: Adjusted immigration rates did not " +
                  "solve. Maximum absolute error of " +
                  str(imm_max_err) +
                  " is greater than the tolerance of " + str(imm_tol))

        # Test whether the steady-state growth rates implied by the
        # adjusted OMEGA matrix equals the steady-state growth rate of
        # the original OMEGA matrix
        OMEGA2 = _build_omega(fert_rates, mort_rates, infmort_rate,
                              imm_rates_adj)
        eigvalues2 = np.linalg.eig(OMEGA2)[0]
        # Use the eigenvalues of the adjusted matrix, not the original's
        g_n_SS_adj = (eigvalues2[np.isreal(eigvalues2)].real).max() - 1
        if np.absolute(g_n_SS_adj - g_n_SS) <= 10 ** (-8):
            print("SUCCESS: The steady-state population growth rate" +
                  " from adjusted OMEGA is close to (diff is " +
                  str(g_n_SS_adj - g_n_SS) + ") the steady-" +
                  "state population growth rate from the original" +
                  " OMEGA.")
        else:
            print("FAILURE: The steady-state population growth rate" +
                  " from adjusted OMEGA is different (diff is " +
                  str(g_n_SS_adj - g_n_SS) + ") than the steady-" +
                  "state population growth rate from the original" +
                  " OMEGA.")

        # Do another test of the adjusted immigration rates. Create the
        # new OMEGA matrix implied by the new immigration rates. Plug in
        # the adjusted steady-state population distribution. Hit it with
        # the new OMEGA transition matrix and it should return the new
        # steady-state population distribution
        # NOTE(review): omega_new is not divided by (1 + g_n_SS) before
        # the comparison -- confirm this matches the condition solved in
        # immsolve().
        omega_new = np.dot(OMEGA2, omega_SSfx)
        omega_errs = np.absolute(omega_new - omega_SSfx)
        print("The maximum absolute difference between the adjusted " +
              "steady-state population distribution and the " +
              "distribution generated by hitting the adjusted OMEGA " +
              "transition matrix is " + str(omega_errs.max()))

        # Plot the original immigration rates versus the adjusted
        # immigration rates
        immratesmaxdiff = \
            np.absolute(imm_rates_orig - imm_rates_adj).max()
        print("The maximum absolute distance between any two points " +
              "of the original immigration rates and adjusted " +
              "immigration rates is " + str(immratesmaxdiff))
        fig, ax = plt.subplots()
        plt.plot(age_per_EpS, imm_rates_orig,
                 label="Original Imm. Rates")
        plt.plot(age_per_EpS, imm_rates_adj, label="Adj. Imm. Rates")
        # for the minor ticks, use no labels; default NullFormatter
        minorLocator = MultipleLocator(1)
        ax.xaxis.set_minor_locator(minorLocator)
        plt.grid(True, which='major', color='0.65', linestyle='-')
        plt.title(
            'Original immigration rates vs. adjusted',
            fontsize=20)
        plt.xlabel(r'Age $s$')
        plt.ylabel(r"Imm. rates $i_{s}$")
        plt.xlim((0, E+S+1))
        plt.legend(loc='upper center')
        output_path = os.path.join(output_dir, "OrigVsAdjImm")
        plt.savefig(output_path)
        plt.show()

        # Plot population distributions for data_year, curr_year,
        # curr_year+0.5*S, curr_year+S, and omega_SSfx
        fig, ax = plt.subplots()
        plt.plot(age_per_EpS, pop_2013_pct, label="2013 pop.")
        plt.plot(age_per_EpS,
                 (omega_path_lev[:, 0] / omega_path_lev[:, 0].sum()),
                 label=str(curr_year)+" pop.")
        plt.plot(age_per_EpS,
                 (omega_path_lev[:, int(0.5 * S)] /
                  omega_path_lev[:, int(0.5 * S)].sum()),
                 label="T="+str(int(0.5 * S))+" pop.")
        plt.plot(age_per_EpS,
                 (omega_path_lev[:, int(S)] /
                  omega_path_lev[:, int(S)].sum()),
                 label="T="+str(int(S))+" pop.")
        plt.plot(age_per_EpS, omega_SSfx, label="Adj. SS pop.")
        # for the minor ticks, use no labels; default NullFormatter
        minorLocator = MultipleLocator(1)
        ax.xaxis.set_minor_locator(minorLocator)
        plt.grid(True, which='major', color='0.65', linestyle='-')
        plt.title(
            'Population distribution at points in time path',
            fontsize=20)
        plt.xlabel(r'Age $s$')
        plt.ylabel(r"Pop. dist'n $\omega_{s}$")
        plt.xlim((0, E+S+1))
        plt.legend(loc='lower left')
        output_path = os.path.join(output_dir, "PopDistPath")
        plt.savefig(output_path)
        plt.show()

    return (omega_path_S.T, g_n_SS,
            omega_SSfx[-S:] / omega_SSfx[-S:].sum(), 1-mort_rates_S,
            mort_rates_S, g_n_path, imm_rates_mat.T, omega_S_preTP)
def get_imm_resid(totpers, min_yr, max_yr, graph=True):
    '''
    Calculate immigration rates by model period as a residual given
    population levels in different years, then return the average
    calculated immigration rate.

    Population by single year of age for 2010-2013 is read from
    pop_data.csv and rebinned into totpers model periods with
    pop_rebin(). For each of the three successive pairs of years, the
    immigration rate is the part of next year's population not
    explained by births (first period) or by survivors of the previous
    period (all later periods). The three estimates are averaged.
    (Source: Population data come from Annual Estimates of the Resident
    Population by Single Year of Age and Sex: April 1, 2010 to
    July 1, 2013 (Both sexes) National Characteristics, Vintage 2013,
    US Census Bureau,
    http://www.census.gov/popest/data/national/asrh/2013/index.html)

    Args:
        totpers (int): number of agent life periods (E+S), >= 3
        min_yr (int): age in years at which agents are born, >= 0
        max_yr (int): age in years at which agents die with certainty,
            >= 4
        graph (bool): =True to save a plot of the immigration rates to
            OUTPUT/Demographics/imm_rates_orig under this file's
            directory

    Returns:
        imm_rates (Numpy array): (totpers,) vector, immigration rates
            that correspond to each period of life

    '''
    cur_path = os.path.split(os.path.abspath(__file__))[0]
    pop_file = utils.read_file(cur_path, "data/demographic/pop_data.csv")
    pop_data = pd.read_table(pop_file, sep=',', thousands=',')
    pop_data_samp = pop_data[(pop_data['Age'] >= min_yr - 1) &
                             (pop_data['Age'] <= max_yr - 1)]
    # Rows are the years 2010, 2011, 2012, 2013; columns are the
    # totpers model periods
    pop_EpS = np.vstack([
        pop_rebin(np.array(pop_data_samp[year], dtype='f'), totpers)
        for year in ('2010', '2011', '2012', '2013')])
    pop_first3 = pop_EpS[:3]  # 2010-2012
    pop_last3 = pop_EpS[1:]   # 2011-2013

    fert_rates = get_fert(totpers, min_yr, max_yr, False)
    mort_rates, infmort_rate = get_mort(totpers, min_yr, max_yr, False)

    imm_mat = np.zeros((3, totpers))
    # Create three years of estimated immigration rates for youngest
    # age individuals: next year's first-period population net of
    # surviving newborns, relative to this year's
    newbornvec = np.dot(fert_rates, pop_first3.T)
    imm_mat[:, 0] = ((pop_last3[:, 0] -
                      (1 - infmort_rate) * newbornvec) /
                     pop_first3[:, 0])
    # Estimate 3 years of immigration rates for all other-aged
    # individuals: next year's population net of survivors from the
    # previous period, relative to this year's population of that age
    survivors = (1 - mort_rates[:-1]) * pop_first3[:, :-1]
    imm_mat[:, 1:] = (pop_last3[:, 1:] - survivors) / pop_first3[:, 1:]
    # Final estimated immigration rates are the averages over 3 years
    imm_rates = imm_mat.mean(axis=0)

    if graph:
        age_per = np.linspace(1, totpers, totpers)
        fig, ax = plt.subplots()
        plt.scatter(age_per, imm_rates, s=40, c='red', marker='d')
        plt.plot(age_per, imm_rates)
        # for the minor ticks, use no labels; default NullFormatter
        minorLocator = MultipleLocator(1)
        ax.xaxis.set_minor_locator(minorLocator)
        # Visibility is passed positionally: the keyword was renamed
        # from `b` to `visible` across Matplotlib versions.
        plt.grid(True, which='major', color='0.65', linestyle='-')
        plt.xlabel(r'Age $s$ (model periods)')
        plt.ylabel(r'Imm. rate $i_{s}$')
        plt.xlim((0, totpers+1))
        # Create directory if OUTPUT directory does not already exist
        output_fldr = "OUTPUT/Demographics"
        output_dir = os.path.join(cur_path, output_fldr)
        if not os.access(output_dir, os.F_OK):
            os.makedirs(output_dir)
        output_path = os.path.join(output_dir, "imm_rates_orig")
        plt.savefig(output_path)

    return imm_rates
def get_fert(totpers, min_yr, max_yr, graph=False):
    '''
    --------------------------------------------------------------------
    This function generates a vector of fertility rates by model period
    age that corresponds to the fertility rate data by age in years
    (Source: National Vital Statistics Reports, Volume 64, Number 1,
    January 15, 2015, Table 3, final 2013 data
    http://www.cdc.gov/nchs/data/nvsr/nvsr64/nvsr64_01.pdf)
    --------------------------------------------------------------------
    INPUTS:
    totpers = integer >= 3, total number of agent life periods (E+S)
    min_yr  = integer >= 0, age in years at which agents are born,
              minimum age
    max_yr  = integer >= 4, age in years at which agents die with
              certainty, maximum age
    graph   = boolean, =True if want graphical output

    OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION:
        utils.read_file()
        data/demographic/pop_data.csv

    OBJECTS CREATED WITHIN FUNCTION:
    cur_path       = string, directory in which this file resides
    pop_file       = object returned by utils.read_file() for the
                     population data csv file
    pop_data       = DataFrame read from pop_file; the 'Age' and '2013'
                     columns are the ones used here
    pop_data_samp  = DataFrame, rows of pop_data with
                     min_yr - 1 <= Age <= max_yr - 1
    curr_pop       = vector, 2013 population for the sampled ages
    curr_pop_pct   = vector, curr_pop as a share of its total
    fert_data      = (13,) vector, fertility rates for given age bins.
                     We divide numbers by 2,000 because original data
                     is in births per 1000 women. We assume an equal
                     number of men. Two zeros are added on the front
                     and on the back to make spline interpolation work
                     right
    age_midp       = (13,) vector, midpoint age of age bins from the
                     original data. The first two and last two are not
                     data
    fert_func      = function, generated by interp1d (cubic), takes
                     ages and returns interpolated fertility rates
    binsize        = scalar > 0, size of each model period bin in data
                     years
    num_sub_bins   = integer, deliberately large number of sub-bins
                     that each data year is broken up into
    len_subbins    = scalar, length of a model period in sub-bins
    age_sub        = (num_sub_bins * max_yr,) vector, midpoint ages of
                     the sub-bins
    curr_pop_sub   = (num_sub_bins * len(curr_pop),) vector, each
                     year's population share split evenly across its
                     sub-bins
    fert_rates_sub = vector shaped like curr_pop_sub, fertility rate in
                     each sub-bin (zero outside the support of
                     fert_func)
    pred_ind       = boolean vector, =True for sub-bins strictly inside
                     the support of fert_func
    age_pred       = vector, midpoint ages of the sub-bins selected by
                     pred_ind
    fert_rates     = (totpers,) vector, fertility rates for each model
                     period of life
    i              = integer >= 0, index of model period being computed
    beg_sub_bin    = integer >= 0, index of beginning sub-bin of the
                     given model period
    end_sub_bin    = integer >= 0, index of ending sub-bin + 1 of the
                     given model period

    FILES CREATED BY THIS FUNCTION:
        OUTPUT/Demographics/fert_rates (figure, only if graph is True)

    RETURNS: fert_rates
    --------------------------------------------------------------------
    '''
    # Get current population data (2013) for weighting
    cur_path = os.path.split(os.path.abspath(__file__))[0]
    pop_file = utils.read_file(cur_path,
                               "data/demographic/pop_data.csv")
    pop_data = pd.read_table(pop_file, sep=',', thousands=',')
    pop_data_samp = pop_data[(pop_data['Age'] >= min_yr - 1) &
                             (pop_data['Age'] <= max_yr - 1)]
    curr_pop = np.array(pop_data_samp['2013'], dtype='f')
    curr_pop_pct = curr_pop / curr_pop.sum()

    # Get fertility rate by age-bin data
    fert_data = (np.array([0.0, 0.0, 0.3, 12.3, 47.1, 80.7, 105.5, 98.0,
                           49.3, 10.4, 0.8, 0.0, 0.0]) / 2000)
    age_midp = np.array([9, 10, 12, 16, 18.5, 22, 27, 32, 37, 42, 47,
                         55, 56])

    # Generate interpolation function for fertility rates
    fert_func = si.interp1d(age_midp, fert_data, kind='cubic')

    # Calculate the population-weighted average fertility rate of each
    # model period from a large number of sub-bins per data year.
    data_yrs = max_yr - min_yr + 1
    binsize = float(data_yrs) / totpers
    # Must be an integer: it is used as the `repeats` count of
    # np.repeat and as a number of points for np.linspace.
    num_sub_bins = 10000
    # Uses the number of data years rather than a hard-coded 100, which
    # matches the sub-bin bookkeeping in get_mort().
    len_subbins = float(data_yrs * num_sub_bins) / totpers
    # NOTE(review): the offsets below scale with binsize, so the grid
    # is exactly the sub-bin midpoints only when binsize == 1 -- confirm
    # this is intended for other values of totpers.
    age_sub = (np.linspace(binsize / num_sub_bins, float(max_yr),
                           int(num_sub_bins * max_yr)) -
               0.5 * binsize / num_sub_bins)
    curr_pop_sub = np.repeat(np.float64(curr_pop_pct) / num_sub_bins,
                             num_sub_bins)
    fert_rates_sub = np.zeros(curr_pop_sub.shape)
    # Only evaluate the spline strictly inside its support
    pred_ind = (age_sub > age_midp[0]) & (age_sub < age_midp[-1])
    age_pred = age_sub[pred_ind]
    fert_rates_sub[pred_ind] = np.float64(fert_func(age_pred))
    fert_rates = np.zeros(totpers)
    end_sub_bin = 0
    for i in range(totpers):
        beg_sub_bin = int(end_sub_bin)
        end_sub_bin = int(np.rint((i + 1) * len_subbins))
        pop_slice = curr_pop_sub[beg_sub_bin:end_sub_bin]
        fert_rates[i] = (
            (pop_slice * fert_rates_sub[beg_sub_bin:end_sub_bin]).sum()
            / pop_slice.sum())

    if graph:
        # ----------------------------------------------------------------
        # age_fine_pred  = (300,) vector, equally spaced support of ages
        #                  between the minimum and maximum interpolating
        #                  ages
        # fert_fine_pred = (300,) vector, interpolated fertility rates
        #                  based on age_fine_pred
        # age_fine       = (302,) vector, age_fine_pred with min_yr and
        #                  max_yr added at the ends
        # fert_fine      = (302,) vector, fert_fine_pred with a zero
        #                  added at each end
        # age_mid_new    = (totpers,) vector, midpoint age of each model
        #                  period age bin
        # output_fldr    = string, folder in current path to save files
        # output_dir     = string, total path of OUTPUT folder
        # output_path    = string, path of figure file to be saved
        # ----------------------------------------------------------------
        age_fine_pred = np.linspace(age_midp[0], age_midp[-1], 300)
        fert_fine_pred = fert_func(age_fine_pred)
        age_fine = np.hstack((min_yr, age_fine_pred, max_yr))
        fert_fine = np.hstack((0, fert_fine_pred, 0))
        # Built-in float replaces the removed np.float alias
        age_mid_new = (np.linspace(float(max_yr) / totpers, max_yr,
                                   totpers) -
                       (0.5 * float(max_yr) / totpers))

        fig, ax = plt.subplots()
        plt.scatter(age_midp, fert_data, s=70, c='blue', marker='o',
                    label='Data')
        plt.scatter(age_mid_new, fert_rates, s=40, c='red', marker='d',
                    label='Model period (integrated)')
        plt.plot(age_fine, fert_fine, label='Cubic spline')
        # for the minor ticks, use no labels; default NullFormatter
        minorLocator = MultipleLocator(1)
        ax.xaxis.set_minor_locator(minorLocator)
        # Visibility flag passed positionally: the `b` keyword is not
        # accepted by newer Matplotlib releases.
        plt.grid(True, which='major', color='0.65', linestyle='-')
        plt.xlabel(r'Age $s$')
        plt.ylabel(r'Fertility rate $f_{s}$')
        plt.xlim((min_yr - 1, max_yr + 1))
        plt.ylim((-0.15 * (fert_fine_pred.max()),
                  1.15 * (fert_fine_pred.max())))
        plt.legend(loc='upper right')
        plt.text(-5, -0.018,
                 "Source: National Vital Statistics Reports, "
                 "Volume 64, Number 1, January 15, 2015.", fontsize=9)
        plt.tight_layout(rect=(0, 0.03, 1, 1))
        # Create directory if OUTPUT directory does not already exist
        output_fldr = "OUTPUT/Demographics"
        output_dir = os.path.join(cur_path, output_fldr)
        if not os.access(output_dir, os.F_OK):
            os.makedirs(output_dir)
        output_path = os.path.join(output_dir, "fert_rates")
        plt.savefig(output_path)

    return fert_rates
def get_mort(totpers, min_yr, max_yr, graph=False):
    '''
    --------------------------------------------------------------------
    This function generates a vector of mortality rates by model period
    age.
    (Source: Male and Female death probabilities Actuarial Life table,
    2011 Social Security Administration,
    http://www.ssa.gov/oact/STATS/table4c6.html)
    --------------------------------------------------------------------
    INPUTS:
    totpers = integer >= 3, total number of agent life periods (E+S)
    min_yr  = integer >= 0, age in years at which agents are born,
              minimum age
    max_yr  = integer >= 4, age in years at which agents die with
              certainty, maximum age
    graph   = boolean, =True if want graphical output

    OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION:
        utils.read_file()
        data/demographic/mort_rates2011.csv

    OBJECTS CREATED WITHIN FUNCTION:
    infmort_rate    = scalar > 0, infant mortality rate from 2015 U.S.
                      CIA World Factbook
    cur_path        = string, directory in which this file resides
    mort_file       = object returned by utils.read_file() for the
                      mortality rate data (.csv)
    mort_data       = DataFrame, 2011 mortality rate data for men and
                      women
    finite          = boolean Series, =True where the combined
                      mortality rate is finite
    age_year_all    = Series, ages (data age + 1) with a finite combined
                      mortality rate
    mort_rates_all  = Series, lives-weighted average of male and female
                      mortality rates, finite entries only
    mort_rates_mxyr = (max_yr,) vector, first max_yr entries of
                      mort_rates_all
    num_sub_bins    = integer, number of sub-bins that each data year is
                      broken up into
    len_subbins     = scalar, length of a model period in sub-bins
    mort_rates_sub  = (num_sub_bins * max_yr,) vector, mortality rates
                      by sub-bin implied by mort_rates_mxyr
    mort_rates      = (totpers,) vector, mortality rates that correspond
                      to each period of life
    i               = integer >= 0, index of model period being computed
    beg_sub_bin     = integer >= 0, index of beginning sub-bin of the
                      given model period
    end_sub_bin     = integer >= 0, index of ending sub-bin + 1 of the
                      given model period

    FILES CREATED BY THIS FUNCTION:
        OUTPUT/Demographics/mort_rates (figure, only if graph is True)

    RAISES:
        ValueError if fewer than max_yr finite mortality rates are
        available in the data

    RETURNS: mort_rates, infmort_rate
    --------------------------------------------------------------------
    '''
    # Get mortality rate by age data
    infmort_rate = 0.00587  # taken from 2015 U.S. infant mortality rate
    cur_path = os.path.split(os.path.abspath(__file__))[0]
    mort_file = utils.read_file(
        cur_path, 'data/demographic/mort_rates2011.csv')
    mort_data = pd.read_table(mort_file, sep=',', thousands=',')
    age_year_all = mort_data['Age'] + 1
    # Combine male and female rates, weighting by number of lives
    mort_rates_all = (
        ((mort_data['Male Mort. Rate'] * mort_data['Num. Male Lives']) +
         (mort_data['Female Mort. Rate'] *
          mort_data['Num. Female Lives'])) /
        (mort_data['Num. Male Lives'] + mort_data['Num. Female Lives']))
    finite = np.isfinite(mort_rates_all)
    age_year_all = age_year_all[finite]
    mort_rates_all = mort_rates_all[finite]

    # Calculate implied mortality rates in sub-bins of mort_rates_all.
    # Work on a plain positional array so that element i is always the
    # i-th retained rate, regardless of the Series' index labels.
    mort_rates_mxyr = np.asarray(mort_rates_all.iloc[0:max_yr],
                                 dtype=float)
    if mort_rates_mxyr.shape[0] < max_yr:
        raise ValueError(
            'mortality data has only {} finite rates but max_yr is {}'
            .format(mort_rates_mxyr.shape[0], max_yr))
    num_sub_bins = 100
    len_subbins = float((max_yr - min_yr + 1) * num_sub_bins) / totpers
    # A constant per-sub-bin rate r satisfying
    # (1 - r) ** num_sub_bins == 1 - annual rate, repeated for every
    # sub-bin of the corresponding year.
    mort_rates_sub = np.repeat(
        1 - (1 - mort_rates_mxyr) ** (1.0 / num_sub_bins), num_sub_bins)
    mort_rates = np.zeros(totpers)
    end_sub_bin = 0
    for i in range(totpers):
        beg_sub_bin = int(end_sub_bin)
        end_sub_bin = int(np.rint((i + 1) * len_subbins))
        # Cumulative mortality over the model period
        mort_rates[i] = (
            1 - (1 - mort_rates_sub[beg_sub_bin:end_sub_bin]).prod())
    mort_rates[-1] = 1  # Mortality rate in last period is set to 1

    if graph:
        # ----------------------------------------------------------------
        # age_mid_new = (totpers,) vector, midpoint age of each model
        #               period age bin
        # output_fldr = string, folder in current path to save files
        # output_dir  = string, total path of OUTPUT folder
        # output_path = string, path of figure file to be saved
        # ----------------------------------------------------------------
        # Built-in float replaces the removed np.float alias
        age_mid_new = (np.linspace(float(max_yr) / totpers, max_yr,
                                   totpers) -
                       (0.5 * float(max_yr) / totpers))
        fig, ax = plt.subplots()
        plt.scatter(np.hstack([0, age_year_all]),
                    np.hstack([infmort_rate, mort_rates_all]),
                    s=20, c='blue', marker='o', label='Data')
        plt.scatter(np.hstack([0, age_mid_new]),
                    np.hstack([infmort_rate, mort_rates]),
                    s=40, c='red', marker='d',
                    label='Model period (cumulative)')
        plt.plot(
            np.hstack([0, age_year_all.iloc[min_yr - 1:max_yr]]),
            np.hstack([infmort_rate,
                       mort_rates_all.iloc[min_yr - 1:max_yr]]))
        plt.axvline(x=max_yr, color='red', linestyle='-', linewidth=1)
        # for the minor ticks, use no labels; default NullFormatter
        minorLocator = MultipleLocator(1)
        ax.xaxis.set_minor_locator(minorLocator)
        # Visibility flag passed positionally: the `b` keyword is not
        # accepted by newer Matplotlib releases.
        plt.grid(True, which='major', color='0.65', linestyle='-')
        plt.xlabel(r'Age $s$')
        plt.ylabel(r'Mortality rate $\rho_{s}$')
        plt.xlim((min_yr - 2, age_year_all.max() + 2))
        plt.ylim((-0.05, 1.05))
        plt.legend(loc='upper left')
        plt.text(-5, -0.2,
                 "Source: Actuarial Life table, 2011 Social Security "
                 "Administration.", fontsize=9)
        plt.tight_layout(rect=(0, 0.03, 1, 1))
        # Create directory if OUTPUT directory does not already exist
        output_fldr = "OUTPUT/Demographics"
        output_dir = os.path.join(cur_path, output_fldr)
        if not os.access(output_dir, os.F_OK):
            os.makedirs(output_dir)
        output_path = os.path.join(output_dir, "mort_rates")
        plt.savefig(output_path)

    return mort_rates, infmort_rate
def get_fert(totpers, min_yr, max_yr, graph=False):
    '''
    --------------------------------------------------------------------
    This function generates a vector of fertility rates by model period
    age that corresponds to the fertility rate data by age in years
    --------------------------------------------------------------------
    INPUTS:
    totpers = integer >= 3, total number of agent life periods (E+S)
    min_yr  = integer >= 0, age in years at which agents are born,
              minimum age
    max_yr  = integer >= 4, age in years at which agents die with
              certainty, maximum age
    graph   = boolean, =True if want graphical output

    OTHER FUNCTIONS AND FILES CALLED BY THIS FUNCTION:
        utils.read_file()
        select_pop_data()
        select_fert_data()
        population data file located by cur_path and pop_dir
        fertility data file located by fert_dir
        (cur_path, pop_dir and fert_dir are not defined in this
        function; they are expected to exist at module level)

    OBJECTS CREATED WITHIN FUNCTION:
    pop_file       = object returned by utils.read_file() for the
                     population data file
    pop_data       = DataFrame with the Year, Age and Total columns of
                     the whitespace-delimited population file, after
                     select_pop_data()
    pop_data_samp  = DataFrame, rows of pop_data with
                     min_yr - 1 <= Age <= max_yr - 1
    curr_pop       = vector, Total population of the sampled ages in
                     Year 2014
    curr_pop_pct   = vector, curr_pop as a share of its total
    fert_data      = DataFrame of fertility data after
                     select_fert_data(), reduced to its Year == 1995
                     rows with 'Values' replaced by half of the
                     1990-2014 mean for each age 14 through 50.
                     Presumably the halving assumes an equal number of
                     men and women -- confirm
    in_window      = DataFrame, fertility rows with Year in 1990-2014
    fert_list      = list, mean of 'Values' over in_window for each age
                     14 through 50
    fert_tck       = spline representation from si.splrep() of
                     fert_data['Values'] against fert_data['Age']
    binsize        = scalar > 0, size of each model period bin in data
                     years
    num_sub_bins   = integer, deliberately large number of sub-bins
                     that each data year is broken up into
    n_sub_total    = integer, total number of sub-bins
                     (num_sub_bins * max_yr)
    len_subbins    = scalar, length of a model period in sub-bins
    age_sub        = (n_sub_total,) vector, midpoint ages of sub-bins
    ages           = vector, equally spaced ages from min_yr to max_yr,
                     one per entry of curr_pop_pct
    pop_func       = spline representation from si.splrep() of
                     curr_pop_pct against ages
    new_bins       = (n_sub_total,) vector, equally spaced ages from
                     min_yr to max_yr
    curr_pop_sub   = (n_sub_total,) vector, population spline evaluated
                     on new_bins and normalized to sum to one
    fert_rates_sub = (n_sub_total,) vector, fertility rate in each
                     sub-bin (zero outside the fertility data ages,
                     negative spline values set to zero)
    pred_ind       = (n_sub_total,) boolean vector, =True for sub-bins
                     strictly inside the range of fertility data ages
    age_pred       = vector, midpoint ages of the sub-bins selected by
                     pred_ind
    fert_rates     = (totpers,) vector, fertility rates for each model
                     period of life
    i              = integer >= 0, index of model period being computed
    beg_sub_bin    = integer >= 0, index of beginning sub-bin of the
                     given model period
    end_sub_bin    = integer >= 0, index of ending sub-bin + 1 of the
                     given model period

    FILES CREATED BY THIS FUNCTION:
        OUTPUT/Demographics/fert_rates (figure, only if graph is True)

    RETURNS: fert_rates
    --------------------------------------------------------------------
    '''
    # Get current population data for weighting
    pop_file = utils.read_file(cur_path, pop_dir)
    pop_data = pd.read_csv(pop_file, sep=r'\s+',
                           usecols=['Year', 'Age', 'Total'])
    pop_data = select_pop_data(pop_data)
    pop_data_samp = pop_data[(pop_data['Age'] >= min_yr - 1) &
                             (pop_data['Age'] <= max_yr - 1)]
    curr_pop = np.array(
        pop_data_samp[pop_data_samp['Year'] == 2014]['Total'],
        dtype='f')
    # Share of each age within the same year
    curr_pop_pct = curr_pop / curr_pop.sum()

    # Get fertility rate by age data. The separator is a regular
    # expression, so it is written as a raw string and the python
    # parser engine (the only one supporting regex separators) is
    # requested explicitly.
    fert_data = pd.read_csv(fert_dir, sep=r',\s*', engine='python',
                            usecols=['Year1', 'Age', 'ASFR', 'AgeDef',
                                     'Collection', 'RefCode'])
    # NOTE(review): the 'Year' and 'Values' columns used below are not
    # read here; presumably select_fert_data() creates them -- confirm.
    fert_data = select_fert_data(fert_data)
    in_window = fert_data[fert_data['Year'].isin(range(1990, 2015))]
    fert_list = [in_window[in_window['Age'] == age]['Values'].mean()
                 for age in range(14, 51)]
    # Copy so that the column assignment below acts on an independent
    # frame rather than on a slice of the full data.
    # NOTE(review): assumes the 1995 rows are exactly ages 14..50 in
    # ascending order -- confirm against select_fert_data().
    fert_data = fert_data[fert_data['Year'] == 1995].copy()
    fert_data['Values'] = fert_list
    fert_data['Values'] = fert_data['Values'] / 2

    # Generate spline representation of fertility rates by age
    fert_tck = si.splrep(fert_data['Age'], fert_data['Values'])

    # Calculate the population-weighted average fertility rate of each
    # model period from a large number of sub-bins per data year.
    data_yrs = max_yr - min_yr + 1
    binsize = float(data_yrs) / totpers
    # Must be an integer: it determines the number of points passed to
    # np.linspace, which rejects floats.
    num_sub_bins = 10000
    n_sub_total = int(num_sub_bins * max_yr)
    # Number of sub-bins per model period. Uses the number of data
    # years rather than a hard-coded 100.
    len_subbins = float(data_yrs * num_sub_bins) / totpers
    # NOTE(review): the offsets below scale with binsize, so the grid
    # is exactly the sub-bin midpoints only when binsize == 1 -- confirm
    # this is intended for other values of totpers.
    age_sub = (np.linspace(binsize / num_sub_bins, float(max_yr),
                           n_sub_total) -
               0.5 * binsize / num_sub_bins)

    # Spline the population shares onto the sub-bin grid
    ages = np.linspace(min_yr, max_yr, curr_pop_pct.shape[0])
    pop_func = si.splrep(ages, curr_pop_pct)
    new_bins = np.linspace(min_yr, max_yr, n_sub_total)
    curr_pop_sub = si.splev(new_bins, pop_func)
    curr_pop_sub = curr_pop_sub / curr_pop_sub.sum()

    fert_rates_sub = np.zeros(curr_pop_sub.shape)
    # Only evaluate the spline strictly inside the fertility data ages
    age_lo = fert_data['Age'].iloc[0]
    age_hi = fert_data['Age'].iloc[-1]
    pred_ind = (age_sub > age_lo) & (age_sub < age_hi)
    age_pred = age_sub[pred_ind]
    fert_rates_sub[pred_ind] = np.float64(si.splev(age_pred, fert_tck))
    # The spline can undershoot; fertility rates cannot be negative
    fert_rates_sub[fert_rates_sub < 0] = 0
    fert_rates = np.zeros(totpers)
    end_sub_bin = 0
    for i in range(totpers):
        beg_sub_bin = int(end_sub_bin)
        end_sub_bin = int(np.rint((i + 1) * len_subbins))
        pop_slice = curr_pop_sub[beg_sub_bin:end_sub_bin]
        fert_rates[i] = (
            (pop_slice * fert_rates_sub[beg_sub_bin:end_sub_bin]).sum()
            / pop_slice.sum())
    fert_rates = np.nan_to_num(fert_rates)

    if graph:
        # ----------------------------------------------------------------
        # age_fine_pred  = (300,) vector, equally spaced support of ages
        #                  between the first and last fertility data ages
        # fert_fine_pred = (300,) vector, spline fertility rates
        #                  evaluated on age_fine_pred
        # age_fine       = (302,) vector, age_fine_pred with min_yr and
        #                  max_yr added at the ends
        # fert_fine      = (302,) vector, fert_fine_pred with a zero
        #                  added at each end
        # age_mid_new    = (totpers,) vector, midpoint age of each model
        #                  period age bin
        # output_fldr    = string, folder in current path to save files
        # output_dir     = string, total path of OUTPUT folder
        # output_path    = string, path of figure file to be saved
        # ----------------------------------------------------------------
        # The spline is a splrep representation, so it is evaluated with
        # si.splev, and the data ages and values come from fert_data.
        age_fine_pred = np.linspace(age_lo, age_hi, 300)
        fert_fine_pred = si.splev(age_fine_pred, fert_tck)
        age_fine = np.hstack((min_yr, age_fine_pred, max_yr))
        fert_fine = np.hstack((0, fert_fine_pred, 0))
        # Built-in float replaces the removed np.float alias
        age_mid_new = (np.linspace(float(max_yr) / totpers, max_yr,
                                   totpers) -
                       (0.5 * float(max_yr) / totpers))

        fig, ax = plt.subplots()
        plt.scatter(fert_data['Age'], fert_data['Values'], s=70,
                    c='blue', marker='o', label='Data')
        plt.scatter(age_mid_new, fert_rates, s=40, c='red', marker='d',
                    label='Model period (integrated)')
        plt.plot(age_fine, fert_fine, label='Cubic spline')
        # for the minor ticks, use no labels; default NullFormatter
        minorLocator = MultipleLocator(1)
        ax.xaxis.set_minor_locator(minorLocator)
        # Visibility flag passed positionally: the `b` keyword is not
        # accepted by newer Matplotlib releases.
        plt.grid(True, which='major', color='0.65', linestyle='-')
        plt.xlabel(r'Age $s$')
        plt.ylabel(r'Fertility rate $f_{s}$')
        plt.xlim((min_yr - 1, max_yr + 1))
        plt.ylim((-0.15 * (fert_fine_pred.max()),
                  1.15 * (fert_fine_pred.max())))
        plt.legend(loc='upper right')
        plt.text(-5, -0.018,
                 'Source: National Vital Statistics Reports, ' +
                 'Volume 64, Number 1, January 15, 2015.', fontsize=9)
        plt.tight_layout(rect=(0, 0.03, 1, 1))
        # Create directory if OUTPUT directory does not already exist
        output_fldr = 'OUTPUT/Demographics'
        output_dir = os.path.join(cur_path, output_fldr)
        if not os.access(output_dir, os.F_OK):
            os.makedirs(output_dir)
        output_path = os.path.join(output_dir, 'fert_rates')
        plt.savefig(output_path)

    return fert_rates