def add_employment(df, year):
    """Add an ``empres`` (employed residents) column to ``df``.

    An employment share per row is computed as a weighted sum of the
    household income-quartile shares plus ``zonal_emp_sh_resid10`` from
    ``zone_forecast_inputs()``.  The share is multiplied by ``df.totpop``,
    scaled to the ``empres`` value of ``regional_controls()`` for ``year``,
    rounded with ``round_series_match_target`` and finally capped at
    ``totpop``.

    Args:
        df: DataFrame with columns ``hhincq1`` .. ``hhincq4`` and ``totpop``.
        year: label used to select the row of ``regional_controls()``.

    Returns:
        The same ``df`` object, with ``empres`` added (modified in place).

    Raises:
        AssertionError: if any ``empres`` value is not <= ``totpop`` after
            the cap has been applied.
    """
    # (income quartile column, weight applied to that quartile's share)
    quartile_weights = (
        ("hhincq1", 0.46381),
        ("hhincq2", 0.49361),
        ("hhincq3", 0.56938),
        ("hhincq4", 0.29818),
    )

    households = df[[column for column, _ in quartile_weights]]
    row_totals = households.sum(axis=1)
    shares = households.div(row_totals, axis=0)

    zone_inputs = zone_forecast_inputs()

    # Accumulate left to right so the floating point result is the same as
    # writing the four products and the residual out as one long sum.
    employment_share = None
    for column, weight in quartile_weights:
        term = weight * shares[column]
        if employment_share is None:
            employment_share = term
        else:
            employment_share = employment_share + term
    employment_share = employment_share + zone_inputs.zonal_emp_sh_resid10

    # The author's view is that no more than 70% of people in a zone should
    # be employed.  The upper bound also helps keep employed residents below
    # total population after scaling; if the assertion at the bottom ever
    # fires, lowering the .7 slightly is the suggested remedy.
    employment_share = employment_share.fillna(0)
    employment_share = employment_share.clip(.3, .7)

    employed = employment_share * df.totpop

    controls = regional_controls()
    regional_target = controls.empres.loc[year]

    employed = scale_by_target(employed, regional_target)
    df["empres"] = round_series_match_target(employed, regional_target, 0)

    # Cap at total population.  This should make the check below pass; the
    # cap is reported to be needed only very infrequently.
    df["empres"] = df[["empres", "totpop"]].min(axis=1)

    # employed residents must not exceed total residents
    assert (df["empres"] <= df["totpop"]).all()

    return df
def add_age_categories(df, year):
    """Add age-category population columns to ``df``.

    A seed matrix is built from the age-share columns of
    ``zone_forecast_inputs()`` multiplied by ``df.totpop``.  It is passed to
    ``simple_ipf`` together with column marginals (the age columns of
    ``regional_controls()`` for ``year``, scaled to ``df.totpop.sum()``) and
    row marginals (``df.totpop``).  Each fitted row is then rounded with
    ``round_series_match_target`` using that row's ``totpop`` as the target.

    NOTE(review): another definition of ``add_age_categories`` appears
    later in this file; if both live in the same module the later one
    replaces this one at import time.

    Args:
        df: DataFrame with a ``totpop`` column.  Presumably it shares its
            index with ``zone_forecast_inputs()`` -- confirm against callers.
        year: label used to select the row of ``regional_controls()``.

    Returns:
        The same ``df`` object with columns ``AGE0004``, ``AGE0519``,
        ``AGE2044``, ``AGE4564`` and ``AGE65P`` added (modified in place).
    """
    zfi = zone_forecast_inputs()
    rc = regional_controls()

    share_cols = ["sh_age0004", "sh_age0519", "sh_age2044",
                  "sh_age4564", "sh_age65p"]
    agecols = ["age0004", "age0519", "age2044", "age4564", "age65p"]

    # DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in
    # 1.0; .values is the documented replacement.  The explicit copy makes
    # sure the array is writable and not shared with the temporary frame,
    # because it is edited in place below.
    seed_matrix = zfi[share_cols].mul(df.totpop, axis='index').values.copy()

    row_marginals = df.totpop.values

    col_marginals = rc[agecols].loc[year].values
    target = df.totpop.sum()
    col_marginals = scale_by_target(
        pd.Series(col_marginals), target).round().astype('int')

    # Replace zero seed cells with a small positive value (presumably so the
    # fitting step can still allocate people to them -- confirm), then blank
    # out rows whose total population is zero.
    seed_matrix[seed_matrix == 0] = .1
    seed_matrix[row_marginals == 0, :] = 0

    mat = simple_ipf(seed_matrix, col_marginals, row_marginals)

    agedf = pd.DataFrame(mat)
    agedf.columns = [col.upper() for col in agecols]
    agedf.index = zfi.index

    # Round every row so that its categories add up to that row's totpop.
    for ind, row in agedf.iterrows():
        zone_total = df.totpop.loc[ind]
        agedf.loc[ind] = round_series_match_target(row.round(), zone_total, 0)

    for col in agedf.columns:
        df[col] = agedf[col]

    return df
def add_population(df, year):
    """Add an ``hhpop`` (household population) column to ``df``.

    The target is the ``totpop`` value of ``regional_controls()`` for
    ``year`` minus the sum of ``df.gqpop``.  An initial estimate,
    ``df.tothh`` times ``meanhhsize`` from ``zone_forecast_inputs()``, is
    passed through ``scale_by_target`` and ``round_series_match_target``;
    missing values in the result are replaced by 0.

    Args:
        df: DataFrame with ``tothh`` and ``gqpop`` columns.
        year: label used to select the row of ``regional_controls()``.

    Returns:
        The same ``df`` object, with ``hhpop`` added (modified in place).
    """
    controls = regional_controls()
    # household population target = regional total minus group quarters
    hhpop_target = controls.totpop.loc[year] - df.gqpop.sum()

    zone_inputs = zone_forecast_inputs()
    estimate = df.tothh * zone_inputs.meanhhsize

    # The .15 is handed to scale_by_target as its third argument; its exact
    # meaning is defined by that helper.
    estimate = scale_by_target(estimate, hhpop_target, .15)
    rounded = round_series_match_target(estimate, hhpop_target, 0)

    df["hhpop"] = rounded
    # fill after assignment so gaps introduced by index alignment become 0
    df["hhpop"] = df["hhpop"].fillna(0)

    return df
def add_age_categories(df, year):
    """Add age-category population columns to ``df``.

    A seed matrix is built from the age-share columns of
    ``zone_forecast_inputs()`` multiplied by ``totpop`` for those rows of
    ``df`` whose index is present in the zone inputs.  It is passed (as a
    DataFrame) to ``simple_ipf`` together with column marginals (the age
    columns of ``regional_controls()`` for ``year``, scaled to
    ``df.totpop.sum()``) and row marginals (``df.totpop``).  Each fitted row
    is then rounded with ``round_series_match_target`` using that row's
    ``totpop`` as the target.

    NOTE(review): an earlier definition of ``add_age_categories`` appears
    in this file; if both live in the same module this later one is the
    one that takes effect.

    Args:
        df: DataFrame with a ``totpop`` column.
        year: label used to select the row of ``regional_controls()``.

    Returns:
        The same ``df`` object with columns ``AGE0004``, ``AGE0519``,
        ``AGE2044``, ``AGE4564`` and ``AGE65P`` added (modified in place).
    """
    zfi = zone_forecast_inputs()
    rc = regional_controls()

    share_cols = ["sh_age0004", "sh_age0519", "sh_age2044",
                  "sh_age4564", "sh_age65p"]
    agecols = ["age0004", "age0519", "age2044", "age4564", "age65p"]

    # Only rows of df whose index also exists in zfi are used, to prevent
    # errors on non-matching data.
    matching_totpop = df[df.index.isin(zfi.index)].totpop

    # DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in
    # 1.0; .values is the documented replacement.  The explicit copy makes
    # sure the array is writable and not shared with the temporary frame,
    # because it is edited in place below.
    seed_matrix = zfi[share_cols].mul(
        matching_totpop, axis='index').values.copy()

    # NOTE(review): the row marginals still come from the unfiltered df, so
    # if df contains rows missing from zfi their length will differ from the
    # number of seed rows -- confirm whether that can happen.
    row_marginals = df.totpop.values

    col_marginals = rc[agecols].loc[year].values
    target = df.totpop.sum()
    col_marginals = scale_by_target(
        pd.Series(col_marginals), target).round().astype('int')

    # Replace zero seed cells with a small positive value (presumably so the
    # fitting step can still allocate people to them -- confirm), then blank
    # out rows whose total population is zero.
    seed_matrix[seed_matrix == 0] = .1
    seed_matrix[row_marginals == 0, :] = 0

    # Added by Derek: hand simple_ipf a DataFrame rather than a bare
    # ndarray, which caused problems in simple_ipf.
    seed_matrix = pd.DataFrame(seed_matrix)

    mat = simple_ipf(seed_matrix, col_marginals, row_marginals)

    agedf = pd.DataFrame(mat)
    agedf.columns = [col.upper() for col in agecols]
    agedf.index = zfi.index

    # Round every row so that its categories add up to that row's totpop.
    for ind, row in agedf.iterrows():
        zone_total = df.totpop.loc[ind]
        agedf.loc[ind] = round_series_match_target(row.round(), zone_total, 0)

    for col in agedf.columns:
        df[col] = agedf[col]

    return df