def top_x_pct_share(df, col, top_x_pct, w=None):
    """Return the share of the weighted total of ``col`` held by the top x%.

    :param df: DataFrame.
    :param col: Name of the column in df holding the value.
    :param top_x_pct: Decimal between 0 and 1 giving the size of the top
        group, e.g. 0.1, 0.001.
    :param w: Name of the weight column in df; passed straight through to
        the ``mdf`` helpers. (Default value = None)
    :returns: The w-weighted sum of ``col`` over rows at or above the
        ``1 - top_x_pct`` weighted quantile, divided by the w-weighted sum
        of ``col`` over all rows.
    """
    cutoff = mdf.weighted_quantile(df, col, w, 1 - top_x_pct)
    # Rows exactly at the cutoff count as part of the top group.
    at_or_above_cutoff = df[col] >= cutoff
    top_total = mdf.weighted_sum(df[at_or_above_cutoff], col, w)
    overall_total = mdf.weighted_sum(df, col, w)
    return top_total / overall_total
def tax(flat_tax, total_type="person"):
    """Simulate a flat tax on AGI that funds a UBI and summarize the result.

    The module-level ``spmu`` frame gets ``new_tax`` and
    ``new_spm_resources`` columns written to it as a side effect.

    Args:
        flat_tax: Percentage tax rate (0-100).
        total_type: Row of ``totals`` used for the current tax liability and
            the population count. Either "person" or "spmu".
            Defaults to "person".

    Returns:
        A pandas Series with the new poverty rate, poverty gap, Gini and
        share of people better off, the ``chg`` of the first three against
        their initial values, the change in revenue and the UBI amount.
    """
    rate = flat_tax / 100
    baseline = totals.loc[total_type]

    # Revenue raised by the flat tax, net of the taxes it replaces, is paid
    # out as an equal amount per person.
    spmu["new_tax"] = spmu.spmu_agi * rate
    revenue_delta = (
        mdf.weighted_sum(spmu, "new_tax", "spm_weight")
        - baseline.fica_fedtax_ac
    )
    ubi_amount = revenue_delta / baseline.person
    spmu["new_spm_resources"] = (
        spmu.spm_resources_before_tax
        + ubi_amount * spmu.spmu_total_people
        - spmu.new_tax
    )

    # Bring the unit-level result back down to person level.
    unit_result = spmu[SPM_COLS + ["new_spm_resources"]]
    people = person.merge(unit_result, on=SPM_COLS)
    people["new_spm_resources_pp"] = (
        people.new_spm_resources / people.spmu_total_people
    )
    people["new_poor"] = people.new_spm_resources < people.spm_povthreshold
    people["better_off"] = people.new_spm_resources > people.spm_resources

    # Person-weighted headline measures.
    new_poverty_rate = mdf.weighted_mean(people, "new_poor", "marsupwt")
    new_gini = mdf.gini(people, "new_spm_resources_pp", w="marsupwt")
    share_better_off = mdf.weighted_mean(people, "better_off", "marsupwt")

    # The poverty gap is measured over SPM units, not persons.
    shortfall = np.maximum(spmu.spm_povthreshold - spmu.new_spm_resources, 0)
    new_poverty_gap = (shortfall * spmu.spm_weight).sum()

    summary = {
        "poverty_rate": new_poverty_rate,
        "poverty_gap": new_poverty_gap,
        "gini": new_gini,
        "percent_better_off": share_better_off,
        "change_poverty_rate": chg(new_poverty_rate, initial_poverty_rate),
        "change_poverty_gap": chg(new_poverty_gap, initial_poverty_gap),
        "change_gini": chg(new_gini, initial_gini),
        "change_revenue": revenue_delta,
        "ubi": ubi_amount,
    }
    return pd.Series(summary)
def total_wealth_by_decile(data, measure):
    """Return each decile's share of total ``measure``, separately by race.

    For every value of ``data.race2`` the rows are split at the weighted
    deciles of ``measure`` (weights in the ``wgt`` column), and the weighted
    sum of ``measure`` inside each decile is expressed as a percentage of
    that race's weighted total.

    :param data: DataFrame with ``race2`` and ``wgt`` columns plus the
        column named by ``measure``.
    :param measure: Name of the column to distribute across deciles.
    :returns: DataFrame indexed 1..10 (decile) with one column per race,
        holding percentages. Empty DataFrame if ``data`` has no races.
    """
    # linspace guarantees exactly 11 bounds (10 deciles); arange with a
    # float step does not promise its length.
    decile_bounds = np.linspace(0, 1, 11)
    decile_index = np.arange(1, 11)

    shares_by_race = []
    for race2 in data.race2.unique():
        race_df = data[data.race2 == race2]
        deciles = np.asarray(
            mdf.weighted_quantile(race_df, measure, "wgt", decile_bounds)
        )
        race_total = mdf.weighted_sum(race_df, measure, "wgt")

        decile_totals = []
        for lower, upper in zip(deciles[:-1], deciles[1:]):
            # Bucket on the same column the deciles were computed from.
            # The original always bucketed on `networth`, which is wrong
            # for any other measure.
            # NOTE: `between` is inclusive on both ends, so a row sitting
            # exactly on a boundary is counted in both adjacent deciles.
            in_decile = race_df[race_df[measure].between(lower, upper)]
            decile_totals.append(mdf.weighted_sum(in_decile, measure, "wgt"))

        shares_by_race.append(
            pd.Series(
                np.asarray(decile_totals) / race_total * 100,
                index=decile_index,
                name=race2,
            )
        )

    if not shares_by_race:
        return pd.DataFrame()
    return pd.concat(shares_by_race, axis=1)
def test_weighted_sum():
    """Exercise mdf.weighted_sum without weights, with weights and grouped."""
    # No weight column: plain sum of x.
    unweighted_total = mdf.weighted_sum(df, "x")
    assert unweighted_total == 8

    # Single column weighted by w.
    weighted_total = mdf.weighted_sum(df, "x", "w")
    assert weighted_total == 11

    # Several columns at once come back as one value per column.
    multi_column_totals = mdf.weighted_sum(df, ["x", "y"], "w")
    assert multi_column_totals.tolist() == [11, -3]

    # Grouped calls are only checked for running without error.
    for cols in ("x", ["x", "y"]):
        mdf.weighted_sum(dfg, cols, "w", "g")
def add_custom_tax(
    df,
    segment_income,
    w,
    base_income,
    incidence,
    name,
    total=None,
    ratio=None,
    verbose=True,
):
    """Add a custom tax based on incidence analysis driven by percentiles.

    :param df: DataFrame. Sorted in place by segment_income.
    :param segment_income: Income measure used to segment tax units into
        quantiles.
    :param w: Weight used to segment into quantiles (either s006 or XTOT_m).
    :param base_income: Income measure by which incidence is multiplied to
        estimate liability.
    :param incidence: pandas Series indexed on the floor of an income
        percentile, with values for the tax rate.
    :param name: Name of the column to add.
    :param total: Total amount the tax should generate. If not provided,
        liabilities are calculated only based on the incidence schedule.
        When provided, liabilities are rescaled so their s006-weighted sum
        equals total. (Default value = None)
    :param ratio: Ratio to adjust the tax by, compared to the original tax.
        This acts as a multiplier for the incidence argument.
        (Default value = None)
    :param verbose: Whether to print the tax adjustment factor if needed.
        Defaults to True.
    :returns: Nothing. Adds the column name to df representing the tax
        liability. df is also sorted by segment_income.
    :raises AssertionError: If both ratio and total are provided.
    """
    if ratio is not None:
        assert total is None, "ratio and total cannot both be provided."
        incidence = incidence * ratio
    df.sort_values(segment_income, inplace=True)
    # Cumulative weighted position of each row, on a 0-100 scale.
    income_percentile = 100 * df[w].cumsum() / df[w].sum()
    bin_index = pd.cut(
        income_percentile,
        # Add a right endpoint. Should be 100 but sometimes a decimal
        # gets added.
        bins=incidence.index.tolist() + [101],
        labels=False,
        # Bins are right-closed, so without this a percentile equal to the
        # lowest floor (leading rows with zero weight) gets no bin and the
        # positional lookup below fails.
        include_lowest=True,
    )
    tu_incidence = incidence.iloc[bin_index.to_numpy()].values
    # Liabilities are floored at zero.
    df[name] = np.maximum(0, tu_incidence * df[base_income])
    if total is not None:
        initial_total = mdf.weighted_sum(df, name, "s006")
        adjustment = total / initial_total
        if verbose:
            print("Multiplying tax by " + str(round(adjustment, 2)) + ".")
        df[name] *= adjustment
# Rescale the SPM weight and add a per-person counter and combined tax column.
person["spm_weight"] = person.spm_weight / 100
person["person"] = 1
person["fica_fedtax_ac"] = person.fica + person.fedtax_ac

# Aggregate AGI, current tax liability and head count up to the SPM unit.
SPM_COLS = ["spm_id", "spm_povthreshold", "spm_resources", "spm_weight"]
spmu = (
    person.groupby(SPM_COLS)[["agi", "fica_fedtax_ac", "person"]]
    .sum()
    .rename(
        columns={
            "agi": "spmu_agi",
            "fica_fedtax_ac": "spmu_fica_fedtax_ac",
            "person": "spmu_total_people",
        }
    )
    .reset_index()
)
# Resources with the unit's current taxes added back.
spmu["spm_resources_before_tax"] = (
    spmu.spm_resources + spmu.spmu_fica_fedtax_ac
)
person = person.merge(spmu, on=SPM_COLS)

# Totals at both person and SPM unit level, stacked into one frame with a
# row per level so the two can be compared.
person_totals = mdf.weighted_sum(
    person, ["fica_fedtax_ac", "person"], "marsupwt"
)
spmu_totals = mdf.weighted_sum(
    spmu, ["spmu_fica_fedtax_ac", "spmu_total_people"], "spm_weight"
)
# Give both sets of totals the same labels so they share columns.
spmu_totals.index = person_totals.index
totals = pd.concat(
    [person_totals, spmu_totals], axis=1, keys=["person", "spmu"]
).T

# Status quo: person-level poverty flag and rate, unit-level poverty gap.
person["poor"] = person.spm_resources < person.spm_povthreshold
initial_poverty_rate = mdf.weighted_mean(person, "poor", "marsupwt")
spmu["initial_poverty_gap"] = (
    spmu.spm_povthreshold - spmu.spm_resources
).clip(lower=0)
# Demographic indicator columns for which poverty rates are calculated.
DEMOG_COLS = [
    "person",
    "adult",
    "child",
    "black",
    "white",
    "hispanic",
    "pwd",
]

# People living in poverty.
poor_pop = person[person.poor]

# National poverty RATE for each DEMOGRAPHIC: weighted count of poor people
# in the group over the weighted count of everyone in the group.
pov_rate_us = mdf.weighted_sum(poor_pop, DEMOG_COLS, "asecwt") / mdf.weighted_sum(
    person, DEMOG_COLS, w="asecwt"
)
# The name becomes the row label when added to the state table below.
pov_rate_us.name = "US"

# Same ratio, computed separately for each state.
pov_rates = mdf.weighted_sum(
    poor_pop, DEMOG_COLS, "asecwt", groupby="state"
) / mdf.weighted_sum(person, DEMOG_COLS, w="asecwt", groupby="state")

# Add the US statistics as an additional 'state' row. DataFrame.append was
# removed in pandas 2.0, so insert the row by label on a copy instead;
# pov_rates itself is left untouched.
pov_df = pov_rates.copy()
pov_df.loc[pov_rate_us.name] = pov_rate_us
# melt df from wide to long format
def _target_units(statefip):
    """Return a deep copy of the SPM units for one state, or all for "US"."""
    if statefip == "US":
        return spmu.copy(deep=True)
    return spmu[spmu.statefip == statefip].copy(deep=True)


def _ubi_recipients(units, exclude):
    """Count the people in each SPM unit who are not excluded from the UBI."""
    recipients = units.numper
    if "children" in exclude:
        recipients = recipients - units.child
    if "non_citizens" in exclude:
        recipients = recipients - units.non_citizen
    # People in two excluded groups were subtracted twice; add them back once.
    if ("children" in exclude) and ("non_citizens" in exclude):
        recipients = recipients + units.non_citizen_child
    if "adults" in exclude:
        recipients = recipients - units.adult
    if ("adults" in exclude) and ("non_citizens" in exclude):
        recipients = recipients + units.non_citizen_adult
    return recipients


def _distribute_ubi(units, revenue, exclude):
    """Split revenue equally among recipients; update units in place.

    Writes numper_ubi, total_ubi and new_resources_per_person, adds the UBI
    to new_resources, and returns the per-recipient UBI amount.
    """
    units["numper_ubi"] = _ubi_recipients(units, exclude)
    ubi_population = (units.numper_ubi * units.spmwt).sum()
    ubi_amount = revenue / ubi_population
    units["total_ubi"] = ubi_amount * units.numper_ubi
    units["new_resources"] += units.total_ubi
    units["new_resources_per_person"] = units.new_resources / units.numper
    return ubi_amount


def _pct_change(new, old):
    """Return the percent change from old to new, rounded to one decimal."""
    return np.round((new - old) / old * 100, 1)


def _percent_change_bar_chart(labels, changes, hover_text, title):
    """Build a styled bar chart of percent changes with per-bar hover text."""
    fig = go.Figure(
        [
            go.Bar(
                x=labels,
                y=changes,
                text=list(changes),
                hovertemplate=hover_text,
                marker_color=BLUE,
            )
        ]
    )
    fig.update_layout(
        uniformtext_minsize=10, uniformtext_mode="hide", plot_bgcolor="white"
    )
    fig.update_traces(texttemplate="%{text}%", textposition="auto")
    fig.update_layout(title_text=title, title_x=0.5)
    fig.update_xaxes(
        tickangle=0, title_text="", tickfont={"size": 14}, title_standoff=25
    )
    fig.update_yaxes(
        ticksuffix="%",
        tickprefix="",
        tickfont={"size": 14},
        title_standoff=25,
    )
    fig.update_layout(
        hoverlabel=dict(bgcolor="white", font_size=14, font_family="Roboto")
    )
    fig.update_xaxes(title_font=dict(size=14, family="Roboto", color="black"))
    fig.update_yaxes(title_font=dict(size=14, family="Roboto", color="black"))
    return fig


def ubi(statefip, level, agi_tax, benefits, taxes, exclude):
    """Simulate a UBI funded by repealed programs and a flat tax on AGI.

    At the federal level the module-level ``spmu`` frame is modified in
    place (new_resources, new_taxes, numper_ubi, total_ubi and
    new_resources_per_person columns) before the target units are copied
    out. At the state level only a copy is modified.

    Args:
        statefip: "US" for all SPM units, otherwise a value matched against
            ``spmu.statefip``.
        level: "federal" or "state".
        agi_tax: Flat tax rate on AGI, in percent.
        benefits: List of spmu column names of benefits to repeal (used at
            the federal level only).
        taxes: List of spmu column names of taxes to repeal. At the state
            level only the presence of "fedtaxac" is checked, in which case
            ``stataxac`` is repealed instead.
        exclude: Collection that may contain "children", "non_citizens"
            and/or "adults"; those groups do not receive the UBI.

    Returns:
        Tuple ``(ubi_line, winners_line, resources_line, fig, fig2)``: three
        display strings, the economic-overview bar chart and the
        poverty-rate-breakdown bar chart.

    Raises:
        ValueError: If level is neither "federal" nor "state".
    """
    tax_rate = agi_tax / 100

    if level == "federal":
        repealed = taxes + benefits
        spmu["new_resources"] = spmu.spmtotres
        revenue = 0
        # Repealing a program removes it from resources and frees its cost.
        for program in repealed:
            spmu["new_resources"] -= spmu[program]
            revenue += mdf.weighted_sum(spmu, program, "spmwt")
        # When the income tax is repealed together with a credit, the credit
        # is added back so it is not removed twice.
        # NOTE(review): presumably fedtaxac is already net of ctc/eitcred;
        # confirm against the data documentation.
        if "fedtaxac" in repealed:
            for credit in ("ctc", "eitcred"):
                if credit in repealed:
                    spmu["new_resources"] += spmu[credit]
                    revenue -= mdf.weighted_sum(spmu, credit, "spmwt")
        # Flat tax on AGI; negative AGI owes nothing.
        spmu["new_taxes"] = np.maximum(spmu.adjginc, 0) * tax_rate
        spmu["new_resources"] -= spmu.new_taxes
        revenue += mdf.weighted_sum(spmu, "new_taxes", "spmwt")
        ubi_amount = _distribute_ubi(spmu, revenue, exclude)
        # Select the target state only after the national UBI is assigned.
        target_spmu = _target_units(statefip)
    elif level == "state":
        # Select the state first: revenue and UBI are state-specific.
        target_spmu = _target_units(statefip)
        target_spmu["new_resources"] = target_spmu.spmtotres
        revenue = 0
        # Income tax repeal applies to the state income tax at this level.
        if "fedtaxac" in taxes:
            target_spmu["new_resources"] -= target_spmu.stataxac
            revenue += mdf.weighted_sum(target_spmu, "stataxac", "spmwt")
        # Floor AGI at zero, matching the federal branch; the original taxed
        # raw AGI here, giving negative taxes to units with negative AGI.
        target_spmu["new_taxes"] = np.maximum(target_spmu.adjginc, 0) * tax_rate
        target_spmu["new_resources"] -= target_spmu.new_taxes
        revenue += mdf.weighted_sum(target_spmu, "new_taxes", "spmwt")
        ubi_amount = _distribute_ubi(target_spmu, revenue, exclude)
    else:
        raise ValueError(
            "level must be 'federal' or 'state', got " + repr(level)
        )

    # Attach the unit-level results to each person in the target units.
    sub_spmu = target_spmu[
        ["spmfamunit", "year", "new_resources", "new_resources_per_person"]
    ]
    target_persons = person.merge(sub_spmu, on=["spmfamunit", "year"])
    population = target_persons.asecwt.sum()

    # Average change in resources per person.
    original_total_resources = (
        target_spmu.spmtotres * target_spmu.spmwt
    ).sum()
    new_total_resources = (target_spmu.new_resources * target_spmu.spmwt).sum()
    change_pp = (new_total_resources - original_total_resources) / population

    # Poverty rate before and after (person level, in percent).
    target_persons["original_poor"] = (
        target_persons.spmtotres < target_persons.spmthresh
    )
    target_persons["poor"] = (
        target_persons.new_resources < target_persons.spmthresh
    )
    original_poverty_rate = (
        (target_persons.original_poor * target_persons.asecwt).sum()
        / population
    ) * 100
    poverty_rate = (
        (target_persons.poor * target_persons.asecwt).sum() / population
    ) * 100
    poverty_rate_change = _pct_change(poverty_rate, original_poverty_rate)

    # Poverty gap before and after (SPM unit level).
    target_spmu["poverty_gap"] = np.where(
        target_spmu.spmtotres < target_spmu.spmthresh,
        target_spmu.spmthresh - target_spmu.spmtotres,
        0,
    )
    original_poverty_gap = mdf.weighted_sum(
        target_spmu, "poverty_gap", "spmwt"
    )
    target_spmu["new_poverty_gap"] = np.where(
        target_spmu.new_resources < target_spmu.spmthresh,
        target_spmu.spmthresh - target_spmu.new_resources,
        0,
    )
    poverty_gap = mdf.weighted_sum(target_spmu, "new_poverty_gap", "spmwt")
    poverty_gap_change = _pct_change(poverty_gap, original_poverty_gap)

    # Gini of per-person resources before and after.
    target_persons["spm_resources_per_person"] = (
        target_persons.spmtotres / target_persons.numper
    )
    original_gini = mdf.gini(
        target_persons, "spm_resources_per_person", "asecwt"
    )
    gini = mdf.gini(target_persons, "new_resources_per_person", "asecwt")
    gini_change = _pct_change(gini, original_gini)

    # Share of people whose unit's resources increase.
    target_persons["winner"] = (
        target_persons.new_resources > target_persons.spmtotres
    )
    total_winners = (target_persons.winner * target_persons.asecwt).sum()
    percent_winners = np.round(total_winners / population * 100, 1)

    # Poverty rates by demographic group: (person column, name used in the
    # hover text, x-axis label).
    demographics = [
        ("child", "child", "Child"),
        ("adult", "adult", "Adult"),
        ("pwd", "pwd", "People<br>with<br>disabilities"),
        ("white_non_hispanic", "White", "White"),
        ("black", "Black", "Black"),
        ("hispanic", "Hispanic", "Hispanic"),
    ]
    x2 = []
    group_changes = []
    group_hovers = []
    for column, hover_name, axis_label in demographics:
        in_group = target_persons[target_persons[column]]
        original_rate = (
            mdf.weighted_mean(in_group, "original_poor", "asecwt") * 100
        )
        new_rate = mdf.weighted_mean(in_group, "poor", "asecwt") * 100
        x2.append(axis_label)
        group_changes.append(_pct_change(new_rate, original_rate))
        group_hovers.append(
            "Original " + hover_name + " poverty rate: "
            + str(round(original_rate, 1))
            + "%<br><extra></extra>"
            + "New " + hover_name + " poverty rate: "
            + str(round(new_rate, 1))
            + "%"
        )

    # Display strings for the headline numbers.
    original_poverty_gap_billions = "{:,}".format(
        int(original_poverty_gap / 1e9)
    )
    poverty_gap_billions = "{:,}".format(int(poverty_gap / 1e9))
    ubi_string = "{:,}".format(int(ubi_amount))
    winners_string = str(percent_winners)
    resources_string = "{:,}".format(int(change_pp))

    ubi_line = "UBI amount: $" + ubi_string
    winners_line = "Percent better off: " + winners_string + "%"
    resources_line = (
        "Average change in resources per person: $" + resources_string
    )

    # Economic overview chart.
    x = ["Poverty Rate", "Poverty Gap", "Inequality (Gini)"]
    overview_changes = [poverty_rate_change, poverty_gap_change, gini_change]
    overview_hovers = [
        "Original poverty rate: "
        + str(round(original_poverty_rate, 1))
        + "%<br><extra></extra>"
        + "New poverty rate: "
        + str(round(poverty_rate, 1))
        + "%",
        "Original poverty gap: $"
        + original_poverty_gap_billions
        + "B<br><extra></extra>"
        + "New poverty gap: $"
        + poverty_gap_billions
        + "B",
        "Original gini: <extra></extra>"
        + str(round(original_gini, 3))
        + "<br>New gini: "
        + str(round(gini, 3)),
    ]
    fig = _percent_change_bar_chart(
        x, overview_changes, overview_hovers, "Economic overview"
    )

    # Poverty rate breakdown chart.
    fig2 = _percent_change_bar_chart(
        x2, group_changes, group_hovers, "Poverty rate breakdown"
    )

    return ubi_line, winners_line, resources_line, fig, fig2
def test_weighted_sum():
    """Check the sum of column ``x`` weighted by column ``w``."""
    # TODO: Add None default to w, then enable the unweighted check:
    # assert mdf.weighted_sum(df, 'x') == 8
    weighted_total = mdf.weighted_sum(df, "x", "w")
    assert weighted_total == 11
def _top_x_pct_share(df, col, top_x_pct, w=None):
    """Return the share of weighted ``col`` held by rows in the top x%.

    Rows count as "top" when their ``col`` value is at or above the
    ``1 - top_x_pct`` weighted quantile; ``w`` names the weight column and
    is passed through to the ``mdf`` helpers.
    """
    quantile_value = mdf.weighted_quantile(df, col, w, 1 - top_x_pct)
    top_rows = df[df[col] >= quantile_value]
    held_by_top = mdf.weighted_sum(top_rows, col, w)
    held_by_all = mdf.weighted_sum(df, col, w)
    return held_by_top / held_by_all
def test_weighted_sum():
    """Check the w-weighted sum of column x."""
    # TODO: Add None default to w; the unweighted case is disabled until then.
    # assert mdf.weighted_sum(df, 'x') == 8
    expected_weighted_sum = 11
    assert mdf.weighted_sum(df, 'x', 'w') == expected_weighted_sum
"age_6_12", "person", ] spmu = person.groupby(SPMU_COLS)[SPMU_AGG_COLS].sum() spmu.columns = ["spmu_" + i for i in SPMU_AGG_COLS] spmu.reset_index(inplace=True) reg = sm.regression.linear_model.WLS( spmu.spmchxpns, spmu[["spmu_infant", "spmu_toddler", "spmu_preschool", "spmu_age_6_12"]], weights=spmu.spmwt, ) child_allowance_amounts = reg.fit().params # Calculate total cost of transfers, and total number of children program_cost = mdf.weighted_sum(spmu, "spmchxpns", "spmwt") total_child_6 = mdf.weighted_sum(spmu, "spmu_child_6", "spmwt") childallowance = program_cost / total_child_6 ### Ben - characterize distribution of spmchxpns - histogram (by number of kids) ### filter out households with children over 6. ### Recover average cost for children under 6. ### Weighting to recover average - multiply by total number of kids under age six. ### Other options - predict reg childcare expenses ~ child ages + num_kid # Less controls may be better here - just trying to decompose the amount # Consider different specifications # Create copies of the dataset in which to simulate the policies spmu_replace_cost = spmu.copy(deep=True) spmu_flat_transfer = spmu.copy(deep=True)
# code races s["race2"] = np.where( s.race.isin([1]), "White", # Not Including Hispanic. np.where(s.race == 2, "Black", np.where(s.race == 3, "Hispanic", "Other")), ) # famstruct 4 and 5 indicate married/LWP (living with partner) s["numper"] = 1 + s.famstruct.isin([4, 5]) + s.kids s["adults"] = 1 + s.famstruct.isin([4, 5]) # divide by number of adults s["networth_pa"] = s.networth / (1 + s.famstruct.isin([4, 5])) # Calculate tax base by finding weighted sum of individuals and their income. totals = mdf.weighted_sum(s, ["numper", "income"], w="wgt") totals.white_hhs = s[s.race2 == "White"].wgt.sum() totals.black_hhs = s[s.race2 == "Black"].wgt.sum() totals.total_hhs = s.wgt.sum() def ubi_sim(data, max_monthly_payment, step_size): # Initialize empty list to store our results. l = [] # loop through ubi for monthly_payment in np.arange(0, max_monthly_payment + 1, step_size): # multiply monthly payment by 12 to get annual payment size annual_payment = monthly_payment * 12 # calculate simulation-level stats
def tot_num_cost(group):
    """Return asecwt-weighted totals of cost and person for each group.

    ``group`` is passed to ``mdf.weighted_sum`` as ``groupby``; the grouped
    result is returned with its index reset.
    """
    summed_columns = ["cost", "person"]
    grouped_totals = mdf.weighted_sum(
        person_quality, summed_columns, "asecwt", groupby=group
    )
    return grouped_totals.reset_index()
household number of children; and 2) a flat transfer equal to the average
USA cost of childcare for the average household. """
# Create copies of the dataset in which to simulate the policies
spmu_quality_state = spmu_quality.copy(deep=True)
spmu_quality_us = spmu_quality.copy(deep=True)
# Generate scenario flags to separate datasets
spmu_quality_state["sim_flag"] = "state"
spmu_quality_us["sim_flag"] = "US"
# Calculate cost of the policies
### Check the following with Max
# NOTE(review): the weight argument here is a list of four columns rather
# than a single weight column — confirm mdf.weighted_sum supports that.
tot_cost = mdf.weighted_sum(spmu_quality,"spmu_cost_per_child",["spmwt","spmu_toddler","spmu_infant","spmu_preschool"],groupby="high_quality")
# Need total cost by infants v toddlers etc.
spmu_quality_state.spmftotval += spmu_quality_state.spmu_cost_per_child
# NOTE(review): tot_cost is grouped by high_quality, so it is presumably
# indexed by high_quality values rather than by the rows of
# spmu_quality_state — confirm this addition aligns as intended.
spmu_quality_state.spmftotval += tot_cost
# Calculate number of children per household
# Define function to calculate state-based program cost
# NOTE(review): the filter below compares age_cat to the literal string "age",
# not to the `age` parameter, which is otherwise unused in the visible code —
# looks like a bug; confirm. The end of this function is outside the visible
# source.
def state_cost(age, qual):
    return (
        mdf.weighted_sum(spmu_quality[(spmu_quality.age_cat == "age") & (spmu_quality.high_quality == qual)],
        "cost",
        "spmwt",
        groupby="state",
        )
"https://github.com/MaxGhenis/datarepo/raw/master/pppub20.csv.gz", usecols=["PRDTRACE", "MARSUPWT", "AGI"] + [i.upper() for i in SPM_COLS], ) person = raw.copy(deep=True) person.columns = person.columns.str.lower() person["weight"] = person.marsupwt / 100 person.spm_weight /= 100 person = person.rename(columns={"prdtrace": "race"}) # Add indicators for white only and black only (not considering other races). person["white"] = person.race == 1 person["black"] = person.race == 2 # Limit to positive AGI. person["agi_pos"] = np.maximum(person.agi, 0) # Need total population to calculate UBI and total AGI for required tax rate. total_population = person.weight.sum() total_agi = mdf.weighted_sum(person, "agi_pos", "weight") # Sum up AGI for each SPM unit and merge that back to person level. spm = person.groupby(SPM_COLS)[["agi_pos", "white", "black"]].sum() spm.columns = ["spm_" + i for i in spm.columns] # Merge these back to person to calculate population in White and Black spmus. person = person.merge(spm, on="spm_id") pop_in_race_spmu = pd.Series({ "Black": person[person.spm_black > 0].weight.sum(), "White": person[person.spm_white > 0].weight.sum(), }) spm.reset_index(inplace=True) def pov_gap(df, resources, threshold, weight):
"spmthresh", "year", ] spmu = pd.DataFrame( person.groupby(SPMU_COLS)[[ "child_6", "infant", "toddler", "preschool", "person" ]].sum()).reset_index() SPMU_AGG_COLS = ["child_6", "infant", "toddler", "preschool", "person"] spmu = person.groupby(SPMU_COLS)[SPMU_AGG_COLS].sum() spmu.columns = ["spmu_" + i for i in SPMU_AGG_COLS] spmu.reset_index(inplace=True) # Calculate total cost of transfers, and total number of children program_cost = mdf.weighted_sum(spmu, "spmchxpns", "spmwt") total_child_6 = mdf.weighted_sum(spmu, "spmu_child_6", "spmwt") childallowance = program_cost / total_child_6 # Create copies of the dataset in which to simulate the policies spmu_replace_cost = spmu.copy(deep=True) spmu_flat_transfer = spmu.copy(deep=True) # Generate scenario flags to separate datasets spmu["sim_flag"] = "baseline" spmu_replace_cost["sim_flag"] = "cc_replacement" spmu_flat_transfer["sim_flag"] = "child_allowance" # Caluclate new income by simulation spmu_replace_cost.spmftotval += spmu_replace_cost.spmchxpns
def ubi(state_dropdown, level, agi_tax, benefits, taxes, include):
    """Run the UBI microsimulation and build every output shown to the user.

    The function simulates the selected reform on the SPM-unit frame
    ``spmu``, merges the resulting resources onto ``person``, compares
    poverty, poverty gap, Gini and winners against the precomputed
    baselines in ``demog_stats`` / ``all_state_stats``, and returns the
    summary strings and the two bar charts.

    Dash passes the arguments in the order given in the ``@app.callback``
    decorator.

    NOTE(review): the simulation writes its working columns
    (``new_resources``, ``new_taxes``, ``numper_ubi``, ``total_ubi``,
    ``new_resources_per_person``, ``new_poverty_gap``) onto the
    module-level ``spmu`` frame for federal reforms and whenever "US" is
    selected, exactly as the previous implementation did. Every call
    re-initialises them, but concurrent callbacks would share that state.

    Args:
        state_dropdown: Selected state code, or "US" for the whole country
            (component_id="state-dropdown").
        level: Reform level, "federal" or "state" (component_id="level").
        agi_tax: Flat tax rate on AGI in percent, 0-100
            (component_id="agi-slider").
        benefits: List of ``spmu`` benefit columns to repeal
            (component_id="benefits-checklist"). Only used for federal
            reforms.
        taxes: List of ``spmu`` tax columns to repeal
            (component_id="taxes-checklist").
        include: List of groups that receive the UBI; the code checks for
            "children", "adults" and "non_citizens"
            (component_id="include-checklist").

    Returns:
        A 7-tuple:
            ubi_line: "Monthly UBI" text ("ubi-output").
            revenue_line: "Funds for UBI" text ("revenue-output").
            ubi_population_line: "UBI population" text. The previous
                docstring also listed "revenue-output" for this value,
                which looks like a copy-paste slip; confirm the component
                id against the decorator.
            winners_line: "Percent better off" text ("winners-output").
            resources_line: average change in resources per person
                ("resources-output").
            econ_fig: economic overview bar chart ("econ-graph").
            breakdown_fig: poverty breakdown bar chart ("breakdown-graph").

    Raises:
        ValueError: If ``level`` is neither "federal" nor "state".
    """
    us_wide = state_dropdown == "US"

    # ------------------ calculations based on reform level ----------------- #
    # NOTE: the "target" is the population being measured for Gini, poverty
    # rate, etc., i.e. the whole state/country INCLUDING people excluded
    # from receiving UBI payments. `state_dropdown` is the drop-down
    # selection, not the reform level.
    if level == "federal":
        # The reform is simulated on the whole country, then the selected
        # state (if any) is cut out for measurement.
        revenue, ubi_population, ubi_annual = _simulate_federal_reform(
            spmu, agi_tax, taxes + benefits, include)
        if us_wide:
            target_spmu = spmu
        else:
            # .copy() so later column assignments do not write to a slice.
            target_spmu = spmu[spmu.state == state_dropdown].copy()
    elif level == "state":
        # The reform is simulated only on the selected state's SPM units.
        if us_wide:
            target_spmu = spmu
        else:
            target_spmu = spmu[spmu.state == state_dropdown].copy()
        revenue, ubi_population, ubi_annual = _simulate_state_reform(
            target_spmu, agi_tax, taxes, include)
    else:
        raise ValueError(
            "level must be 'federal' or 'state', got {!r}".format(level))

    # NOTE: code after this applies to both reform levels.

    # Attach each SPM unit's simulated resources to its members.
    sub_spmu = target_spmu[[
        "spmfamunit", "year", "new_resources", "new_resources_per_person"
    ]]
    target_persons = person.merge(sub_spmu, on=["spmfamunit", "year"])

    # Precomputed baseline statistics for the selected state.
    baseline_demog = demog_stats[demog_stats.state == state_dropdown]
    baseline_all_state_stats = all_state_stats[
        all_state_stats.index == state_dropdown]

    def return_demog(demog, metric):
        """Return the precomputed baseline value for one demographic.

        Args:
            demog: one of ['person', 'adult', 'child', 'black', 'white',
                'hispanic', 'pwd', 'non_citizen', 'non_citizen_adult',
                'non_citizen_child'].
            metric: one of ['pov_rate', 'pop'].

        Returns:
            The first matching value in ``baseline_demog``.
        """
        matches = baseline_demog.loc[
            (baseline_demog["demog"] == demog)
            & (baseline_demog["metric"] == metric),
            "value",
        ]
        # NOTE: only the first match is returned; be careful if
        # baseline_demog is ever redefined to hold several rows per key.
        return matches.values[0]

    def return_all_state(metric):
        """Return a baseline metric for the selected state.

        Args:
            metric: one of 'poverty_gap', 'gini', 'total_resources'.
        """
        return baseline_all_state_stats[metric].values[0]

    def rel_change(new, old, ndigits=3):
        """Return the rounded relative difference (new - old) / old."""
        return np.round((new - old) / old, ndigits)

    def hover_string(metric, round_by=1):
        """Format a share as a percentage string, e.g. 0.121 -> '12.1%'."""
        return str(round(metric * 100, round_by)) + "%"

    population = return_demog(demog="person", metric="pop")

    # Total change in resources. new_resources depends on the reform, so it
    # cannot be preprocessed.
    original_total_resources = return_all_state("total_resources")
    new_total_resources = (target_spmu.new_resources *
                           target_spmu.spmwt).sum()
    change_total_resources = new_total_resources - original_total_resources
    change_pp = change_total_resources / population

    original_poverty_rate = return_demog("person", "pov_rate")
    original_poverty_gap = return_all_state("poverty_gap")
    original_gini = return_all_state("gini")

    # Poverty gap: total shortfall below the SPM threshold, per SPM unit.
    target_spmu["new_poverty_gap"] = np.where(
        target_spmu.new_resources < target_spmu.spmthresh,
        target_spmu.spmthresh - target_spmu.new_resources,
        0,
    )
    poverty_gap = mdf.weighted_sum(target_spmu, "new_poverty_gap", "spmwt")
    poverty_gap_change = rel_change(poverty_gap, original_poverty_gap)

    # Poverty rate, measured over people.
    target_persons["poor"] = (target_persons.new_resources <
                              target_persons.spmthresh)
    total_poor = (target_persons.poor * target_persons.asecwt).sum()
    poverty_rate = total_poor / population
    poverty_rate_change = rel_change(poverty_rate, original_poverty_rate)

    # Gini of per-person resources.
    gini = mdf.gini(target_persons, "new_resources_per_person", "asecwt")
    gini_change = rel_change(gini, original_gini, 3)

    # Percent winners: people whose unit's resources increased.
    target_persons["winner"] = (target_persons.new_resources >
                                target_persons.spmtotres)
    total_winners = (target_persons.winner * target_persons.asecwt).sum()
    percent_winners = np.round(total_winners / population * 100, 1)

    # ------------ calculate all of the poverty breakdown numbers ----------- #
    def pv_rate(column):
        """Return the new poverty rate among people flagged by `column`."""
        return mdf.weighted_mean(target_persons[target_persons[column]],
                                 "poor", "asecwt")

    DEMOGS = ["child", "adult", "pwd", "white", "black", "hispanic"]

    pov_breakdowns = {
        # Precomputed baseline poverty rates.
        "original_rates": {
            demog: return_demog(demog, "pov_rate") for demog in DEMOGS
        },
        "new_rates": {demog: pv_rate(demog) for demog in DEMOGS},
    }
    # Relative change in poverty rate for each demographic.
    pov_breakdowns["changes"] = {
        demog: rel_change(
            pov_breakdowns["new_rates"][demog],
            pov_breakdowns["original_rates"][demog],
        )
        for demog in DEMOGS
    }
    # Hover template text for each demographic.
    pov_breakdowns["strings"] = {
        demog: "Original " + demog + " poverty rate: " +
        hover_string(pov_breakdowns["original_rates"][demog]) +
        "<br><extra></extra>" + "New " + demog + " poverty rate: " +
        hover_string(pov_breakdowns["new_rates"][demog])
        for demog in DEMOGS
    }

    # Display strings for the overall metrics.
    original_poverty_rate_string = hover_string(original_poverty_rate)
    poverty_rate_string = hover_string(poverty_rate)
    original_poverty_gap_billions = "{:,}".format(
        int(original_poverty_gap / 1e9))
    poverty_gap_billions = "{:,}".format(int(poverty_gap / 1e9))
    original_gini_string = str(round(original_gini, 3))
    gini_string = str(round(gini, 3))

    # ------------ SECTION populates "Results of your reform:" ------------- #
    ubi_string = str("{:,}".format(int(round(ubi_annual / 12))))
    ubi_line = "Monthly UBI: $" + ubi_string

    if us_wide:
        revenue_line = "Funds for UBI: $" + numerize.numerize(revenue, 1)
        ubi_population_line = "UBI population: " + numerize.numerize(
            ubi_population, 1)
    else:
        # target_spmu is already restricted to the selected state, so its
        # weighted recipient count is the state's UBI population.
        state_ubi_population = (target_spmu.numper_ubi *
                                target_spmu.spmwt).sum()
        ubi_population_line = "UBI population: " + numerize.numerize(
            state_ubi_population, 1)
        state_revenue = ubi_annual * state_ubi_population
        revenue_line = ("Funds for UBI (" + state_dropdown + "): $" +
                        numerize.numerize(state_revenue, 1))

    winners_line = "Percent better off: " + str(percent_winners) + "%"
    resources_line = ("Average change in resources per person: $" +
                      "{:,}".format(int(change_pp)))

    # ---------------- populate economic breakdown bar chart ---------------- #
    econ_fig_x_lab = ["Poverty rate", "Poverty gap", "Gini index"]
    econ_fig_cols = [poverty_rate_change, poverty_gap_change, gini_change]
    econ_hovertemplate = [
        # poverty rates
        "Original poverty rate: " + original_poverty_rate_string +
        "<br><extra></extra>"
        "New poverty rate: " + poverty_rate_string,
        # poverty gap
        "Original poverty gap: $" + original_poverty_gap_billions +
        "B<br><extra></extra>"
        "New poverty gap: $" + poverty_gap_billions + "B",
        # gini
        "Original Gini index: <extra></extra>" + original_gini_string +
        "<br>New Gini index: " + gini_string,
    ]
    econ_fig = _relative_change_bar_figure(
        econ_fig_x_lab,
        econ_fig_cols,
        econ_hovertemplate,
        "Economic overview",
        {"size": 14},
    )

    # ------------------ populate poverty breakdown chart ------------------- #
    breakdown_fig_x_lab = [
        "Child",
        "Adult",
        "People<br>with<br>disabilities",
        "White",
        "Black",
        "Hispanic",
    ]
    breakdown_fig_cols = [
        pov_breakdowns["changes"][demog] for demog in DEMOGS
    ]
    breakdown_hovertemplate = [
        pov_breakdowns["strings"][demog] for demog in DEMOGS
    ]
    breakdown_fig = _relative_change_bar_figure(
        breakdown_fig_x_lab,
        breakdown_fig_cols,
        breakdown_hovertemplate,
        "Poverty rate breakdown",
        dict(size=14, family="Roboto"),
    )

    # Give both charts the same y-axis range so bars are comparable.
    full_econ_fig = econ_fig.full_figure_for_development(warn=False)
    full_breakdown_fig = breakdown_fig.full_figure_for_development(
        warn=False)
    global_ymin = min(
        min(full_econ_fig.layout.yaxis.range),
        min(full_breakdown_fig.layout.yaxis.range),
    )
    global_ymax = max(
        max(full_econ_fig.layout.yaxis.range),
        max(full_breakdown_fig.layout.yaxis.range),
    )
    shared_yaxis = dict(range=[global_ymin, global_ymax], autorange=False)
    econ_fig.update_yaxes(shared_yaxis)
    breakdown_fig.update_yaxes(shared_yaxis)

    return (
        ubi_line,
        revenue_line,
        ubi_population_line,
        winners_line,
        resources_line,
        econ_fig,
        breakdown_fig,
    )


# The private helpers below are defined after `ubi` on purpose: any decorator
# placed directly above `def ubi` keeps applying to `ubi` itself.


def _count_ubi_recipients(units, include):
    """Return, per SPM unit, the number of members who receive the UBI."""
    exclude_children = "children" not in include
    exclude_adults = "adults" not in include
    exclude_non_citizens = "non_citizens" not in include

    recipients = units.numper
    if exclude_children:
        recipients = recipients - units.child
    if exclude_non_citizens:
        recipients = recipients - units.non_citizen
    if exclude_children and exclude_non_citizens:
        # Non-citizen children were subtracted twice; add them back once.
        recipients = recipients + units.non_citizen_child
    if exclude_adults:
        recipients = recipients - units.adult
    if exclude_adults and exclude_non_citizens:
        # Non-citizen adults were subtracted twice; add them back once.
        recipients = recipients + units.non_citizen_adult
    return recipients


def _apply_flat_agi_tax(units, agi_tax):
    """Charge a flat tax on positive AGI and return the weighted revenue.

    Writes `new_taxes` and lowers `new_resources` on `units` in place.
    Negative AGI is treated as zero so that it never produces a negative
    tax.
    """
    tax_rate = agi_tax / 100
    units["new_taxes"] = np.maximum(units.adjginc, 0) * tax_rate
    units["new_resources"] = units.new_resources - units.new_taxes
    return mdf.weighted_sum(units, "new_taxes", "spmwt")


def _distribute_ubi(units, revenue, include):
    """Split `revenue` equally among eligible people in `units`.

    Writes `numper_ubi`, `total_ubi`, `new_resources` and
    `new_resources_per_person` on `units` in place.

    Returns:
        (ubi_population, ubi_annual): weighted number of recipients and the
        annual payment per recipient.
    """
    units["numper_ubi"] = _count_ubi_recipients(units, include)
    ubi_population = (units.numper_ubi * units.spmwt).sum()
    # With nobody eligible there is no one to pay; avoid dividing by zero.
    ubi_annual = revenue / ubi_population if ubi_population > 0 else 0.0
    units["total_ubi"] = ubi_annual * units.numper_ubi
    units["new_resources"] = units.new_resources + units.total_ubi
    units["new_resources_per_person"] = units.new_resources / units.numper
    return ubi_population, ubi_annual


def _simulate_federal_reform(units, agi_tax, taxes_benefits, include):
    """Simulate a federal reform on `units` in place.

    Repeals every column named in `taxes_benefits`, applies the flat AGI
    tax and pays the resulting revenue out as a UBI.

    Returns:
        (revenue, ubi_population, ubi_annual)
    """
    # Start from current resources; revenue accumulates from zero.
    units["new_resources"] = units.spmtotres
    revenue = 0

    for tax_benefit in taxes_benefits:
        # Remove the repealed tax/benefit from the unit's resources and
        # book the same weighted amount as revenue.
        units["new_resources"] = units.new_resources - units[tax_benefit]
        revenue += mdf.weighted_sum(units, tax_benefit, "spmwt")

    # When income tax is repealed together with a credit, the credit is
    # added back once (presumably because `fedtaxac` is already net of
    # these credits, so the loop above counted them twice; confirm against
    # the data dictionary).
    for credit in ("ctc", "eitcred"):
        if "fedtaxac" in taxes_benefits and credit in taxes_benefits:
            units["new_resources"] = units.new_resources + units[credit]
            revenue -= mdf.weighted_sum(units, credit, "spmwt")

    revenue += _apply_flat_agi_tax(units, agi_tax)
    ubi_population, ubi_annual = _distribute_ubi(units, revenue, include)
    return revenue, ubi_population, ubi_annual


def _simulate_state_reform(units, agi_tax, taxes, include):
    """Simulate a state-level reform on `units` in place.

    Only the income tax repeal is translated to state level (via
    `stataxac`); the benefits checklist is not used here.

    Returns:
        (revenue, ubi_population, ubi_annual)
    """
    units["new_resources"] = units.spmtotres
    revenue = 0

    # "Income taxes" in the checklist means the state income tax here.
    if "fedtaxac" in taxes:
        units["new_resources"] = units.new_resources - units.stataxac
        revenue += mdf.weighted_sum(units, "stataxac", "spmwt")

    revenue += _apply_flat_agi_tax(units, agi_tax)
    ubi_population, ubi_annual = _distribute_ubi(units, revenue, include)
    return revenue, ubi_population, ubi_annual


def _relative_change_bar_figure(labels, changes, hovertemplate, title,
                                tickfont):
    """Build one bar chart of relative changes with the app's styling."""
    fig = go.Figure([
        go.Bar(
            x=labels,
            y=changes,
            text=changes,
            hovertemplate=hovertemplate,
            marker_color=BLUE,
        )
    ])
    fig.update_layout(
        uniformtext_minsize=10,
        uniformtext_mode="hide",
        plot_bgcolor="white",
        title_text=title,
        title_x=0.5,
        hoverlabel_align="right",
        font_family="Roboto",
        title_font_size=20,
        paper_bgcolor="white",
        hoverlabel=dict(bgcolor="white", font_size=14, font_family="Roboto"),
        yaxis_tickformat="%",
    )
    # ".1%" is the d3-format specifier for a percentage with one decimal.
    fig.update_traces(texttemplate="%{text:.1%}", textposition="auto")
    fig.update_xaxes(
        tickangle=45,
        title_text="",
        tickfont=tickfont,
        title_standoff=25,
        title_font=dict(size=14, family="Roboto", color="black"),
    )
    fig.update_yaxes(
        tickprefix="",
        tickfont=tickfont,
        title_standoff=25,
        title_font=dict(size=14, family="Roboto", color="black"),
    )
    return fig