def test_sum(self):
    """Check approximate_sum on a two-estimate sum and on inputs with several zero estimates."""
    # Each pair is (estimate, margin of error).
    males_under_5 = (10154024, 3778)
    females_under_5 = (9712936, 3911)
    under_5_total = census_data_aggregator.approximate_sum(
        males_under_5,
        females_under_5,
    )
    self.assertEqual(under_5_total, (19866960, 5437.757350231803))

    # Several of the estimates are zero in this case.
    zero_heavy_total = census_data_aggregator.approximate_sum(
        [0.0, 22],
        [0, 22],
        [0, 29],
        [41, 37],
    )
    self.assertEqual(zero_heavy_total, (41, 47.01063709417264))
def _aggregate(group_df):
    """
    Collapse one group of rows into a single Series.

    For every name in ``data_columns`` (read from the surrounding scope) the
    rows whose value is NaN are dropped and the remaining values are combined
    with ``cda.approximate_sum``. If a ``<column>_moe`` column is present it
    supplies the margins of error and an aggregated ``<column>_moe`` entry is
    written; otherwise zeros are used as margins and no MOE entry is written.
    A column with no remaining rows yields NaN. The result also carries a
    ``geometry`` entry holding the unary union of the group's geometries.
    """
    result = {}

    for name in data_columns:
        moe_name = f"{name}_moe"

        # Keep only the rows that actually have a value for this column.
        valid = group_df.dropna(subset=[name], how="any")
        has_moe = moe_name in valid.columns

        if len(valid) == 0:
            # Nothing left to aggregate.
            total = margin = np.nan
        else:
            # Pair each value with its margin of error (zeros when absent).
            margins = valid.get(moe_name, np.zeros(len(valid)))
            pairs = np.column_stack([valid[name], margins])
            total, margin = cda.approximate_sum(*pairs)

        result[name] = total
        if has_moe:
            result[moe_name] = margin

    result["geometry"] = group_df.geometry.unary_union
    return pd.Series(result)
def calculate_total_bachelors_or_higher_moe(row):
    """
    Add ``total_bachelors_or_higher`` and its margin of error to ``row``.

    If ``row['female_bachelors_or_higher_moe']`` equals one of the values in
    ``MOE_MAP``, the total is the plain sum of the female and male
    ``*_bachelors_or_higher`` estimates and the margin of error is ``None``.
    Otherwise the eight per-sex, per-degree estimates are combined with
    ``census_data_aggregator.approximate_sum``, which also yields the margin
    of error.

    Parameters
    ----------
    row : mapping supporting item get/set (e.g. a DataFrame row)
        Must contain the estimate and ``*_moe`` keys read below.

    Returns
    -------
    The same ``row`` object, with ``total_bachelors_or_higher`` and
    ``total_bachelors_or_higher_moe`` set.
    """
    # NOTE(review): MOE_MAP values are presumably special codes that stand in
    # for a real margin of error -- confirm against MOE_MAP's definition.
    if row['female_bachelors_or_higher_moe'] in MOE_MAP.values():
        value = sum([
            row['female_bachelors_or_higher'],
            row['male_bachelors_or_higher'],
        ])
        moe = None
    else:
        # This must be an else branch: run unconditionally, it would overwrite
        # the value/moe chosen above and feed the special MOE into the math.
        components = [
            f"{sex}_{degree}"
            for sex in ('female', 'male')
            for degree in (
                'bachelors_degree',
                'masters_degree',
                'professional_school_degree',
                'doctorate_degree',
            )
        ]
        value, moe = census_data_aggregator.approximate_sum(
            *[(row[name], row[f"{name}_moe"]) for name in components]
        )

    row['total_bachelors_or_higher'] = value
    row['total_bachelors_or_higher_moe'] = moe
    return row
def test_sum(self):
    """Check approximate_sum on a two-estimate sum, zero-heavy inputs, and an ACS handbook example."""
    # Each pair is (estimate, margin of error).
    males_under_5 = (10154024, 3778)
    females_under_5 = (9712936, 3911)
    under_5_total = census_data_aggregator.approximate_sum(
        males_under_5,
        females_under_5,
    )
    self.assertEqual(under_5_total, (19866960, 5437.757350231803))

    # Several of the estimates are zero in this case.
    zero_heavy_total = census_data_aggregator.approximate_sum(
        [0.0, 22],
        [0, 22],
        [0, 29],
        [41, 37],
    )
    self.assertEqual(zero_heavy_total, (41, 47.01063709417264))

    # Example taken from the ACS handbook.
    single_women = (
        (135173, 3860),
        (43104, 2642),
        (24842, 1957),
    )
    handbook_total = census_data_aggregator.approximate_sum(*single_women)
    self.assertEqual(handbook_total, (203119, 5070.4647715963865))
def approximate_sum(row, cols):
    """
    Row-wise helper: sum the given columns of ``row`` with ``cda.approximate_sum``.

    Intended to be applied to a DataFrame. Each name in ``cols`` is paired
    with its ``<name>_moe`` entry, and the result of ``cda.approximate_sum``
    is returned wrapped in a ``pd.Series``.

    Note
    ----
    Every column in ``cols`` must have a matching ``<name>_moe`` column.
    """
    # (estimate, margin of error) pairs, in the order given by ``cols``.
    pairs = ((row[name], row[f"{name}_moe"]) for name in cols)
    summed = cda.approximate_sum(*pairs)
    return pd.Series(summed)
def test_sum_ch8(self):
    """Check the aggregate estimate and MOE for the never-married female figures (Table 8.1)."""
    # (estimate, margin of error) per jurisdiction, from Table 8.1.
    never_married_females = [
        (135173, 3860),  # Fairfax
        (43104, 2642),  # Arlington
        (24842, 1957),  # Alexandria
    ]

    # Aggregate the population and its MOE.
    total, total_moe = census_data_aggregator.approximate_sum(
        *never_married_females
    )

    self.assertEqual(total, 203119)
    self.assertAlmostEqual(total_moe, 5070, places=0)
def calculate_asians_moe(row):
    """
    Add ``asians_all`` and ``asians_all_moe`` to ``row``.

    If ``row['asian_alone_moe']`` equals one of the values in ``MOE_MAP``,
    the total is the plain sum of ``asian_alone`` and
    ``native_hawaiian_and_pacific_islander`` and the margin of error is
    ``None``. Otherwise both the total and its margin of error come from
    ``census_data_aggregator.approximate_sum``.

    Parameters
    ----------
    row : mapping supporting item get/set (e.g. a DataFrame row)
        Must contain the estimate and ``*_moe`` keys read below.

    Returns
    -------
    The same ``row`` object, with the two new entries set.
    """
    # NOTE(review): this prints every row and looks like leftover debugging
    # output; kept so the function's observable output does not change.
    pprint(row)

    # NOTE(review): MOE_MAP values are presumably special codes that stand in
    # for a real margin of error -- confirm against MOE_MAP's definition.
    if row['asian_alone_moe'] in MOE_MAP.values():
        value = sum([
            row['asian_alone'],
            row['native_hawaiian_and_pacific_islander'],
        ])
        moe = None
    else:
        # This must be an else branch: run unconditionally, it would overwrite
        # the value/moe chosen above and feed the special MOE into the math.
        value, moe = census_data_aggregator.approximate_sum(
            (row['native_hawaiian_and_pacific_islander'],
             row['native_hawaiian_and_pacific_islander_moe']),
            (row['asian_alone'], row['asian_alone_moe']),
        )

    row['asians_all'] = value
    row['asians_all_moe'] = moe
    return row
def calculate_moe(row):
    """
    Add ``total_no_internet_and_no_subscription`` and its MOE to ``row``.

    If ``row['internet_without_subscription_moe']`` equals one of the values
    in ``MOE_MAP``, the total is the plain sum of
    ``internet_without_subscription`` and ``no_internet`` and the margin of
    error is ``None``. Otherwise both the total and its margin of error come
    from ``census_data_aggregator.approximate_sum``.

    Parameters
    ----------
    row : mapping supporting item get/set (e.g. a DataFrame row)
        Must contain the estimate and ``*_moe`` keys read below.

    Returns
    -------
    The same ``row`` object, with the two new entries set.
    """
    # NOTE(review): this prints every row and looks like leftover debugging
    # output; kept so the function's observable output does not change.
    pprint(row)

    # NOTE(review): MOE_MAP values are presumably special codes that stand in
    # for a real margin of error -- confirm against MOE_MAP's definition.
    if row['internet_without_subscription_moe'] in MOE_MAP.values():
        value = sum([
            row['internet_without_subscription'],
            row['no_internet'],
        ])
        moe = None
    else:
        # This must be an else branch: run unconditionally, it would overwrite
        # the value/moe chosen above and feed the special MOE into the math.
        value, moe = census_data_aggregator.approximate_sum(
            (row['internet_without_subscription'],
             row['internet_without_subscription_moe']),
            (row['no_internet'], row['no_internet_moe']),
        )

    row['total_no_internet_and_no_subscription'] = value
    row['total_no_internet_and_no_subscription_moe'] = moe
    return row
def calculate_other_moe(row):
    """
    Add ``other_all`` and ``other_all_moe`` to ``row``.

    If ``row['american_indian_and_alaska_native_moe']`` equals one of the
    values in ``MOE_MAP``, the total is the plain sum of
    ``american_indian_and_alaska_native``, ``other_alone`` and
    ``two_or_more_races`` and the margin of error is ``None``. Otherwise both
    the total and its margin of error come from
    ``census_data_aggregator.approximate_sum``.

    Parameters
    ----------
    row : mapping supporting item get/set (e.g. a DataFrame row)
        Must contain the estimate and ``*_moe`` keys read below.

    Returns
    -------
    The same ``row`` object, with the two new entries set.
    """
    # NOTE(review): this prints every row and looks like leftover debugging
    # output; kept so the function's observable output does not change.
    pprint(row)

    # NOTE(review): MOE_MAP values are presumably special codes that stand in
    # for a real margin of error -- confirm against MOE_MAP's definition.
    if row['american_indian_and_alaska_native_moe'] in MOE_MAP.values():
        value = sum([
            row['american_indian_and_alaska_native'],
            row['other_alone'],
            row['two_or_more_races'],
        ])
        moe = None
    else:
        # This must be an else branch: run unconditionally, it would overwrite
        # the value/moe chosen above and feed the special MOE into the math.
        value, moe = census_data_aggregator.approximate_sum(
            (row['american_indian_and_alaska_native'],
             row['american_indian_and_alaska_native_moe']),
            (row['other_alone'], row['other_alone_moe']),
            (row['two_or_more_races'], row['two_or_more_races_moe']),
        )

    row['other_all'] = value
    row['other_all_moe'] = moe
    return row
def test_proportion_ch8(self):
    """Check approximate_proportion using an aggregated denominator (Table 8.4 figures)."""
    # Total females aged 15 and older from Table 8.4, as (estimate, MOE).
    females_15_and_older = [
        (466037, 391),  # Fairfax
        (97360, 572),  # Arlington
        (67101, 459),  # Alexandria
    ]

    # The denominator is the aggregated estimate together with its MOE.
    denominator = census_data_aggregator.approximate_sum(*females_15_and_older)
    numerator = (203119, 5070)

    # Proportion of numerator to denominator, plus the proportion's MOE.
    result = census_data_aggregator.approximate_proportion(
        numerator,
        denominator,
    )
    proportion, moe = result

    self.assertAlmostEqual(proportion, 0.322, places=3)
    self.assertAlmostEqual(moe, 0.008, places=3)
def test_ratio_ch8(self):
    """Check approximate_ratio using an aggregated numerator (Table 8.5 figures)."""
    # Never-married males from Table 8.5, as (estimate, MOE).
    never_married_males = [
        (156720, 4222),  # Fairfax
        (44613, 2819),  # Arlington
        (25507, 2259),  # Alexandria
    ]

    # The numerator is the aggregated estimate together with its MOE.
    numerator = census_data_aggregator.approximate_sum(*never_married_males)
    denominator = (203119, 5070)

    # Ratio of numerator to denominator, plus the ratio's MOE.
    result = census_data_aggregator.approximate_ratio(
        numerator,
        denominator,
    )
    ratio, moe = result

    self.assertAlmostEqual(ratio, 1.117, places=3)
    self.assertAlmostEqual(moe, 0.039, places=3)
def calculate_moe(row):
    """
    Add ``us_citizen_total`` and ``us_citizen_total_moe`` to ``row``.

    If ``row['us_citizen_by_naturalization_moe']`` equals one of the values
    in ``MOE_MAP``, the total is the plain sum of the four ``us_citizen_*``
    estimates and the margin of error is ``None``. Otherwise both the total
    and its margin of error come from
    ``census_data_aggregator.approximate_sum``.

    Parameters
    ----------
    row : mapping supporting item get/set (e.g. a DataFrame row)
        Must contain the estimate and ``*_moe`` keys read below.

    Returns
    -------
    The same ``row`` object, with the two new entries set.
    """
    # our custom groups
    # NOTE(review): MOE_MAP values are presumably special codes that stand in
    # for a real margin of error -- confirm against MOE_MAP's definition.
    if row['us_citizen_by_naturalization_moe'] in MOE_MAP.values():
        value = sum([
            row['us_citizen_by_naturalization'],
            row['us_citizen_born_abroad_american_parents'],
            row['us_citizen_born_puertorico_or_us_island_alone'],
            row['us_citizen_born_us'],
        ])
        moe = None
    else:
        # This must be an else branch: run unconditionally, it would overwrite
        # the value/moe chosen above and feed the special MOE into the math.
        value, moe = census_data_aggregator.approximate_sum(
            (row['us_citizen_by_naturalization'],
             row['us_citizen_by_naturalization_moe']),
            (row['us_citizen_born_abroad_american_parents'],
             row['us_citizen_born_abroad_american_parents_moe']),
            # NOTE(review): the estimate key ends in "_alone" but the MOE key
            # does not -- confirm both match the actual column names.
            (row['us_citizen_born_puertorico_or_us_island_alone'],
             row['us_citizen_born_puertorico_or_us_island_moe']),
            (row['us_citizen_born_us'], row['us_citizen_born_us_moe']),
        )

    row['us_citizen_total'] = value
    row['us_citizen_total_moe'] = moe
    return row