Ejemplo n.º 1
0
 def test_sum(self):
     """Exercise approximate_sum with a two-estimate input and a zero-heavy input."""
     # Each pair is (estimate, margin of error)
     under_5_males = (10154024, 3778)
     under_5_females = (9712936, 3911)
     combined = census_data_aggregator.approximate_sum(
         under_5_males, under_5_females)
     self.assertEqual(combined, (19866960, 5437.757350231803))

     # Several of the estimates are zero
     zero_heavy = ([0.0, 22], [0, 22], [0, 29], [41, 37])
     self.assertEqual(
         census_data_aggregator.approximate_sum(*zero_heavy),
         (41, 47.01063709417264))
Ejemplo n.º 2
0
    def _aggregate(group_df):
        """
        Collapse one group of rows into a single aggregated row.

        For every column in ``data_columns`` the rows holding NaN in that
        column are dropped and the remaining values are combined with
        ``cda.approximate_sum``. If the group has no ``<col>_moe`` column,
        zeros are used as the margins of error and no ``<col>_moe`` entry is
        written to the result. The group's geometries are merged through
        ``unary_union``.
        """
        result = {}
        for col in data_columns:
            # Name the margin-of-error column would have, if present
            moe_name = f"{col}_moe"

            # Only rows that actually have a value for this column take part
            valid = group_df.dropna(subset=[col], how="any")

            # Default to NaN; overwritten below when any rows survived
            total = margin = np.nan
            if len(valid):
                # One (value, margin of error) row per surviving record
                pairs = np.column_stack([
                    valid[col],
                    valid.get(moe_name, np.zeros(len(valid)))
                ])
                total, margin = cda.approximate_sum(*pairs)

            result[col] = total
            if moe_name in valid.columns:
                result[moe_name] = margin

        result["geometry"] = group_df.geometry.unary_union
        return pd.Series(result)
Ejemplo n.º 3
0
 def calculate_total_bachelors_or_higher_moe(row):
     """
     Add ``total_bachelors_or_higher`` and its margin of error to ``row``.

     If ``row['female_bachelors_or_higher_moe']`` equals one of the values in
     ``MOE_MAP``, the total is the plain sum of the female and male
     ``*_bachelors_or_higher`` columns and the margin of error is ``None``.
     Otherwise the eight per-degree (estimate, MOE) pairs are combined with
     ``census_data_aggregator.approximate_sum``.

     Returns the same ``row`` with the two new keys assigned.
     """
     # NOTE(review): MOE_MAP values are presumably placeholder/annotation
     # codes rather than real margins of error -- confirm against MOE_MAP.
     if row['female_bachelors_or_higher_moe'] in MOE_MAP.values():
         value = sum([
             row['female_bachelors_or_higher'],
             row['male_bachelors_or_higher']
         ])
         moe = None
     else:
         # Must be an ``else``: without it this call always ran and
         # overwrote the value/moe chosen in the branch above.
         value, moe = census_data_aggregator.approximate_sum(
             (row['female_bachelors_degree'],
              row['female_bachelors_degree_moe']),
             (row['female_masters_degree'],
              row['female_masters_degree_moe']),
             (row['female_professional_school_degree'],
              row['female_professional_school_degree_moe']),
             (row['female_doctorate_degree'],
              row['female_doctorate_degree_moe']),
             (row['male_bachelors_degree'],
              row['male_bachelors_degree_moe']),
             (row['male_masters_degree'], row['male_masters_degree_moe']),
             (row['male_professional_school_degree'],
              row['male_professional_school_degree_moe']),
             (row['male_doctorate_degree'],
              row['male_doctorate_degree_moe']))
     row['total_bachelors_or_higher'] = value
     row['total_bachelors_or_higher_moe'] = moe
     return row
 def test_sum(self):
     """Exercise approximate_sum on three fixed inputs with known results."""
     # Each pair is (estimate, margin of error)
     under_5_males = (10154024, 3778)
     under_5_females = (9712936, 3911)
     combined = census_data_aggregator.approximate_sum(
         under_5_males, under_5_females)
     self.assertEqual(combined, (19866960, 5437.757350231803))

     # Several of the estimates are zero
     zero_heavy = ([0.0, 22], [0, 22], [0, 29], [41, 37])
     self.assertEqual(
         census_data_aggregator.approximate_sum(*zero_heavy),
         (41, 47.01063709417264))

     # Figures taken from the ACS handbook examples
     single_women = ((135173, 3860), (43104, 2642), (24842, 1957))
     handbook_total = census_data_aggregator.approximate_sum(*single_women)
     self.assertEqual(handbook_total, (203119, 5070.4647715963865))
Ejemplo n.º 5
0
def approximate_sum(row, cols):
    """
    Sum the columns in ``cols`` for one row, passing each value together
    with its margin of error to ``cda.approximate_sum``.

    Meant to be applied to a DataFrame; the result of
    ``cda.approximate_sum`` is returned wrapped in a ``pd.Series``.

    Note
    ----
    Every column in ``cols`` must have a matching ``<col>_moe`` entry in
    ``row``.
    """
    pairs = []
    for name in cols:
        # (value, margin of error) for this column
        pairs.append((row[name], row[f"{name}_moe"]))
    combined = cda.approximate_sum(*pairs)
    return pd.Series(combined)
Ejemplo n.º 6
0
    def test_sum_ch8(self):
        """Check approximate_sum against the never-married female figures from Table 8.1."""
        # (estimate, margin of error) for Fairfax, Arlington and Alexandria
        never_married_females = (
            (135173, 3860),
            (43104, 2642),
            (24842, 1957),
        )

        # Aggregate the population and its margin of error
        result = census_data_aggregator.approximate_sum(*never_married_females)
        agg_pop, agg_moe = result

        self.assertEqual(agg_pop, 203119)
        self.assertAlmostEqual(agg_moe, 5070, places=0)
Ejemplo n.º 7
0
 def calculate_asians_moe(row):
     """
     Add ``asians_all`` and ``asians_all_moe`` to ``row``.

     If ``row['asian_alone_moe']`` equals one of the values in ``MOE_MAP``,
     the total is the plain sum of the two estimates and the margin of error
     is ``None``. Otherwise both (estimate, MOE) pairs are combined with
     ``census_data_aggregator.approximate_sum``.

     Returns the same ``row`` with the two new keys assigned.
     """
     # NOTE(review): prints every row; looks like leftover debug output.
     pprint(row)
     # NOTE(review): MOE_MAP values are presumably placeholder/annotation
     # codes rather than real margins of error -- confirm against MOE_MAP.
     if row['asian_alone_moe'] in MOE_MAP.values():
         value = sum([row['asian_alone'], row['native_hawaiian_and_pacific_islander']])
         moe = None
     else:
         # Must be an ``else``: without it this call always ran and
         # overwrote the value/moe chosen in the branch above.
         value, moe = census_data_aggregator.approximate_sum(
             (row['native_hawaiian_and_pacific_islander'], row['native_hawaiian_and_pacific_islander_moe']),
             (row['asian_alone'], row['asian_alone_moe']),
         )
     row['asians_all'] = value
     row['asians_all_moe'] = moe
     return row
Ejemplo n.º 8
0
 def calculate_moe(row):
     """
     Add ``total_no_internet_and_no_subscription`` and its margin of error
     to ``row``.

     If ``row['internet_without_subscription_moe']`` equals one of the values
     in ``MOE_MAP``, the total is the plain sum of the two estimates and the
     margin of error is ``None``. Otherwise both (estimate, MOE) pairs are
     combined with ``census_data_aggregator.approximate_sum``.

     Returns the same ``row`` with the two new keys assigned.
     """
     # NOTE(review): prints every row; looks like leftover debug output.
     pprint(row)
     # NOTE(review): MOE_MAP values are presumably placeholder/annotation
     # codes rather than real margins of error -- confirm against MOE_MAP.
     if row['internet_without_subscription_moe'] in MOE_MAP.values():
         value = sum([row['internet_without_subscription'], row['no_internet']])
         moe = None
     else:
         # Must be an ``else``: without it this call always ran and
         # overwrote the value/moe chosen in the branch above.
         value, moe = census_data_aggregator.approximate_sum(
             (row['internet_without_subscription'], row['internet_without_subscription_moe']),
             (row['no_internet'], row['no_internet_moe']),
         )
     row['total_no_internet_and_no_subscription'] = value
     row['total_no_internet_and_no_subscription_moe'] = moe
     return row
Ejemplo n.º 9
0
 def calculate_other_moe(row):
     """
     Add ``other_all`` and ``other_all_moe`` to ``row``.

     If ``row['american_indian_and_alaska_native_moe']`` equals one of the
     values in ``MOE_MAP``, the total is the plain sum of the three estimates
     and the margin of error is ``None``. Otherwise the three
     (estimate, MOE) pairs are combined with
     ``census_data_aggregator.approximate_sum``.

     Returns the same ``row`` with the two new keys assigned.
     """
     # NOTE(review): prints every row; looks like leftover debug output.
     pprint(row)
     # NOTE(review): MOE_MAP values are presumably placeholder/annotation
     # codes rather than real margins of error -- confirm against MOE_MAP.
     if row['american_indian_and_alaska_native_moe'] in MOE_MAP.values():
         value = sum([row['american_indian_and_alaska_native'], row['other_alone'], row['two_or_more_races']])
         moe = None
     else:
         # Must be an ``else``: without it this call always ran and
         # overwrote the value/moe chosen in the branch above.
         value, moe = census_data_aggregator.approximate_sum(
             (row['american_indian_and_alaska_native'], row['american_indian_and_alaska_native_moe']),
             (row['other_alone'], row['other_alone_moe']),
             (row['two_or_more_races'], row['two_or_more_races_moe']),
         )
     row['other_all'] = value
     row['other_all_moe'] = moe
     return row
Ejemplo n.º 10
0
    def test_proportion_ch8(self):
        """Check approximate_proportion with a denominator aggregated from Table 8.4 figures."""
        # Total females aged 15 and older (Table 8.4), as
        # (estimate, margin of error) for Fairfax, Arlington and Alexandria
        females_15_plus = (
            (466037, 391),
            (97360, 572),
            (67101, 459),
        )

        # The denominator is the aggregated (value, MOE) of the three areas
        denominator = census_data_aggregator.approximate_sum(*females_15_plus)
        numerator = (203119, 5070)

        # Proportion and its margin of error
        result = census_data_aggregator.approximate_proportion(
            numerator, denominator)
        proportion, moe = result

        self.assertAlmostEqual(proportion, 0.322, places=3)
        self.assertAlmostEqual(moe, 0.008, places=3)
Ejemplo n.º 11
0
    def test_ratio_ch8(self):
        """Check approximate_ratio with a numerator aggregated from Table 8.5 figures."""
        # Never-married males (Table 8.5), as (estimate, margin of error)
        # for Fairfax, Arlington and Alexandria
        never_married_males = (
            (156720, 4222),
            (44613, 2819),
            (25507, 2259),
        )

        # The numerator is the aggregated (value, MOE) of the three areas
        numerator = census_data_aggregator.approximate_sum(*never_married_males)
        denominator = (203119, 5070)

        # Ratio and its margin of error
        result = census_data_aggregator.approximate_ratio(
            numerator, denominator)
        ratio, moe = result

        self.assertAlmostEqual(ratio, 1.117, places=3)
        self.assertAlmostEqual(moe, 0.039, places=3)
Ejemplo n.º 12
0
 def calculate_moe(row):
     """
     Add ``us_citizen_total`` and ``us_citizen_total_moe`` to ``row``.

     If ``row['us_citizen_by_naturalization_moe']`` equals one of the values
     in ``MOE_MAP``, the total is the plain sum of the four citizen estimates
     and the margin of error is ``None``. Otherwise the four
     (estimate, MOE) pairs are combined with
     ``census_data_aggregator.approximate_sum``.

     Returns the same ``row`` with the two new keys assigned.
     """
     # NOTE(review): MOE_MAP values are presumably placeholder/annotation
     # codes rather than real margins of error -- confirm against MOE_MAP.
     if row['us_citizen_by_naturalization_moe'] in MOE_MAP.values():
         value = sum([
             row['us_citizen_by_naturalization'],
             row['us_citizen_born_abroad_american_parents'],
             row['us_citizen_born_puertorico_or_us_island_alone'],
             row['us_citizen_born_us']
         ])
         moe = None
     else:
         # Must be an ``else``: without it this call always ran and
         # overwrote the value/moe chosen in the branch above.
         value, moe = census_data_aggregator.approximate_sum(
             (row['us_citizen_by_naturalization'],
              row['us_citizen_by_naturalization_moe']),
             (row['us_citizen_born_abroad_american_parents'],
              row['us_citizen_born_abroad_american_parents_moe']),
             # NOTE(review): the MOE key below lacks the "_alone" part of
             # its estimate column's name -- verify it matches the schema.
             (row['us_citizen_born_puertorico_or_us_island_alone'],
              row['us_citizen_born_puertorico_or_us_island_moe']),
             (row['us_citizen_born_us'], row['us_citizen_born_us_moe']),
         )
     row['us_citizen_total'] = value
     row['us_citizen_total_moe'] = moe
     return row