def test_upsample_agebin_raises_if_data_not_edge_aligned(self):
    """Check that upsample_agebin raises NotUpsampleable for misaligned bins.

    Each requested bin below is applied to ``self.grouped_data`` and must
    trigger ``NotUpsampleable``.
    """
    misaligned_bins = (
        '[6:10)',  # not lower-edge aligned
        '[5:14)',  # not upper-edge aligned
        '[6:14)',  # not lower or upper edge aligned
    )
    # Column arguments are identical for every case; only the bin varies.
    column_kwargs = {
        'aggregated_cols': self.aggregation_columns,
        'weighted_cols': self.weighted_columns,
        'weighting_col': self.weighting_column,
    }
    for bin_string in misaligned_bins:
        # Parse outside the `with` so only the apply call is under test.
        requested_bin = AgeBin.from_string(bin_string)
        with self.assertRaises(NotUpsampleable):
            self.grouped_data.apply(
                upsample_agebin, age_bin=requested_bin, **column_kwargs)
def test_upsample_agebin_raises_if_data_does_not_contain_requested_agebin(
        self):
    """Check that upsample_agebin raises NotUpsampleable for uncovered bins.

    Each requested bin below is applied to ``self.grouped_data`` and must
    trigger ``NotUpsampleable``.
    """
    uncovered_bins = (
        '[0:10)',   # overlap lower age
        '[10:18)',  # overlap upper age
        # overlap both lower and upper ages (requesting too large an age
        # range on both sides)
        '[4:16)',
    )
    # Column arguments are identical for every case; only the bin varies.
    column_kwargs = {
        'aggregated_cols': self.aggregation_columns,
        'weighted_cols': self.weighted_columns,
        'weighting_col': self.weighting_column,
    }
    for bin_string in uncovered_bins:
        # Parse outside the `with` so only the apply call is under test.
        requested_bin = AgeBin.from_string(bin_string)
        with self.assertRaises(NotUpsampleable):
            self.grouped_data.apply(
                upsample_agebin, age_bin=requested_bin, **column_kwargs)
def test_contains_works_with_AgeBin_and_string(self):
    """Check that AgeBin.contains accepts both a string and an AgeBin."""
    outer_bin = AgeBin(10, 99)
    inner_as_string = "[10, 15)"

    # String form of the inner bin.
    self.assertTrue(outer_bin.contains(inner_as_string))

    # Same inner bin, parsed into an AgeBin first.
    inner_as_bin = AgeBin.from_string(inner_as_string)
    self.assertTrue(outer_bin.contains(inner_as_bin))
def test_merge_works_with_AgeBin_and_string(self):
    """Check that AgeBin.merge accepts both a string and an AgeBin."""
    lower_bin = AgeBin(10, 15)
    upper_as_string = "[15:99)"
    merged_expectation = AgeBin(10, 99)

    # String form of the bin being merged in.
    self.assertEqual(lower_bin.merge(upper_as_string), merged_expectation)

    # Same bin, parsed into an AgeBin first.
    upper_as_bin = AgeBin.from_string(upper_as_string)
    self.assertEqual(lower_bin.merge(upper_as_bin), merged_expectation)
def test_upsample_agebin_works(self):
    """Check upsample_agebin output for three requested age bins.

    The '[5:10)' and '[10:15)' results must equal the expected frames
    exactly. For '[5:15)' the numerical columns are compared with a tight
    absolute tolerance and the remaining columns exactly.
    """

    def ordered(frame):
        # Sort rows by Gender and renumber so frames line up row-for-row.
        return frame.sort_values('Gender').reset_index(drop=True).sort_index()

    def upsample_to(bin_string):
        # Apply upsample_agebin to the grouped fixture for one requested bin.
        requested_bin = AgeBin.from_string(bin_string)
        upsampled = self.grouped_data.apply(
            upsample_agebin,
            age_bin=requested_bin,
            aggregated_cols=self.aggregation_columns,
            weighted_cols=self.weighted_columns,
            weighting_col=self.weighting_column)
        return ordered(upsampled)

    exact_cases = (
        ('[5:10)', [{
            'Gender': 'Male',
            'AgeBin': '[5:10)',
            'Prevalence': 0.1,
            'Sim_Prevalence': 0.4,
            'Count': 5
        }, {
            'Gender': 'Female',
            'AgeBin': '[5:10)',
            'Prevalence': 0.3,
            'Sim_Prevalence': 0.2,
            'Count': 20
        }]),
        ('[10:15)', [{
            'Gender': 'Male',
            'AgeBin': '[10:15)',
            'Prevalence': 0.2,
            'Sim_Prevalence': 0.3,
            'Count': 15
        }, {
            'Gender': 'Female',
            'AgeBin': '[10:15)',
            'Prevalence': 0.4,
            'Sim_Prevalence': 0.1,
            'Count': 20
        }]),
    )
    for bin_string, expected_rows in exact_cases:
        actual = upsample_to(bin_string)
        wanted = ordered(pd.DataFrame(expected_rows))
        self.assertTrue(actual.equals(wanted))

    actual = upsample_to('[5:15)')
    wanted = ordered(pd.DataFrame([{
        'Gender': 'Male',
        'AgeBin': '[5:15)',
        'Prevalence': 0.175,
        'Sim_Prevalence': 0.325,
        'Count': 20
    }, {
        'Gender': 'Female',
        'AgeBin': '[5:15)',
        'Prevalence': 0.35,
        'Sim_Prevalence': 0.15,
        'Count': 40
    }]))

    value_columns = ['Prevalence', 'Sim_Prevalence', 'Count']
    label_columns = ['Gender', 'AgeBin']

    # checking that numerical values are REALLY close; off a bit due to
    # division in algorithm
    values_match = np.allclose(
        actual[value_columns], wanted[value_columns], atol=1e-16, rtol=0)
    self.assertTrue(values_match)

    # checking non-numerical values are EXACT
    self.assertTrue(actual[label_columns].equals(wanted[label_columns]))
def test_from_string_works_properly(self):
    """Check the start, end and delimiter parsed by AgeBin.from_string."""
    parsed = AgeBin.from_string('[0:;:99)')

    # Attribute name -> value expected after parsing, checked in this order.
    expectations = (
        ('start', 0),
        ('end', 99),
        ('delimiter', ':;:'),
    )
    for attribute, wanted in expectations:
        self.assertEqual(getattr(parsed, attribute), wanted)