def test_infer_total_from_new(self):
    """Check that a missing total_* column is derived from its new_* column."""
    input_columns = {"key", "date", "new_value_column"}
    output_columns = input_columns | {"total_value_column"}

    def without_total(record):
        # Keep every field of the record except the total_* ones
        return {name: value for name, value in record.items() if "total" not in name}

    # Rebuild the fixture row by row, leaving only the index and new_* fields
    stripped_records = [without_total(record) for _, record in NEW_AND_TOTAL_TEST_DATA.iterrows()]
    input_data = DataFrame.from_records(stripped_records)

    # Sanity check: the filtered input holds exactly the index + new_* columns
    self.assertSetEqual(set(input_data.columns), input_columns)

    # Run the inference; the total_* column should be added and nothing else
    result = infer_new_and_total(input_data)
    self.assertSetEqual(set(result.columns), output_columns)

    # The inferred totals must match the totals stored in the original fixture
    actual_totals = result["total_value_column"].to_list()
    expected_totals = NEW_AND_TOTAL_TEST_DATA["total_value_column"].to_list()
    self.assertListEqual(actual_totals, expected_totals)
def test_infer_new_from_total(self):
    """Ensure that new_* values can be inferred from total_* values.

    The expected new_* value for rows dated "2020-01-01" is NaN, because the
    first value of the series cannot be inferred from totals alone.
    """
    # Strip the new_* fields from every record so only index + total_* remain
    total_only_data = DataFrame.from_records(
        [
            {k: v for k, v in row.items() if "new" not in k}
            for _, row in NEW_AND_TOTAL_TEST_DATA.iterrows()
        ]
    )

    # Assert that only the total_* columns + index remain after filtering
    self.assertSetEqual(set(total_only_data.columns), {"key", "date", "total_value_column"})

    # Compute the new_* values from total_*
    inferred_data = infer_new_and_total(total_only_data)

    # Ensure that only the expected columns (and all the expected columns) are present
    self.assertSetEqual(
        set(inferred_data.columns),
        {"key", "date", "new_value_column", "total_value_column"},
    )

    # We can't infer new_* for the first value!
    test_data = NEW_AND_TOTAL_TEST_DATA.copy()
    test_data.loc[test_data.date == "2020-01-01", "new_value_column"] = numpy.nan
    expected_new_values = test_data.new_value_column
    inferred_new_values = inferred_data.new_value_column

    # nan != nan, so the values cannot be compared directly. First check that the
    # missing values sit in the same rows; dropping them alone would let a
    # misplaced NaN go unnoticed.
    self.assertListEqual(
        inferred_new_values.isna().to_list(), expected_new_values.isna().to_list()
    )

    # Then compare the remaining values with the NaNs removed
    self.assertListEqual(
        inferred_new_values.dropna().to_list(), expected_new_values.dropna().to_list()
    )
def test_infer_nothing(self):
    """Check that no columns are added when new_* and total_* are both present."""
    all_columns = {"key", "date", "new_value_column", "total_value_column"}

    # Sanity check: the fixture already carries both the new_* and total_* columns
    fixture_columns = set(NEW_AND_TOTAL_TEST_DATA.columns)
    self.assertSetEqual(fixture_columns, all_columns)

    # Nothing is missing, so running the inference should not add any column
    result = infer_new_and_total(NEW_AND_TOTAL_TEST_DATA)

    # The output must expose exactly the same set of columns as the input
    result_columns = set(result.columns)
    self.assertSetEqual(result_columns, all_columns)