def setUp(self):
    """Build the ten-row sample frame and store its calculate_cardinality result."""
    # Column values are spelled as repetitions of their cycle; each expands
    # to exactly ten entries.
    columns = {
        'id': list(range(1, 11)),
        'category': ['a', 'b', 'c', 'd'] * 2 + ['a', 'b'],
        'gender': ['F', 'M'] * 5,
        'age': [1, 2, 3] * 3 + [1],
    }
    self.df = pd.DataFrame(columns)
    self.cardinality = cardinality.calculate_cardinality(self.df)
def test_calculates_cardinality(self):
    # Compare calculate_cardinality's output with a hand-written table,
    # one column at a time.
    #
    # NOTE(review): Series.all() reduces a whole column to a single
    # truthiness value, so this only checks that both columns agree on
    # "every entry is truthy" -- it does not compare the actual values.
    # The expected table also lists a 'boring' feature and ratios in
    # quarter steps; TODO confirm it matches the fixture built in setUp
    # before tightening the assertion.
    feature_names = ['id', 'age', 'category', 'gender', 'boring']
    unique_counts = [4, 3, 3, 2, 1]
    unique_ratios = [1, 0.75, 0.75, 0.5, 0.25]
    expected = pd.DataFrame({
        'Feature Name': feature_names,
        'unique_value_count': unique_counts,
        'unique_ratio': unique_ratios,
    })

    result = cardinality.calculate_cardinality(self.df)

    for column in expected.columns:
        actual_flag = result[column].all()
        expected_flag = expected[column].all()
        self.assertEqual(actual_flag, expected_flag)
def setUp(self):
    """Build the sample frame plus constant-valued columns and store its calculate_cardinality result."""
    # Varying columns, each ten entries long; 'boring' holds a single
    # repeated value.
    columns = {
        'id': list(range(1, 11)),
        'category': ['a', 'b', 'c', 'd'] * 2 + ['a', 'b'],
        'gender': ['F', 'M'] * 5,
        'age': [1, 2, 3] * 3 + [1],
        'boring': [1] * 10,
    }
    self.df = pd.DataFrame(columns)

    # Scalar assignments broadcast one constant value to every row; one
    # column per value type (string, float, int, bool).
    constant_columns = {
        'bad_string': 'yup',
        'bad_float': 3.33,
        'bad_int': 1,
        'bad_bool': False,
    }
    for name, value in constant_columns.items():
        self.df[name] = value

    self.cardinality = cardinality.calculate_cardinality(self.df)
def test_returns_dataframe(self):
    # calculate_cardinality must hand back a pandas DataFrame for the
    # fixture frame.
    result = cardinality.calculate_cardinality(self.df)
    self.assertIsInstance(result, pd.DataFrame)