def test_unique_values(self):
    """Check the unique values of the cleaned species series.

    The species fixture is cleaned with the species rules, and the unique
    values of the result must be the same items as the unique values of
    the ``Species`` column of the fixture dataframe.
    """
    checker = SeriesValidator(data=self.species, rules=self.species_rules)
    observed = checker.clean().unique()
    expected = self.dataframe['Species'].unique()
    self.assertItemsEqual(observed, expected)
def test_postprocessor(self):
    """Test if postprocessors work for series data.

    Registers ``_dummy_postproc`` as the only postprocessor in the species
    rules and checks that "setosa" is absent from the unique values of the
    cleaned series.
    """
    self.species_rules['postprocessors'] = [_dummy_postproc]
    try:
        # Build the validator inside the ``try`` so that the temporary rule
        # is still removed if construction itself raises.
        validator = SeriesValidator(data=self.species,
                                    rules=self.species_rules)
        cleaned = validator.clean()
        self.assertNotIn("setosa", cleaned.unique())
    finally:
        # Drop the temporary rule so it cannot affect later use of the
        # rules dict.
        del self.species_rules['postprocessors']
def test_regex_filter(self):
    """Test if the SeriesValidator filters on the provided regex.

    Two scenarios are checked with the ``regex`` rule set to
    ``\\b[a-z]+\\b``:

    1. The unmodified species fixture must come back equal to the
       ``Species`` column of the fixture dataframe.
    2. A variant of the ``Species`` column in which every "e" is replaced
       by "1" must be reduced to 50 values, all of them "virginica".
    """
    self.species_rules['regex'] = r'\b[a-z]+\b'
    try:
        validator = SeriesValidator(data=self.species,
                                    rules=self.species_rules)
        cleaned = validator.clean()
        self.assertSeriesEqual(cleaned, self.dataframe['Species'])

        # Use a local series rather than rebinding ``self.species`` so the
        # fixture attribute is left untouched. ``apply`` returns a new
        # series, so the dataframe column itself is not modified.
        mangled = self.dataframe['Species'].apply(
            lambda x: x.replace("e", "1"))
        validator = SeriesValidator(data=mangled, rules=self.species_rules)
        cleaned = validator.clean()
        self.assertEqual(cleaned.shape, (50,))
        self.assertItemsEqual(cleaned.unique().tolist(), ['virginica'])
    finally:
        # Remove the temporary rule so it cannot affect later use of the
        # rules dict.
        del self.species_rules['regex']
def test_drop_duplicates(self):
    """Verify duplicate handling when ``drop_duplicates`` is enabled.

    With the rule switched on, the cleaned species series must hold
    exactly three entries, matching the unique values of the fixture.
    """
    self.species_rules['drop_duplicates'] = True
    try:
        expected_values = self.species.unique().tolist()
        deduped = SeriesValidator(data=self.species,
                                  rules=self.species_rules).clean()
        self.assertEqual(deduped.shape[0], 3)
        self.assertItemsEqual(deduped.tolist(), expected_values)
    finally:
        # Switch the rule back off once the test is done.
        self.species_rules['drop_duplicates'] = False
def test_bad_unique_values(self):
    """Check that values outside the schema are removed by cleaning.

    Fifty entries drawn from 'lily' and 'petunia' are mixed into the
    species values and shuffled. After cleaning, the unique values must
    be the same items as the unique values of the ``Species`` column.
    """
    # Mix bogus labels in with the real species values.
    bogus = np.random.choice(['lily', 'petunia'], size=(50,))
    mixed = np.hstack((self.species.values, bogus))
    np.random.shuffle(mixed)
    checker = SeriesValidator(data=pd.Series(mixed),
                              rules=self.species_rules)
    surviving = checker.clean().unique()
    self.assertItemsEqual(surviving, self.dataframe['Species'].unique())
def test_min_max_rules(self):
    """Check that ``min`` and ``max`` rules bound the cleaned series.

    With ``min`` set to 5.0 and ``max`` set to 7.0 in the sepal length
    rules, the cleaned series must have no value below 5.0 or above 7.0.
    """
    lower, upper = 5.0, 7.0
    self.sepal_length_rules['min'] = lower
    self.sepal_length_rules['max'] = upper
    try:
        bounded = SeriesValidator(data=self.sepal_length,
                                  rules=self.sepal_length_rules).clean()
        self.assertLessEqual(bounded.max(), upper)
        self.assertGreaterEqual(bounded.min(), lower)
    finally:
        # Remove both temporary bounds from the rules dict.
        del self.sepal_length_rules['max']
        del self.sepal_length_rules['min']
def test_converter(self):
    """Check that converters listed in the rules are applied.

    ``_dummy_converter`` is registered as the only converter. The cleaned
    output is cast to bool and used as a mask over the species fixture;
    the selected rows must contain exactly one unique value, 'setosa'.
    """
    self.species_rules['converters'] = [_dummy_converter]
    try:
        checker = SeriesValidator(data=self.species,
                                  rules=self.species_rules)
        mask = checker.clean().astype(bool)
        selected = self.species[mask]
        self.assertEqual(selected.nunique(), 1)
        self.assertItemsEqual(selected.unique(), ['setosa'])
    finally:
        # Remove the temporary converter rule.
        del self.species_rules['converters']
def test_drop_na(self):
    """Check if the SeriesValidator drops NAs in the series.

    A series of 100 values is sampled from the unique species values plus
    ``None``. With ``drop_na`` enabled, the cleaned series must contain
    the same unique values as the species fixture.

    This method was previously defined twice with identical bodies; the
    later definition shadowed the earlier one, so only one is kept.
    """
    self.species_rules['drop_na'] = True
    try:
        candidates = self.species.unique().tolist() + [None]
        unqs = pd.Series(np.random.choice(candidates, size=(100,)))
        validator = SeriesValidator(data=unqs, rules=self.species_rules)
        cleaned = validator.clean()
        self.assertEqual(cleaned.nunique(), self.species.nunique())
        self.assertItemsEqual(cleaned.unique().tolist(),
                              self.species.unique().tolist())
    finally:
        # Switch the rule back off once the test is done.
        self.species_rules['drop_na'] = False
def test_numerical_series(self):
    """Check the SeriesValidator on a numerical series.

    The sepal length fixture is cleaned with the sepal length rules, and
    the result must equal the ``Sepal Length`` column of the fixture
    dataframe.
    """
    checker = SeriesValidator(data=self.sepal_length,
                              rules=self.sepal_length_rules)
    result = checker.clean()
    self.assertSeriesEqual(result, self.dataframe['Sepal Length'])