Ejemplo n.º 1
0
 def test_unique_values(self):
     """Cleaning the species series must preserve its unique values.

     A ``SeriesValidator`` is built from ``self.species`` and
     ``self.species_rules``; the unique values of its cleaned output are
     compared, via ``assertItemsEqual``, with the unique values of the
     ``'Species'`` column of ``self.dataframe``.
     """
     species_validator = SeriesValidator(
         data=self.species,
         rules=self.species_rules,
     )
     result = species_validator.clean()
     found = result.unique()
     wanted = self.dataframe['Species'].unique()
     self.assertItemsEqual(found, wanted)
Ejemplo n.º 2
0
 def test_unique_values(self):
     """Verify the unique values left after cleaning the species series.

     The cleaned output of a ``SeriesValidator`` built from
     ``self.species`` and ``self.species_rules`` must hold the same unique
     items as the ``'Species'`` column of ``self.dataframe``.
     """
     result = SeriesValidator(
         data=self.species,
         rules=self.species_rules,
     ).clean()
     observed_values = result.unique()
     reference_values = self.dataframe['Species'].unique()
     self.assertItemsEqual(observed_values, reference_values)
Ejemplo n.º 3
0
 def test_postprocessor(self):
     """Test if postprocessors work for series data.

     ``_dummy_postproc`` is registered under the ``'postprocessors'`` key
     of ``self.species_rules`` for the duration of the test. After
     cleaning ``self.species``, ``"setosa"`` must be absent from the
     unique values of the result.
     """
     self.species_rules['postprocessors'] = [_dummy_postproc]
     try:
         # Build the validator inside the ``try`` so the rule added above
         # is removed again even if construction itself raises.
         validator = SeriesValidator(data=self.species,
                                     rules=self.species_rules)
         cleaned = validator.clean()
         self.assertNotIn("setosa", cleaned.unique())
     finally:
         del self.species_rules['postprocessors']
Ejemplo n.º 4
0
 def test_postprocessor(self):
     """Test if postprocessors work for series data.

     The ``'postprocessors'`` rule is set to ``[_dummy_postproc]`` while
     the test runs and deleted afterwards. The cleaned species series
     must not contain ``"setosa"`` among its unique values.
     """
     self.species_rules['postprocessors'] = [_dummy_postproc]
     try:
         # Construction happens under the ``finally`` guard as well, so a
         # failing constructor cannot leave the extra rule behind.
         validator = SeriesValidator(data=self.species,
                                     rules=self.species_rules)
         cleaned = validator.clean()
         self.assertNotIn("setosa", cleaned.unique())
     finally:
         del self.species_rules['postprocessors']
Ejemplo n.º 5
0
 def test_drop_duplicates(self):
     """Check that the ``drop_duplicates`` rule removes repeated values.

     The rule is switched on while the test runs and set back to
     ``False`` afterwards. The cleaned series must have exactly 3 entries
     and hold the same items as the unique values of ``self.species``.
     """
     self.species_rules['drop_duplicates'] = True
     try:
         expected_items = self.species.unique().tolist()
         deduplicator = SeriesValidator(
             data=self.species,
             rules=self.species_rules,
         )
         result = deduplicator.clean()
         row_count = result.shape[0]
         self.assertEqual(row_count, 3)
         self.assertItemsEqual(result.tolist(), expected_items)
     finally:
         # Unconditionally reset the flag for whatever runs next.
         self.species_rules['drop_duplicates'] = False
Ejemplo n.º 6
0
    def test_bad_unique_values(self):
        """Check that values not specified in the schema are dropped.

        Fifty randomly chosen bogus entries (``'lily'`` / ``'petunia'``)
        are appended to the species values and the result is shuffled.
        After cleaning, the unique values must match those of the
        ``'Species'`` column of ``self.dataframe``.
        """
        # Contaminate the real data with names the test expects to vanish.
        bogus = np.random.choice(['lily', 'petunia'], size=(50,))
        mixed = np.hstack((self.species.values, bogus))
        np.random.shuffle(mixed)
        polluted_series = pd.Series(mixed)

        result = SeriesValidator(
            data=polluted_series,
            rules=self.species_rules,
        ).clean()
        found = result.unique()
        wanted = self.dataframe['Species'].unique()
        self.assertItemsEqual(found, wanted)
Ejemplo n.º 7
0
 def test_drop_duplicates(self):
     """Verify duplicate removal when ``drop_duplicates`` is enabled.

     With the rule set to ``True``, cleaning ``self.species`` must give a
     series of length 3 whose items equal the unique values of the input.
     The rule is set to ``False`` again once the test is done.
     """
     self.species_rules['drop_duplicates'] = True
     try:
         distinct_species = self.species.unique().tolist()
         result = SeriesValidator(
             data=self.species,
             rules=self.species_rules,
         ).clean()
         self.assertEqual(result.shape[0], 3)
         remaining = result.tolist()
         self.assertItemsEqual(remaining, distinct_species)
     finally:
         self.species_rules['drop_duplicates'] = False
Ejemplo n.º 8
0
    def test_bad_unique_values(self):
        """Verify that cleaning removes values absent from the schema.

        The species values are mixed with 50 random draws from
        ``['lily', 'petunia']`` and shuffled. The unique values of the
        cleaned series must equal those of ``self.dataframe['Species']``.
        """
        # Inject noise entries, then randomise their position.
        intruders = np.random.choice(['lily', 'petunia'], size=(50,))
        combined = np.hstack((self.species.values, intruders))
        np.random.shuffle(combined)
        noisy = pd.Series(combined)

        noisy_validator = SeriesValidator(
            data=noisy,
            rules=self.species_rules,
        )
        result = noisy_validator.clean()
        self.assertItemsEqual(
            result.unique(),
            self.dataframe['Species'].unique(),
        )
Ejemplo n.º 9
0
 def test_min_max_rules(self):
     """Check that ``min`` and ``max`` rules bound the cleaned values.

     ``min`` is set to 5.0 and ``max`` to 7.0 in
     ``self.sepal_length_rules`` for the duration of the test. The
     cleaned sepal-length series must then have a maximum of at most 7.0
     and a minimum of at least 5.0. Both keys are deleted afterwards.
     """
     self.sepal_length_rules['min'] = 5.0
     self.sepal_length_rules['max'] = 7.0
     try:
         bounded = SeriesValidator(
             data=self.sepal_length,
             rules=self.sepal_length_rules,
         ).clean()
         largest = bounded.max()
         self.assertLessEqual(largest, 7.0)
         smallest = bounded.min()
         self.assertGreaterEqual(smallest, 5.0)
     finally:
         del self.sepal_length_rules['max']
         del self.sepal_length_rules['min']
Ejemplo n.º 10
0
 def test_min_max_rules(self):
     """Verify that schema ``min``/``max`` limits are enforced on clean.

     The rules are temporarily extended with ``min = 5.0`` and
     ``max = 7.0``; every value left in the cleaned series must fall
     within that closed interval. The two keys are removed at the end.
     """
     self.sepal_length_rules['min'] = 5.0
     self.sepal_length_rules['max'] = 7.0
     try:
         range_validator = SeriesValidator(
             data=self.sepal_length,
             rules=self.sepal_length_rules,
         )
         in_range = range_validator.clean()
         self.assertLessEqual(in_range.max(), 7.0)
         self.assertGreaterEqual(in_range.min(), 5.0)
     finally:
         # Remove the temporary limits in the same order as before.
         del self.sepal_length_rules['max']
         del self.sepal_length_rules['min']
Ejemplo n.º 11
0
 def test_converter(self):
     """Check that converters listed in the rules are applied on clean.

     ``_dummy_converter`` is registered under ``'converters'`` while the
     test runs. The cleaned output is cast to ``bool`` and used as a mask
     over ``self.species``; the selected values must consist of
     ``'setosa'`` only.
     """
     self.species_rules['converters'] = [_dummy_converter]
     try:
         converted = SeriesValidator(
             data=self.species,
             rules=self.species_rules,
         ).clean()
         mask = converted.astype(bool)
         selected = self.species[mask]
         self.assertEqual(selected.nunique(), 1)
         self.assertItemsEqual(selected.unique(), ['setosa'])
     finally:
         del self.species_rules['converters']
Ejemplo n.º 12
0
 def test_drop_na(self):
     """Check that the ``drop_na`` rule removes missing entries.

     A 100-element series is drawn at random from the unique species
     values plus ``None``. With ``drop_na`` enabled, the cleaned series
     must have the same number of unique values, and the same unique
     items, as ``self.species``. The rule is reset to ``False`` at the
     end.
     """
     self.species_rules['drop_na'] = True
     try:
         candidates = self.species.unique().tolist() + [None]
         drawn = np.random.choice(candidates, size=(100, ))
         with_gaps = pd.Series(drawn)
         result = SeriesValidator(
             data=with_gaps,
             rules=self.species_rules,
         ).clean()
         self.assertEqual(result.nunique(), self.species.nunique())
         found = result.unique().tolist()
         wanted = self.species.unique().tolist()
         self.assertItemsEqual(found, wanted)
     finally:
         self.species_rules['drop_na'] = False
Ejemplo n.º 13
0
 def test_drop_na(self):
     """Verify NA removal when ``drop_na`` is enabled.

     The input is a random sample of size 100 taken from the unique
     species values together with ``None``. After cleaning, the unique
     values (and their count) must equal those of ``self.species``.
     ``drop_na`` is set back to ``False`` afterwards.
     """
     self.species_rules['drop_na'] = True
     try:
         pool = self.species.unique().tolist() + [None]
         sample = pd.Series(np.random.choice(pool, size=(100,)))
         na_validator = SeriesValidator(
             data=sample,
             rules=self.species_rules,
         )
         result = na_validator.clean()
         self.assertEqual(result.nunique(), self.species.nunique())
         self.assertItemsEqual(
             result.unique().tolist(),
             self.species.unique().tolist(),
         )
     finally:
         self.species_rules['drop_na'] = False
Ejemplo n.º 14
0
    def test_regex_filter(self):
        """Check that the ``'regex'`` rule filters the series.

        A word-bounded, lowercase-letters-only pattern is installed under
        ``'regex'`` for the duration of the test. First the unmodified
        species series is cleaned and must equal the ``'Species'`` column.
        Then every ``"e"`` is replaced by ``"1"`` in a copy of that column;
        the cleaned result must have shape ``(50,)`` and ``'virginica'``
        as its only unique value.
        """
        self.species_rules['regex'] = r'\b[a-z]+\b'
        try:
            # Pass 1: the original data is expected to come through intact.
            untouched = SeriesValidator(
                data=self.species,
                rules=self.species_rules,
            ).clean()
            self.assertSeriesEqual(untouched, self.dataframe['Species'])

            # Pass 2: corrupt a fresh copy by swapping each "e" for "1".
            def _swap_e_for_one(text):
                return text.replace("e", "1")

            self.species = self.dataframe['Species'].copy()
            self.species = self.species.apply(_swap_e_for_one)
            filtered = SeriesValidator(
                data=self.species,
                rules=self.species_rules,
            ).clean()
            self.assertItemsEqual(filtered.shape, (50,))
            survivors = filtered.unique().tolist()
            self.assertItemsEqual(survivors, ['virginica'])
        finally:
            del self.species_rules['regex']
Ejemplo n.º 15
0
    def test_regex_filter(self):
        """Verify regular-expression based filtering in ``SeriesValidator``.

        While a lowercase-only, word-bounded pattern is set as the
        ``'regex'`` rule, the test makes two checks: the untouched species
        series must clean to the ``'Species'`` column unchanged, and a copy
        with each ``"e"`` turned into ``"1"`` must clean to a series of
        shape ``(50,)`` containing only ``'virginica'``.
        """
        self.species_rules['regex'] = r'\b[a-z]+\b'
        try:
            clean_validator = SeriesValidator(
                data=self.species,
                rules=self.species_rules,
            )
            first_pass = clean_validator.clean()
            self.assertSeriesEqual(first_pass, self.dataframe['Species'])

            # Rebind ``self.species`` to a corrupted copy of the column.
            def _corrupt(value):
                return value.replace("e", "1")

            self.species = self.dataframe['Species'].copy()
            self.species = self.species.apply(_corrupt)
            corrupt_validator = SeriesValidator(
                data=self.species,
                rules=self.species_rules,
            )
            second_pass = corrupt_validator.clean()
            self.assertItemsEqual(second_pass.shape, (50,))
            self.assertItemsEqual(
                second_pass.unique().tolist(),
                ['virginica'],
            )
        finally:
            del self.species_rules['regex']
Ejemplo n.º 16
0
 def test_numerical_series(self):
     """Check that a numerical series passes through cleaning unchanged.

     ``self.sepal_length`` is cleaned with ``self.sepal_length_rules`` and
     the result is compared, via ``assertSeriesEqual``, with the
     ``'Sepal Length'`` column of ``self.dataframe``.
     """
     result = SeriesValidator(
         data=self.sepal_length,
         rules=self.sepal_length_rules,
     ).clean()
     reference = self.dataframe['Sepal Length']
     self.assertSeriesEqual(result, reference)
Ejemplo n.º 17
0
 def test_numerical_series(self):
     """Verify ``SeriesValidator`` on the numerical sepal-length series.

     The cleaned output must be equal, according to
     ``assertSeriesEqual``, to ``self.dataframe['Sepal Length']``.
     """
     numeric_validator = SeriesValidator(
         data=self.sepal_length,
         rules=self.sepal_length_rules,
     )
     result = numeric_validator.clean()
     self.assertSeriesEqual(
         result,
         self.dataframe['Sepal Length'],
     )