Esempio n. 1
0
 def do_validate(self, df):
     """
     Validate the column names stored in ``self.mapping``.

     Validations:
     - Check that all columns names (old and new) are not empty strings.
       Names made only of whitespace are treated as empty.

     Per the error messages, the mapping keys are the new column names and
     the mapping values are the old ones. ``df`` is not inspected here.

     Raises:
         YeastValidationError: if any key or value is not a string, or is
             empty/blank.
     """
     # NOTE: ``strip`` has to be *called*. Comparing the bound method itself
     # with '' is always False, which let empty names slip through.
     for c in self.mapping.keys():
         if not isinstance(c, str) or c.strip() == '':
             raise YeastValidationError(
                 f'New column name "{c}" should be a non empty string')
     for c in self.mapping.values():
         if not isinstance(c, str) or c.strip() == '':
             raise YeastValidationError(
                 f'Old column name "{c}" should be a non empty string')
Esempio n. 2
0
 def do_validate(self, df):
     """
     Check every (column, type) pair of ``self.mapping``.

     Validations:
     - The column has to be present in ``df.columns``
     - The type has to be one of the keys of ``self.type_mapper``

     Pairs are checked in mapping order; the first failing check raises
     YeastValidationError.
     """
     for c, t in self.mapping.items():
         column_found = c in df.columns
         if not column_found:
             raise YeastValidationError(
                 f'Column {c} not found on the DataFrame')

         type_available = t in self.type_mapper.keys()
         if type_available:
             continue
         raise YeastValidationError(
             f'Data type {t} not available. Choose from: {self.type_mapper.keys()}'
         )
Esempio n. 3
0
    def do_validate(self, df):
        """
        Validate the columns obtained by resolving ``self.selector``.

        - Every resolved column name has to be a string
        - Every resolved column has to be present in ``df.columns``

        Raises YeastValidationError on the first rule that is broken.
        """
        columns = self.resolve_selector(self.selector, df)

        # Type check first: one non-string name invalidates the whole selection
        for name in columns:
            if not isinstance(name, str):
                raise YeastValidationError('Invalid column names')

        # Collect every absent column so the message lists all of them
        missing_columns = [name for name in columns if name not in df.columns]
        if missing_columns:
            raise YeastValidationError(f'The following columns are missing: {missing_columns}')
Esempio n. 4
0
 def do_validate(self, df):
     """
     Validations:
     - ``self.case`` has to be one of the entries of ``self.cases``

     ``df`` is not inspected. Raises YeastValidationError otherwise.
     """
     case_is_supported = self.case in self.cases
     if case_is_supported:
         return
     raise YeastValidationError(f'Invalid format names. Choose between: {self.cases}')
Esempio n. 5
0
 def do_validate(self, gdf):
     """
     - The incoming object has to be a DataFrameGroupBy, i.e. a
       GroupByStep was applied before this step

     Raises YeastValidationError for any other kind of input.
     """
     if isinstance(gdf, DataFrameGroupBy):
         return
     raise YeastValidationError(
         'This step must be executed after a GroupByStep(...)')
Esempio n. 6
0
 def do_validate(self, df):
     """
     Check the custom hooks, then run the custom validation if there is one.

     Hooks are expected to have the signature (step/self, dataframe).
     - to_prepare, to_bake and to_validate must each be None or a callable
     - When to_validate is set, it is called with this step and ``df``
     """
     def is_valid_hook(hook):
         # A hook is optional, but when given it has to be callable
         return hook is None or callable(hook)

     if not is_valid_hook(self.to_prepare):
         raise YeastValidationError(
             'to_prepare must be a function like: to_prepare(step, df)')
     if not is_valid_hook(self.to_bake):
         raise YeastValidationError(
             'to_bake must be a function like: to_bake(step, df)')
     if not is_valid_hook(self.to_validate):
         raise YeastValidationError(
             'to_validate must be a function like: to_validate(step, df)')

     custom_validation = self.to_validate
     if custom_validation:
         custom_validation(self, df)
Esempio n. 7
0
 def do_validate(self, df):
     """
     - Every column obtained by resolving ``self.selector`` has to be
       present in ``df.columns``

     Raises YeastValidationError listing all the absent columns.
     """
     columns = self.resolve_selector(self.selector, df)
     missing_columns = [name for name in columns if name not in df.columns]
     if missing_columns:
         raise YeastValidationError(f'The following columns are missing: {missing_columns}')
Esempio n. 8
0
 def do_validate(self, df):
     """
     Validations:
     - ``self.expression`` has to be a string with some content left
       after stripping whitespace

     ``df`` is not inspected. Raises YeastValidationError otherwise.
     """
     expression = self.expression
     has_content = isinstance(expression, str) and expression.strip() != ''
     if not has_content:
         raise YeastValidationError(
             'The expression must be a non empty string')
Esempio n. 9
0
 def do_validate(self, df):
     """
     - Mapping must be a string or a dict (subclasses such as OrderedDict
       are accepted as well)
     - All columns on the mapping must exist on the df: the dict keys, or
       the string itself when the mapping is a single column name

     Raises:
         YeastValidationError: on a mapping of the wrong type or when a
             referenced column is not in ``df.columns``.
     """
     mapping = self.mapping

     # isinstance (rather than an exact type() comparison) keeps this check
     # in agreement with the branches below and accepts dict/str subclasses.
     if not isinstance(mapping, (dict, str)):
         raise YeastValidationError(
             'The replacement mapping must be a string or dict')

     if isinstance(mapping, str):
         if mapping not in df.columns:
             raise YeastValidationError(
                 f'The following column is missing: {mapping}')
         return

     missing_columns = [c for c in mapping if c not in df.columns]
     if missing_columns:
         raise YeastValidationError(
             f'The following columns are missing: {missing_columns}')
Esempio n. 10
0
 def do_validate(self, df):
     """
     Validate the ``self.transformers`` dictionary.

     - All keys should be valid column names: strings that are not
       empty/blank
     - All values should be callables or transformers or list of those
       things (checked as instances of Transformer or LambdaType; a list
       or tuple value has each of its elements checked)

     ``df`` is not inspected.

     Raises:
         YeastValidationError: on an invalid key or an unrecognized
             transformer.
     """
     for column in self.transformers.keys():
         # ``or`` is required here: a non-string key must fail on its own
         # (and must not reach ``.strip()``), while a string key still has
         # to be checked for blankness.
         if not isinstance(column, str) or column.strip() == '':
             raise YeastValidationError(
                 f'"{column}" is not a valid column name')

     for item in self.transformers.values():
         # A single transformer is validated exactly like a one-element list
         candidates = item if type(item) in [list, tuple] else [item]
         for candidate in candidates:
             if not isinstance(candidate, Transformer) and not isinstance(
                     candidate, LambdaType):
                 raise YeastValidationError(
                     f"Transformer {candidate} not recognized")
Esempio n. 11
0
    def do_validate(self, df):
        """
        - Resolve ``self.selector`` against the df and store the result
          back on ``self.selector``
        - Check if the df contains all listed columns

        Raises YeastValidationError listing all the absent columns.
        """
        resolved = self.resolve_selector(self.selector, df)
        self.selector = resolved

        missing_columns = [name for name in resolved if name not in df.columns]
        if missing_columns:
            raise YeastValidationError(
                f'The following columns are missing: {missing_columns}')
Esempio n. 12
0
 def do_validate(self, x):
     """
     Checks, in the order they are performed:
     - ``self.how`` must be one of ``self.how_types``
     - The right side (``self.y``) must be a DataFrame or a Recipe
     - The left side (``x``) must be a DataFrame
     - When ``self.by`` is set, all its columns must be on the left side
     - When ``self.by`` is set and the right side is a DataFrame, all its
       columns must be on the right side too
     - ``self.df`` may only be given when the right side is a Recipe

     The first failing check raises YeastValidationError.
     """
     # Join type
     if self.how not in self.how_types:
         raise YeastValidationError(
             f'Join type "{self.how}" not between: {self.how_types}')

     # Right side type
     right = self.y
     right_is_frame = isinstance(right, DataFrame)
     if not (right_is_frame or isinstance(right, Recipe)):
         raise YeastValidationError(
             'We are expecting a DataFrame or a Recipe or Right')

     # Left side type
     if not isinstance(x, DataFrame):
         raise YeastValidationError(
             "Previous Step didn't return a DataFrame")

     # Join keys: always checked on the left, on the right only when its
     # columns are known already (i.e. it is a DataFrame)
     if self.by:
         missing = [c for c in self.by if c not in x.columns]
         if missing:
             raise YeastValidationError(
                 f"Columns {missing} not found on the x side of the merge")
         if right_is_frame:
             missing = [c for c in self.by if c not in right.columns]
             if missing:
                 raise YeastValidationError(
                     f"Columns {missing} not found on the right side of the merge"
                 )

     # 'df' parameter
     if self.df is None:
         return
     if not isinstance(right, Recipe):
         raise YeastValidationError(
             f"'df' parameter is only supported if right is a Recipe")
Esempio n. 13
0
 def do_validate(self, df):
     """
     - Both the 'name' and the 'rank' columns must be in ``df.columns``

     Raises YeastValidationError when either of them is absent.
     """
     required = ('name', 'rank')
     if all(column in df.columns for column in required):
         return
     raise YeastValidationError('Name or Rank not found')
Esempio n. 14
0
 def validate_columns_but_fail(step, df):
     """
     Custom validation with the (step, df) signature: the 'age' column
     must be in ``df.columns``, otherwise YeastValidationError is raised.
     """
     age_is_present = 'age' in df.columns
     if age_is_present:
         return
     raise YeastValidationError('Age not found')