class Anthology(Book):
    """Applies techniques to 'Cookbook' instances to assess performance.

    NOTE(review): the attributes below are declared with 'dataclasses.field',
    so the class is presumably decorated as a dataclass outside this view.

    Args:
        name (Optional[str]): designates the name of the class used for
            internal referencing throughout siMpLify. If the class needs
            settings from the shared 'Idea' instance, 'name' should match the
            appropriate section name in 'Idea'. When subclassing, it is a good
            idea to use the same 'name' attribute as the base class for
            effective coordination between siMpLify classes. 'name' is used
            instead of __class__.__name__ to make such subclassing easier.
            Defaults to 'anthology'.
        chapters (Optional[List['Review']]): stored 'Review' instances.
            Defaults to an empty list.
        iterable (Optional[str]): name of property to store alternative proxy
            to 'reviews'. Defaults to 'reviews'.
        steps (Optional[List[Tuple[str, str]]]): tuples of steps and
            techniques. Defaults to an empty list.
        techniques (Optional[List['Technique']]): 'Technique' instances to
            apply. In an ordinary project, 'techniques' are not passed to an
            Anthology instance, but are instead created from 'steps' when the
            'publish' method of a 'Project' instance is called. Defaults to an
            empty list.

    """
    name: Optional[str] = dataclasses.field(default_factory = lambda: 'anthology')
    chapters: Optional[List['Review']] = dataclasses.field(default_factory = list)
    iterable: Optional[str] = dataclasses.field(default_factory = lambda: 'reviews')
    steps: Optional[List[Tuple[str, str]]] = dataclasses.field(default_factory = list)
    techniques: Optional[List['Technique']] = dataclasses.field(default_factory = list)
class SkaterExplain(Explainer):
    """Explainer registered under the name 'skater'.

    Every '_apply_*' hook defined here is currently a placeholder which hands
    back the passed 'review' without modifying it.

    Args:
        name (Optional[str]): name used for internal referencing. Defaults to
            'skater'.
        idea (Optional[Idea]): an instance with project settings. Defaults to
            None.

    """
    name: Optional[str] = dataclasses.field(default_factory = lambda: 'skater')
    idea: Optional[core.Idea] = None

    """ Private Methods """

    # Each hook below takes a 'Recipe' and a 'Review' and returns the same
    # 'Review' object untouched.

    def _apply_explain(self, recipe: 'Recipe', review: 'Review') -> 'Review':
        return review

    def _apply_predict(self, recipe: 'Recipe', review: 'Review') -> 'Review':
        return review

    def _apply_rank(self, recipe: 'Recipe', review: 'Review') -> 'Review':
        return review

    def _apply_measure(self, recipe: 'Recipe', review: 'Review') -> 'Review':
        return review

    def _apply_report(self, recipe: 'Recipe', review: 'Review') -> 'Review':
        return review
class SklearnExplain(Explainer):
    """Explains model performance with the sklearn package.

    Only '_apply_predict' does real work; the remaining '_apply_*' hooks hand
    back 'review' unchanged.

    NOTE(review): '_apply_predict' reads 'self.estimator', which is not set
    anywhere in this class - presumably assigned by 'Explainer' or a caller.

    Args:
        idea (Optional[Idea]): an instance with project settings. Defaults to
            None.
        name (Optional[str]): name used for internal referencing and as the
            prefix of the keys written to 'review.predictions'. Defaults to
            'sklearn'.

    """
    idea: Optional[core.Idea] = None
    name: Optional[str] = dataclasses.field(default_factory = lambda: 'sklearn')

    """ Private Methods """

    def _apply_explain(self, recipe: 'Recipe', review: 'Review') -> 'Review':
        return review

    def _apply_predict(self, recipe: 'Recipe', review: 'Review') -> 'Review':
        """Stores every kind of prediction the estimator can produce.

        Three estimator methods are tried in turn: 'predict', 'predict_proba'
        and 'predict_log_proba'. Results are stored in 'review.predictions'
        under 'name', 'name_probabilities' and 'name_log_probabilities'
        respectively. Any AttributeError raised while producing or storing one
        result (for instance because the estimator lacks that method) causes
        that result to be skipped silently.

        Args:
            recipe ('Recipe'): a completed 'Recipe' from a 'Cookbook' instance.
                Its 'data.x_test' attribute is passed to the estimator.
            review ('Review'): an instance to complete based upon the
                performance of 'recipe'.

        Returns:
            'Review': the same 'review', with any available predictions added.

        """
        # (key suffix, estimator method); a suffix of None means the bare name.
        prediction_kinds = (
            (None, 'predict'),
            ('probabilities', 'predict_proba'),
            ('log_probabilities', 'predict_log_proba'))
        for suffix, method_name in prediction_kinds:
            try:
                outcome = getattr(self.estimator, method_name)(
                    recipe.data.x_test)
                if suffix is None:
                    key = self.name
                else:
                    key = '_'.join([self.name, suffix])
                review.predictions[key] = outcome
            except AttributeError:
                pass
        return review

    def _apply_rank(self, recipe: 'Recipe', review: 'Review') -> 'Review':
        return review

    def _apply_measure(self, recipe: 'Recipe', review: 'Review') -> 'Review':
        return review

    def _apply_report(self, recipe: 'Recipe', review: 'Review') -> 'Review':
        return review

    """ Core siMpLify Methods """

    def draft(self) -> None:
        """Performs no setup; returns the instance itself."""
        return self
class Analyst(Worker):
    """Object construction instructions used by a Project instance.

    NOTE(review): the attributes below are declared with 'dataclasses.field',
    so the class is presumably decorated as a dataclass outside this view.

    Args:
        name (Optional[str]): designates the name of the class used for
            internal referencing throughout siMpLify. If the class needs
            settings from the shared 'Idea' instance, 'name' should match the
            appropriate section name in 'Idea'. When subclassing, it is a good
            idea to use the same 'name' attribute as the base class for
            effective coordination between siMpLify classes. 'name' is used
            instead of __class__.__name__ to make such subclassing easier.
            Defaults to 'analyst'.
        module (Optional[str]): name of module where object to use is located
            (can either be a siMpLify or non-siMpLify module). Defaults to
            'simplify.analyst.analyst'.
        book (Optional[str]): name of Book object in 'module' to load.
            Defaults to 'Cookbook'.
        chapter (Optional[str]): name of Chapter object in 'module' to load.
            Defaults to 'Recipe'.
        technique (Optional[str]): name of Technique object in 'module' to
            load. Defaults to 'Tool'.
        publisher (Optional[str]): name of Publisher class in 'module' to load.
            Defaults to 'AnalystPublisher'.
        scholar (Optional[str]): name of Scholar class in 'module' to load.
            Defaults to 'AnalystScholar'.
        options (Optional[str]): name of the options object in 'module' to
            load. Defaults to 'Tools'.
        idea (Optional[Idea]): shared project configuration settings. Defaults
            to None.

    The following arguments are not redeclared in this class; they are
    presumably inherited from 'Worker' (TODO confirm, including defaults):
        steps (Optional[List[str]]): list of steps to execute.
        data (Optional[str]): name of attribute or key in a 'Project' instance
            'library' to use as a data object to apply methods to.
        import_folder (Optional[str]): name of attribute in 'filer' which
            contains the path to the default folder for importing data
            objects.
        export_folder (Optional[str]): name of attribute in 'filer' which
            contains the path to the default folder for exporting data
            objects.

    """
    name: Optional[str] = dataclasses.field(default_factory = lambda: 'analyst')
    module: Optional[str] = dataclasses.field(
        default_factory = lambda: 'simplify.analyst.analyst')
    book: Optional[str] = dataclasses.field(default_factory = lambda: 'Cookbook')
    chapter: Optional[str] = dataclasses.field(default_factory = lambda: 'Recipe')
    technique: Optional[str] = dataclasses.field(default_factory = lambda: 'Tool')
    publisher: Optional[str] = dataclasses.field(
        default_factory = lambda: 'AnalystPublisher')
    scholar: Optional[str] = dataclasses.field(default_factory = lambda: 'AnalystScholar')
    options: Optional[str] = dataclasses.field(default_factory = lambda: 'Tools')
    idea: Optional[core.Idea] = None
class Cookbook(Book):
    """Standard class for iterable storage in the Analyst subpackage.

    NOTE(review): the attributes below are declared with 'dataclasses.field',
    so the class is presumably decorated as a dataclass outside this view.

    Args:
        name (Optional[str]): designates the name of the class used for
            internal referencing throughout siMpLify. If the class needs
            settings from the shared 'Idea' instance, 'name' should match the
            appropriate section name in 'Idea'. When subclassing, it is a good
            idea to use the same 'name' attribute as the base class for
            effective coordination between siMpLify classes. 'name' is used
            instead of __class__.__name__ to make such subclassing easier.
            Defaults to 'cookbook'.
        chapters (Optional[List['Chapter']]): iterable collection of steps and
            techniques to apply at each step. Defaults to an empty list.
        iterable (Optional[str]): name of property to store alternative proxy
            to 'recipes'. Defaults to 'recipes'.

    """
    name: Optional[str] = dataclasses.field(default_factory = lambda: 'cookbook')
    chapters: Optional[List['Chapter']] = dataclasses.field(default_factory = list)
    iterable: Optional[str] = dataclasses.field(default_factory = lambda: 'recipes')
class Metric(Technique):
    """Base class for model performance evaluation measurements.

    NOTE(review): the attributes below are declared with 'dataclasses.field',
    so the class is presumably decorated as a dataclass outside this view.

    Args:
        name (Optional[str]): designates the name of the class used for
            internal referencing throughout siMpLify. If the class needs
            settings from the shared 'Idea' instance, 'name' should match the
            appropriate section name in 'Idea'. When subclassing, it is a good
            idea to use the same 'name' attribute as the base class for
            effective coordination between siMpLify classes. 'name' is used
            instead of __class__.__name__ to make such subclassing easier.
            Defaults to None or __class__.__name__.lower() if
            super().__post_init__ is called.
        step (Optional[str]): name of step when the class instance is to be
            applied. Defaults to 'measure'.
        module (Optional[str]): name of module where object to use is located
            (can either be a siMpLify or non-siMpLify module). Defaults to
            None.
        algorithm (Optional[object]): process object which executes the
            primary method of a class instance. Defaults to None.
        parameters (Optional[Dict[str, Any]]): parameters to be attached to
            'algorithm' when 'algorithm' is instanced. Defaults to an empty
            dictionary.
        negative (Optional[bool]): defaults to False. Presumably flags a
            metric whose value should be negated - TODO confirm.
        probabilities (Optional[bool]): defaults to False. Presumably flags a
            metric computed from predicted probabilities - TODO confirm.
        actual (Optional[str]): defaults to 'y_true'. Presumably the argument
            name under which actual values are passed - TODO confirm.
        predicted (Optional[str]): defaults to 'y_pred'. Presumably the
            argument name under which predicted values are passed - TODO
            confirm.
        conditional (Optional[bool]): defaults to False. Purpose is not
            visible from this class - TODO document.

    To Do:
        Add attributes for cluster metrics.

    """
    name: Optional[str] = None
    step: Optional[str] = dataclasses.field(default_factory = lambda: 'measure')
    module: Optional[str] = None
    algorithm: Optional[object] = None
    parameters: Optional[Dict[str, Any]] = dataclasses.field(default_factory = dict)
    negative: Optional[bool] = False
    probabilities: Optional[bool] = False
    actual: Optional[str] = 'y_true'
    predicted: Optional[str] = 'y_pred'
    conditional: Optional[bool] = False
class Review(Chapter):
    """Evaluations for a 'Cookbook' recipe.

    NOTE(review): the attributes below are declared with 'dataclasses.field',
    so the class is presumably decorated as a dataclass outside this view.

    Args:
        name (Optional[str]): designates the name of the class used for
            internal referencing throughout siMpLify. If the class needs
            settings from the shared 'Idea' instance, 'name' should match the
            appropriate section name in 'Idea'. When subclassing, it is a good
            idea to use the same 'name' attribute as the base class for
            effective coordination between siMpLify classes. 'name' is used
            instead of __class__.__name__ to make such subclassing easier.
            Defaults to None. If not passed, __class__.__name__.lower() is
            used.
        steps (Optional[List[str]]): names of steps. Defaults to an empty
            list.
        explanations (Optional[Dict[str, pd.DataFrame]]): results from any
            'Explainer' methods applied to the data analysis. Defaults to an
            empty dictionary.
        predictions (Optional[Dict[str, pd.Series]]): results from any
            'Predictor' methods applied to the data analysis. Defaults to an
            empty dictionary.
        importances (Optional[Dict[str, pd.DataFrame]]): results from any
            'Ranker' methods applied to the data analysis. Defaults to an
            empty dictionary.
        metrics (Optional[Dict[str, pd.Series]]): stored metric results,
            presumably filled by the 'measure' step - TODO confirm. Defaults
            to an empty dictionary.
        reports (Optional[Dict[str, pd.DataFrame]]): results from any
            'Reporter' methods applied to the data analysis. Defaults to an
            empty dictionary.

    """
    name: Optional[str] = None
    steps: Optional[List[str]] = dataclasses.field(default_factory = list)
    explanations: Optional[Dict[str, pd.DataFrame]] = dataclasses.field(
        default_factory = dict)
    predictions: Optional[Dict[str, pd.Series]] = dataclasses.field(
        default_factory = dict)
    importances: Optional[Dict[str, pd.DataFrame]] = dataclasses.field(
        default_factory = dict)
    metrics: Optional[Dict[str, pd.Series]] = dataclasses.field(
        default_factory = dict)
    reports: Optional[Dict[str, pd.DataFrame]] = dataclasses.field(
        default_factory = dict)
class Critic(Worker):
    """Object construction instructions used by a Project instance.

    Args:
        name (Optional[str]): designates the name of the class used for
            internal referencing throughout siMpLify. If the class needs
            settings from the shared 'Idea' instance, 'name' should match the
            appropriate section name in 'Idea'. When subclassing, it is a good
            idea to use the same 'name' attribute as the base class for
            effective coordination between siMpLify classes. 'name' is used
            instead of __class__.__name__ to make such subclassing easier.
            Defaults to 'critic'.
        module (Optional[str]): name of module where object to use is located
            (can either be a siMpLify or non-siMpLify module). Defaults to
            'simplify.critic.critic'.
        book (Optional[str]): name of Book object in 'module' to load.
            Defaults to 'Anthology'.
        chapter (Optional[str]): name of Chapter object in 'module' to load.
            Defaults to 'Review'.
        technique (Optional[str]): name of Technique object in 'module' to
            load. Defaults to 'Evaluator'.
        scholar (Optional[str]): name of Scholar class in 'module' to load.
            Defaults to 'CriticScholar'.
        options (Optional[str]): name of the options object in 'module' to
            load. Defaults to 'Evaluators'.
        data (Optional[str]): name of attribute or key in a 'Project' instance
            'books' to use as a data object to apply methods to. Defaults to
            'analyst'.
        idea (Optional[Idea]): shared project configuration settings. Defaults
            to None.

    Arguments such as 'publisher', 'steps', 'import_folder' and
    'export_folder' are not redeclared here; they are presumably inherited
    from 'Worker' - TODO confirm.

    """
    name: Optional[str] = dataclasses.field(default_factory = lambda: 'critic')
    module: Optional[str] = dataclasses.field(
        default_factory = lambda: 'simplify.critic.critic')
    book: Optional[str] = dataclasses.field(default_factory = lambda: 'Anthology')
    chapter: Optional[str] = dataclasses.field(default_factory = lambda: 'Review')
    technique: Optional[str] = dataclasses.field(default_factory = lambda: 'Evaluator')
    scholar: Optional[str] = dataclasses.field(default_factory = lambda: 'CriticScholar')
    options: Optional[str] = dataclasses.field(default_factory = lambda: 'Evaluators')
    data: Optional[str] = dataclasses.field(default_factory = lambda: 'analyst')
    idea: Optional[core.Idea] = None

    """ Core siMpLify Methods """

    def outline(self) -> Dict[str, List[str]]:
        """Creates dictionary with techniques for each step.

        Step names and technique names are both fetched with
        'self._get_settings', using 'self.name' as section and prefix. Only
        techniques found in 'self.options' are kept.

        NOTE(review): the technique lookup uses the same arguments for every
        step, so every step appears to receive the same list - confirm whether
        the prefix was meant to be the step name.

        Returns:
            Dict[str, List[str]]: dictionary with keys of steps and values of
                lists of techniques.

        """
        step_names = self._get_settings(
            section = self.name,
            prefix = self.name,
            suffix = 'steps')
        plan = {}
        for step_name in step_names:
            candidates = self._get_settings(
                section = self.name,
                prefix = self.name,
                suffix = 'techniques')
            plan[step_name] = [
                candidate for candidate in candidates
                if candidate in self.options]
        return plan
class Recipe(Chapter):
    """Standard class for bottom-level Analyst subpackage iterable storage.

    Args:
        name (Optional[str]): designates the name of the class used for
            internal referencing throughout siMpLify. If the class needs
            settings from the shared 'Idea' instance, 'name' should match the
            appropriate section name in 'Idea'. When subclassing, it is a good
            idea to use the same 'name' attribute as the base class for
            effective coordination between siMpLify classes. 'name' is used
            instead of __class__.__name__ to make such subclassing easier.
            Defaults to None. If not passed, __class__.__name__.lower() is
            used.
        steps (Optional[List[Tuple[str, str]]]): tuples of steps and
            techniques. Defaults to an empty list.
        techniques (Optional[List['Technique']]): 'Technique' instances to
            apply. In an ordinary project, 'techniques' are not passed to a
            Chapter instance, but are instead created from 'steps' when the
            'publish' method of a 'Project' instance is called. Defaults to an
            empty list.

    """
    name: Optional[str] = None
    steps: Optional[List[Tuple[str, str]]] = dataclasses.field(default_factory = list)
    techniques: Optional[List['Technique']] = dataclasses.field(default_factory = list)

    """ Dunder Methods """

    def __iter__(self) -> Iterable:
        """Returns iterable of 'techniques' or 'steps'.

        Returns:
            Iterable: of 'techniques' or 'steps', if 'techniques' is empty.

        """
        return iter(self.techniques or self.steps)

    def __len__(self) -> int:
        """Returns length of 'techniques' or 'steps'.

        Returns:
            int: length of 'techniques' or 'steps', if 'techniques' is empty.

        """
        return len(self.techniques or self.steps)

    """ Proxy Property Methods """

    def _proxy_getter(self) -> List['Technique']:
        """Proxy getter for 'techniques'.

        Returns:
            List['Technique']: the stored 'techniques'.

        """
        return self.techniques

    def _proxy_setter(self, value: List['Technique']) -> 'Recipe':
        """Proxy setter for 'techniques'.

        Args:
            value (List['Technique']): list of 'Technique' instances to store.

        Returns:
            'Recipe': the instance itself.

        """
        self.techniques = value
        return self

    def _proxy_deleter(self) -> 'Recipe':
        """Proxy deleter for 'techniques'; resets it to an empty list.

        Returns:
            'Recipe': the instance itself.

        """
        self.techniques = []
        return self

    """ Public Methods """

    def add(self,
            techniques: Union[
                List['Technique'],
                'Technique',
                List[Tuple[str, str]],
                Tuple[str, str]]) -> 'Recipe':
        """Combines 'techniques' with 'steps' or 'techniques' attribute.

        If a tuple or list of tuples is passed, 'techniques' are added to the
        'steps' attribute. Otherwise, they are added to the 'techniques'
        attribute. The decision is based on the type of the first item only.

        Args:
            techniques (Union[List['Technique'], 'Technique', List[Tuple[str,
                str]], Tuple[str, str]]): a 'Technique' instance or tuple used
                to create one.

        Returns:
            'Recipe': the instance itself.

        """
        # Normalise once instead of re-listifying for every use.
        items = utilities.listify(techniques)
        # Nothing to add; also avoids an IndexError on 'items[0]' below.
        if not items:
            return self
        # The builtin 'tuple' is the supported target for isinstance checks.
        if isinstance(items[0], tuple):
            self.steps.extend(items)
        else:
            self.techniques.extend(items)
        return self
class Tool(Technique):
    """Base method wrapper for applying algorithms to data.

    Args:
        name (Optional[str]): designates the name of the class used for
            internal referencing throughout siMpLify. If the class needs
            settings from the shared 'Idea' instance, 'name' should match the
            appropriate section name in 'Idea'. When subclassing, it is a good
            idea to use the same 'name' attribute as the base class for
            effective coordination between siMpLify classes. 'name' is used
            instead of __class__.__name__ to make such subclassing easier.
            Defaults to None or __class__.__name__.lower() if
            super().__post_init__ is called.
        step (Optional[str]): name of step when the class instance is to be
            applied. Defaults to None.
        module (Optional[str]): name of module where object to use is located
            (can either be a siMpLify or non-siMpLify module). Defaults to
            None.
        algorithm (Optional[object]): process object which executes the
            primary method of a class instance. Defaults to None.
        parameters (Optional[Dict[str, Any]]): parameters to be attached to
            'algorithm' when 'algorithm' is instanced. Defaults to an empty
            dictionary.
        fit_method (Optional[str]): name of the method of 'algorithm' called
            by 'fit'. If None, 'fit' does not call 'algorithm'. Defaults to
            'fit'.
        transform_method (Optional[str]): name of the method of 'algorithm'
            called by 'transform'. If None, 'transform' returns its input.
            Defaults to 'transform'.

    The remaining attributes ('default', 'required', 'runtime', 'selected',
    'data_dependent', 'parameter_space') are not used by the methods in this
    class; they are presumably consumed when parameters are assembled
    elsewhere - TODO document.

    """
    name: Optional[str] = None
    step: Optional[str] = None
    module: Optional[str] = None
    algorithm: Optional[object] = None
    parameters: Optional[Dict[str, Any]] = dataclasses.field(default_factory = dict)
    default: Optional[Dict[str, Any]] = dataclasses.field(default_factory = dict)
    required: Optional[Dict[str, Any]] = dataclasses.field(default_factory = dict)
    runtime: Optional[Dict[str, str]] = dataclasses.field(default_factory = dict)
    selected: Optional[Union[bool, List[str]]] = False
    data_dependent: Optional[Dict[str, str]] = dataclasses.field(default_factory = dict)
    parameter_space: Optional[Dict[str, List[Union[int, float]]]] = dataclasses.field(
        default_factory = dict)
    fit_method: Optional[str] = dataclasses.field(default_factory = lambda: 'fit')
    transform_method: Optional[str] = dataclasses.field(
        default_factory = lambda: 'transform')

    """ Core siMpLify Methods """

    def apply(self, data: 'Dataset') -> 'Dataset':
        """Fits the algorithm and transforms the features stored in 'data'.

        When 'data.stages.current' is 'full', the complete 'x'/'y' pair is used
        for fitting and 'x' is replaced by its transformed version. Otherwise
        the algorithm is fit on the training split and both 'x_train' and
        'x_test' are replaced by their transformed versions.

        Args:
            data ('Dataset'): data object to fit on and to modify in place.

        Returns:
            'Dataset': the same 'data' object with transformed features.

        """
        if data.stages.current == 'full':
            self.fit(x = data.x, y = data.y)
            data.x = self.transform(x = data.x, y = data.y)
        else:
            self.fit(x = data.x_train, y = data.y_train)
            data.x_train = self.transform(x = data.x_train, y = data.y_train)
            data.x_test = self.transform(x = data.x_test, y = data.y_test)
        return data

    """ Scikit-Learn Compatibility Methods """

    @numpy_shield
    def fit(self,
            x: Optional[Union[pd.DataFrame, np.ndarray]] = None,
            y: Optional[Union[pd.Series, np.ndarray]] = None) -> 'Tool':
        """Generic fit method for partial compatibility to sklearn.

        Args:
            x (Optional[Union[pd.DataFrame, np.ndarray]]): independent
                variables/features.
            y (Optional[Union[pd.Series, np.ndarray]]): dependent
                variable/label. If None, only 'x' is validated and passed to
                the fit method.

        Returns:
            'Tool': the instance itself.

        Raises:
            AttributeError: if 'fit_method' is set but 'algorithm' has no
                method with that name.
            ValueError: if sklearn input validation rejects 'x' or 'y'.

        """
        if y is None:
            # 'check_X_y' rejects a missing 'y', so unsupervised input is
            # validated with 'check_array' instead.
            from sklearn.utils.validation import check_array
            x = check_array(x, accept_sparse = True)
        else:
            x, y = check_X_y(X = x, y = y, accept_sparse = True)
        if self.fit_method is not None:
            fit_function = getattr(self.algorithm, self.fit_method)
            if y is None:
                fit_function(x)
            else:
                fitted = fit_function(x, y)
                # sklearn-style fit methods return the fitted estimator; keep
                # the current algorithm if the method returns nothing.
                if fitted is not None:
                    self.algorithm = fitted
        return self

    @numpy_shield
    def transform(self,
            x: Optional[Union[pd.DataFrame, np.ndarray]] = None,
            y: Optional[Union[pd.Series, np.ndarray]] = None) -> pd.DataFrame:
        """Generic transform method for partial compatibility to sklearn.

        Args:
            x (Optional[Union[pd.DataFrame, np.ndarray]]): independent
                variables/features.
            y (Optional[Union[pd.Series, np.ndarray]]): dependent
                variable/label. Accepted for signature compatibility; not
                used.

        Returns:
            transformed 'x', or 'x' unchanged when 'transform_method' is None
                or 'algorithm' has no method with that name.

        """
        if self.transform_method is None:
            return x
        # Only a missing method means "nothing to transform"; errors raised
        # inside the transform itself are allowed to propagate.
        transformer = getattr(self.algorithm, self.transform_method, None)
        if transformer is None:
            return x
        return transformer(x)
class ShapExplain(Explainer):
    """Explains model performance with the shap package.

    'draft' registers one 'Evaluator' per shap explainer family and a mapping
    from model names to those families. 'apply' picks the explainer matching
    the model of a chapter and stores the resulting shap values in the
    chapter's 'explanations'.

    Args:
        idea (Optional[Idea]): an instance with project settings. Its
            ['critic']['data_to_review'] entry selects which 'x_*' attribute
            of the chapter data is explained. Defaults to None.
        name (Optional[str]): name used for internal referencing. Defaults to
            'shap'.

    """
    idea: Optional[core.Idea] = None
    name: Optional[str] = dataclasses.field(default_factory = lambda: 'shap')

    """ Private Methods """

    def _apply_explain(self, recipe: 'Recipe', review: 'Review') -> 'Review':
        return review

    def _apply_predict(self, recipe: 'Recipe', review: 'Review') -> 'Review':
        return review

    def _apply_rank(self, recipe: 'Recipe', review: 'Review') -> 'Review':
        return review

    def _apply_measure(self, recipe: 'Recipe', review: 'Review') -> 'Review':
        return review

    def _apply_report(self, recipe: 'Recipe', review: 'Review') -> 'Review':
        return review

    def _set_algorithm(self, data: 'Chapter') -> object:
        """Loads the shap explainer matching the current model.

        The explainer family is looked up from 'self.model.name'; models
        without a registered family (including 'baseline', mapped to 'none')
        fall back to the 'kernel' explainer.

        Args:
            data ('Chapter'): accepted for interface compatibility; not used.

        Returns:
            object: result of calling 'load' on the selected 'Evaluator'.

        """
        try:
            evaluator = self.options[self.algorithm_types[self.model.name]]
        except KeyError:
            evaluator = self.options['kernel']
        return evaluator.load('algorithm')

    def _apply_to_chapter(self, chapter: 'Chapter') -> 'Chapter':
        """Computes shap values for 'chapter' and stores them.

        Args:
            chapter ('Chapter'): instance whose 'data' holds the features to
                explain and whose 'explanations' receives the results.

        Returns:
            'Chapter': the same 'chapter' with 'shap_values' (and, for tree
                explainers, 'shap_interactions') added to 'explanations'.

        """
        import shap
        # Features under review, e.g. 'x_test' when 'data_to_review' is 'test'.
        features = getattr(
            chapter.data,
            '_'.join(['x', self.idea['critic']['data_to_review']]))
        self.algorithm = self.algorithm(
            model = self.model.algorithm,
            data = features)
        shap_values = self.algorithm.shap_values(features)
        chapter.explanations['shap_values'] = shap_values
        if self.algorithm_types.get(self.model.name) == 'tree':
            chapter.explanations['shap_interactions'] = (
                self.algorithm.shap_interaction_values(features))
        shap.initjs()
        # NOTE(review): the plot object is discarded, and the indexing assumes
        # a 2-D array of shap values and a DataFrame of features - confirm.
        shap.force_plot(
            self.algorithm.expected_value,
            shap_values[0, :],
            features.iloc[0, :])
        return chapter

    """ Core siMpLify Methods """

    def draft(self) -> 'ShapExplain':
        """Registers available shap explainers and model-to-explainer types.

        Returns:
            'ShapExplain': the instance itself.

        """
        self.options = {
            'deep' : Evaluator(
                name = 'shap_explanation',
                module = 'shap',
                algorithm = 'DeepExplainer'),
            'kernel' : Evaluator(
                name = 'shap_explanation',
                module = 'shap',
                algorithm = 'KernelExplainer'),
            'linear' : Evaluator(
                name = 'shap_explanation',
                module = 'shap',
                algorithm = 'LinearExplainer'),
            'tree' : Evaluator(
                name = 'shap_explanation',
                module = 'shap',
                algorithm = 'TreeExplainer')}
        self.algorithm_types = {
            'baseline': 'none',
            'catboost': 'tree',
            'decision_tree': 'tree',
            'lasso': 'linear',
            'lasso_lars': 'linear',
            'light_gbm': 'tree',
            'logit': 'linear',
            'ols': 'linear',
            'random_forest': 'tree',
            'ridge': 'linear',
            'svm_linear': 'linear',
            'tensor_flow': 'deep',
            'torch': 'deep',
            'xgboost': 'tree'}
        return self

    def apply(self, data: 'Chapter') -> 'Chapter':
        """Explains the model of 'data' with the matching shap explainer.

        Args:
            data ('Chapter'): chapter containing the model and data to explain.

        Returns:
            'Chapter': the same 'data' with shap explanations added.

        """
        self.model = self._get_estimator(chapter = data)
        self.algorithm = self._set_algorithm(data = data)
        self._apply_to_chapter(chapter = data)
        return data
class Eli5Explain(Explainer):
    """Explains model performance with the ELI5 package.

    NOTE(review): 'apply' references several names that are not defined in
    this class, so it looks unfinished; see the comments in that method.

    Args:
        idea (Optional[Idea]): an instance with project settings. Defaults to
            None.
        name (Optional[str]): name used for internal referencing. Defaults to
            'eli5'.

    """
    idea: Optional[core.Idea] = None
    name: Optional[str] = dataclasses.field(default_factory = lambda: 'eli5')

    """ Private Methods """

    # Each hook below returns the passed 'review' unchanged.

    def _apply_explain(self, recipe: 'Recipe', review: 'Review') -> 'Review':
        return review

    def _apply_predict(self, recipe: 'Recipe', review: 'Review') -> 'Review':
        return review

    def _apply_rank(self, recipe: 'Recipe', review: 'Review') -> 'Review':
        return review

    def _apply_measure(self, recipe: 'Recipe', review: 'Review') -> 'Review':
        return review

    def _apply_report(self, recipe: 'Recipe', review: 'Review') -> 'Review':
        return review

    """ Core siMpLify Methods """

    def draft(self) -> None:
        """Sets 'options' and 'algorithm_types'; returns the instance itself.

        NOTE(review): only 'permutation' points at an eli5 object; the
        'kernel', 'linear' and 'tree' entries name shap explainers. The
        'specific' and 'none' values in 'algorithm_types' have no matching key
        in 'options' - confirm the intended mapping.

        """
        self.options = {
            'permutation' : Evaluator(
                name = 'permutation_importance',
                module = 'eli5.sklearn',
                algorithm = 'PermutationImportance',
                runtime = {'random_state': 'seed', 'estimator': 'estimator'}),
            'kernel' : Evaluator(
                name = 'shap_explanation',
                module = 'shap',
                algorithm = 'KernelExplainer'),
            'linear' : Evaluator(
                name = 'shap_explanation',
                module = 'shap',
                algorithm = 'LinearExplainer'),
            'tree' : Evaluator(
                name = 'shap_explanation',
                module = 'shap',
                algorithm = 'TreeExplainer')}
        self.algorithm_types = {
            'baseline': 'none',
            'catboost': 'specific',
            'decision_tree': 'specific',
            'lasso': 'specific',
            'lasso_lars': 'specific',
            'light_gbm': 'specific',
            'logit': 'specific',
            'ols': 'specific',
            'random_forest': 'specific',
            'ridge': 'specific',
            'svm_linear': 'specific',
            'tensor_flow': 'permutation',
            'torch': 'permutation',
            'xgboost': 'specific'}
        return self

    def apply(self, data: 'Chapter') -> 'Chapter':
        """Computes permutation importances and returns 'data' unchanged.

        Args:
            data ('Chapter'): returned as passed; not otherwise used by the
                current body.

        Returns:
            'Chapter': the 'data' argument.

        """
        # NOTE(review): 'score_func', 'X' and 'y' are neither parameters nor
        # locals; unless they exist at module level this line raises
        # NameError. 'get_score_importances' is presumably imported from eli5
        # elsewhere in the file - confirm.
        base_score, score_decreases = get_score_importances(score_func, X, y)
        # 'base_score' and 'feature_importances' are computed but never used.
        feature_importances = np.mean(score_decreases, axis = 0)
        # NOTE(review): 'recipe' is not defined in this method and
        # 'self.permutation_importances' is not assigned anywhere in this
        # class - confirm where they are expected to come from.
        self.permutation_weights = show_weights(
            self.permutation_importances,
            feature_names = recipe.dataset.columns.keys())
        return data