Example #1
    def display_model_analysis(self):
        """
        Displays information about the model used: class name, library name,
        library version, model parameters, ...
        """
        print_md(f"**Model used :** {self.explainer.model.__class__.__name__}")

        print_md(f"**Library :** {self.explainer.model.__class__.__module__}")

        # Look up the version of the top-level library the model class comes from
        root_module_name = self.explainer.model.__class__.__module__.split('.')[0]
        for name, module in sorted(sys.modules.items()):
            if hasattr(module, '__version__') and module.__name__ == root_module_name:
                print_md(f"**Library version :** {module.__version__}")

        print_md("**Model parameters :** ")
        model_params = self.explainer.model.__dict__
        params = list(model_params.items())
        half = len(params) // 2
        table_template = template_env.get_template("double_table.html")
        print_html(
            table_template.render(
                columns1=["Parameter key", "Parameter value"],
                # First half of the parameters
                rows1=[{"name": truncate_str(str(k), 50),
                        "value": truncate_str(str(v), 300)}
                       for k, v in params[:half]],
                columns2=["Parameter key", "Parameter value"],
                # Second half of the parameters
                rows2=[{"name": truncate_str(str(k), 50),
                        "value": truncate_str(str(v), 300)}
                       for k, v in params[half:]]
            ))
        print_md('---')
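The parameter table splits the model's parameters into two halves so they render as two side-by-side tables. A minimal standalone sketch of that split, using a made-up parameter dict:

    # Hypothetical parameter dict, standing in for model.__dict__
    model_params = {'alpha': 0.1, 'max_iter': 100, 'fit_intercept': True, 'tol': 1e-4}
    params = list(model_params.items())
    half = len(params) // 2
    rows1 = params[:half]   # first half -> left table
    rows2 = params[half:]   # second half -> right table
    print(rows1)            # [('alpha', 0.1), ('max_iter', 100)]
    print(rows2)            # [('fit_intercept', True), ('tol', 0.0001)]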
Example #2
    def display_model_explainability(self):
        """
        Displays the explainability of the model as computed in the SmartPlotter object.
        """
        print_md(
            "*Note : the explainability graphs were generated using the test set only.*"
        )
        explainability_template = template_env.get_template("explainability.html")
        explain_data = list()
        multiclass = bool(self.explainer._classes
                          and len(self.explainer._classes) > 2)
        # The list of labels is only meaningful in the multiclass case
        c_list = self.explainer._classes if multiclass else [0]
        # Iterate over all labels (a single dummy label otherwise)
        for index_label, label in enumerate(c_list):
            label_value = self.explainer.check_label_name(label)[2] if multiclass else ''
            fig_features_importance = self.explainer.plot.features_importance(label=label)

            explain_contrib_data = list()
            list_cols_labels = [
                self.explainer.features_dict.get(col, col)
                for col in self.col_names
            ]
            for feature_label in sorted(list_cols_labels):
                feature = self.explainer.inv_features_dict.get(feature_label, feature_label)
                fig = self.explainer.plot.contribution_plot(feature,
                                                            label=label,
                                                            max_points=200)
                explain_contrib_data.append({
                    'feature_index': int(self.explainer.inv_columns_dict[feature]),
                    'name': feature,
                    'description': self.explainer.features_dict[feature],
                    'plot': plotly.io.to_html(fig,
                                              include_plotlyjs=False,
                                              full_html=False)
                })
            explain_data.append({
                'index': index_label,
                'name': label_value,
                'feature_importance_plot': plotly.io.to_html(fig_features_importance,
                                                             include_plotlyjs=False,
                                                             full_html=False),
                'features': explain_contrib_data
            })
        print_html(explainability_template.render(labels=explain_data))
        print_md('---')
Example #3
    def display_dataset_analysis(self,
                                 global_analysis: bool = True,
                                 univariate_analysis: bool = True,
                                 target_analysis: bool = True,
                                 multivariate_analysis: bool = True):
        """
        Performs and displays an exploration of the given data.
        It allows comparing train and test values in each part of the analysis.

        The parameters of the method control which parts are displayed.

        Parameters
        ----------
        global_analysis : bool
            Whether or not to display the global analysis part.
        univariate_analysis : bool
            Whether or not to display the univariate analysis part.
        target_analysis : bool
            Whether or not to display the target analysis part that plots
            the distribution of the target variable.
        multivariate_analysis : bool
            Whether or not to display the multivariate analysis part.
        """
        if global_analysis:
            print_md("### Global analysis")
            self._display_dataset_analysis_global()

        if univariate_analysis:
            print_md("### Univariate analysis")
            self._perform_and_display_analysis_univariate(
                df=self.df_train_test,
                col_splitter="data_train_test",
                split_values=["test", "train"],
                names=["Prediction dataset", "Training dataset"],
                group_id='univariate')
        if target_analysis:
            df_target = self._create_train_test_df(
                test=pd.DataFrame({self.target_name: self.y_test},
                                  index=range(len(self.y_test)))
                if self.y_test is not None else None,
                train=pd.DataFrame({self.target_name: self.y_train},
                                   index=range(len(self.y_train)))
                if self.y_train is not None else None)
            if df_target is not None:
                print_md("### Target analysis")
                self._perform_and_display_analysis_univariate(
                    df=df_target,
                    col_splitter="data_train_test",
                    split_values=["test", "train"],
                    names=["Prediction dataset", "Training dataset"],
                    group_id='target')
        if multivariate_analysis:
            print_md("### Multivariate analysis")
            fig_corr = generate_correlation_matrix_fig(self.df_train_test,
                                                       max_features=20)
            print_html(convert_fig_to_html(fig=fig_corr))
        print_md('---')
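A usage sketch, assuming `report` is an instance of the report class these methods belong to; the boolean flags let you skip parts of the dataset analysis:

    # Skip the multivariate (correlation) part, keep everything else
    report.display_dataset_analysis(multivariate_analysis=False)

    # Only display the target distribution comparison
    report.display_dataset_analysis(global_analysis=False,
                                    univariate_analysis=False,
                                    multivariate_analysis=False)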
Example #4
    def display_title_description(self):
        """
        Displays the title of the report and its description if defined.
        """
        print_html(
            f"""<h1 style="text-align:center">{self.title_story}</h1> """)
        if self.title_description != '':
            print_html(
                f'<blockquote class="panel-warning text_cell_render">{self.title_description} </blockquote>'
            )
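`print_md` and `print_html` are report helpers that are not shown in these excerpts. A minimal sketch of what such helpers could look like in a notebook context, assuming IPython is available (the actual helpers may differ):

    from IPython.display import HTML, Markdown, display

    def print_md(text: str):
        # Render a Markdown string in the notebook cell output
        display(Markdown(text))

    def print_html(text: str):
        # Render a raw HTML string in the notebook cell output
        display(HTML(text))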
Example #5
    def _perform_and_display_analysis_univariate(self, df: pd.DataFrame,
                                                 col_splitter: str,
                                                 split_values: list,
                                                 names: list, group_id: str):
        """
        Computes univariate statistics and a plot for each feature of df, comparing
        the two splits defined by col_splitter, and renders the result with the
        univariate HTML template.
        """
        col_types = compute_col_types(df)
        n_splits = df[col_splitter].nunique()
        test_stats_univariate = perform_univariate_dataframe_analysis(
            df.loc[df[col_splitter] == split_values[0]], col_types=col_types)
        if n_splits > 1:
            train_stats_univariate = perform_univariate_dataframe_analysis(
                df.loc[df[col_splitter] == split_values[1]],
                col_types=col_types)

        univariate_template = template_env.get_template("univariate.html")
        univariate_features_desc = list()
        list_cols_labels = [
            self.explainer.features_dict.get(col, col)
            for col in df.drop(col_splitter, axis=1).columns.to_list()
        ]
        for col_label in sorted(list_cols_labels):
            col = self.explainer.inv_features_dict.get(col_label, col_label)
            fig = generate_fig_univariate(df_all=df,
                                          col=col,
                                          hue=col_splitter,
                                          type=col_types[col])
            df_col_stats = self._stats_to_table(
                test_stats=test_stats_univariate[col],
                train_stats=train_stats_univariate[col] if n_splits > 1 else None,
                names=names)
            univariate_features_desc.append({
                'feature_index': int(self.explainer.inv_columns_dict.get(col, 0)),
                'name': col,
                'type': str(series_dtype(df[col])),
                'description': col_label,
                'table': df_col_stats.to_html(classes="greyGridTable"),
                'image': convert_fig_to_html(fig)
            })
        print_html(
            univariate_template.render(features=univariate_features_desc,
                                       groupId=group_id))
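The method compares the two data splits by filtering on the splitter column. A small standalone pandas sketch of that filtering pattern, with made-up data:

    import pandas as pd

    df = pd.DataFrame({
        'age': [22, 35, 41, 29],
        'data_train_test': ['test', 'train', 'train', 'test'],
    })
    col_splitter = 'data_train_test'
    split_values = ['test', 'train']

    n_splits = df[col_splitter].nunique()               # 2 -> both splits present
    test_part = df.loc[df[col_splitter] == split_values[0]]
    train_part = df.loc[df[col_splitter] == split_values[1]]
    print(len(test_part), len(train_part))              # 2 2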
Example #6
    def display_model_performance(self):
        """
        Displays the performance of the model. The metrics are computed using the config dict.

        Metrics should be given as a list of dicts. Each dict contains the following keys:
        'path' (path to the metric function, e.g. 'sklearn.metrics.mean_absolute_error'),
        'name' (optional, name of the metric as displayed in the report),
        and 'use_proba_values' (optional, possible values are False (default) or True
        if the metric uses proba values instead of predicted values).

        For example:
        config['metrics'] = [
                {
                    'path': 'sklearn.metrics.mean_squared_error',
                    'name': 'Mean squared error'  # Optional : name displayed next to the metric
                },
                {
                    'path': 'Scoring_AP.utils.lift10',  # Custom function path
                    'name': 'Lift10',
                    'use_proba_values': True  # Use proba values instead of predicted values
                }
            ]
        """
        if self.y_test is None:
            logging.info(
                "No labels given for test set. Skipping model performance part"
            )
            return

        print_md("### Univariate analysis of target variable")
        df = pd.concat([
            pd.DataFrame({self.target_name: self.y_pred}).assign(_dataset="pred"),
            pd.DataFrame({self.target_name: self.y_test}).assign(_dataset="true")
        ])
        self._perform_and_display_analysis_univariate(
            df=df,
            col_splitter="_dataset",
            split_values=["pred", "true"],
            names=["Prediction values", "True values"],
            group_id='target-distribution')

        if 'metrics' not in self.config.keys():
            logging.info(
                "No 'metrics' key found in report config dict. Skipping model performance part."
            )
            return
        print_md("### Metrics")

        for metric in self.config['metrics']:
            if 'name' not in metric.keys():
                metric['name'] = metric['path']

            if metric['path'] in ['confusion_matrix', 'sklearn.metrics.confusion_matrix'] or \
                    metric['name'] == 'confusion_matrix':
                print_md(f"**{metric['name']} :**")
                print_html(
                    convert_fig_to_html(
                        generate_confusion_matrix_plot(y_true=self.y_test,
                                                       y_pred=self.y_pred)))
            else:
                try:
                    metric_fn = get_callable(path=metric['path'])
                    # Use proba values instead of predicted values if requested
                    if metric.get('use_proba_values') is True:
                        y_pred = self.explainer.proba_values
                    else:
                        y_pred = self.y_pred
                    res = metric_fn(self.y_test, y_pred)
                except Exception as e:
                    logging.info(
                        f"Could not compute the following metric : {metric['path']}. \n{e}"
                    )
                    continue
                if isinstance(res, Number):
                    res = display_value(round_to_k(res, 3))
                    print_md(f"**{metric['name']} :** {res}")
                elif isinstance(res, (list, tuple, np.ndarray)):
                    print_md(f"**{metric['name']} :**")
                    print_html(
                        pd.DataFrame(res).to_html(classes="greyGridTable"))
                elif isinstance(res, str):
                    print_md(f"**{metric['name']} :**")
                    print_html(f"<pre>{res}</pre>")
                else:
                    logging.info(
                        f"Could not display the following metric : {metric['path']}. \n"
                        f"Result of type {type(res)} cannot be displayed")
        print_md('---')
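A configuration sketch matching the keys the code actually reads ('path', optional 'name', optional 'use_proba_values'), together with a hypothetical dotted-path resolver similar in spirit to `get_callable` (the real helper may behave differently):

    import importlib

    config = {'metrics': [
        {'path': 'sklearn.metrics.mean_squared_error',
         'name': 'Mean squared error'},
        {'path': 'sklearn.metrics.roc_auc_score',
         'name': 'ROC AUC',
         'use_proba_values': True},  # metric computed on probability values
    ]}

    def resolve_callable(path: str):
        # Hypothetical stand-in for get_callable: import the module part of the
        # dotted path and fetch the attribute after the last dot.
        module_path, _, attr = path.rpartition('.')
        return getattr(importlib.import_module(module_path), attr)

    mse = resolve_callable('sklearn.metrics.mean_squared_error')
    print(mse([1.0, 2.0], [1.1, 1.9]))  # 0.01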
Example #7
    def _display_dataset_analysis_global(self):
        """
        Displays a table of global statistics comparing the prediction and training datasets.
        """
        df_stats_global = self._stats_to_table(
            test_stats=perform_global_dataframe_analysis(self.x_pred),
            train_stats=perform_global_dataframe_analysis(self.x_train_pre),
            names=["Prediction dataset", "Training dataset"])
        print_html(df_stats_global.to_html(classes="greyGridTable"))
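`_stats_to_table` is not shown in these excerpts; judging from how it is called, it turns the two stats dicts into a side-by-side table. A hypothetical sketch of that behaviour, assuming dict-shaped stats:

    import pandas as pd

    def stats_to_table(test_stats: dict, train_stats: dict = None,
                       names: list = None) -> pd.DataFrame:
        # Hypothetical equivalent of _stats_to_table: one column per dataset,
        # one row per statistic; the train column is optional.
        names = names or ["Prediction dataset", "Training dataset"]
        data = {names[0]: pd.Series(test_stats)}
        if train_stats is not None:
            data[names[1]] = pd.Series(train_stats)
        return pd.DataFrame(data)

    print(stats_to_table({'number of features': 10, 'number of observations': 250},
                         {'number of features': 10, 'number of observations': 1000}))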