Ejemplo n.º 1
0
    def test_exexcute_report_1(self):
        """
        Check the files expected in the working directory after execute_report.

        Calls execute_report with no config and no custom notebook path, then
        asserts that 'smart_explainer.pickle' and 'base_report.ipynb' exist in
        the working directory.
        """
        # The context manager removes the directory even when execute_report
        # raises or an assertion fails, so a failing run leaves nothing behind.
        with tempfile.TemporaryDirectory() as tmp_dir_path:
            execute_report(working_dir=tmp_dir_path,
                           explainer=self.xpl,
                           project_info_file=os.path.join(current_path,
                                                          '../data/metadata.yaml'),
                           config=None,
                           notebook_path=None)
            assert os.path.exists(
                os.path.join(tmp_dir_path, 'smart_explainer.pickle'))
            assert os.path.exists(
                os.path.join(tmp_dir_path, 'base_report.ipynb'))
Ejemplo n.º 2
0
    def test_export_and_save_report_1(self):
        """
        Check that export_and_save_report produces the requested output file.

        Runs execute_report in a temporary working directory, then calls
        export_and_save_report on that directory and asserts that the
        requested 'report.html' path exists.
        """
        # The context manager removes the directory even when one of the
        # calls raises or the assertion fails, so nothing is left behind.
        with tempfile.TemporaryDirectory() as tmp_dir_path:
            execute_report(
                working_dir=tmp_dir_path,
                explainer=self.xpl,
                project_info_file=os.path.join(current_path,
                                               '../data/metadata.yaml'),
            )

            outfile = os.path.join(tmp_dir_path, 'report.html')
            export_and_save_report(working_dir=tmp_dir_path,
                                   output_file=outfile)
            assert os.path.exists(outfile)
Ejemplo n.º 3
0
    def generate_report(self,
                        output_file,
                        project_info_file,
                        x_train=None,
                        y_train=None,
                        y_test=None,
                        title_story=None,
                        title_description=None,
                        metrics=None,
                        working_dir=None,
                        notebook_path=None,
                        kernel_name=None):
        """
        This method will generate an HTML report containing different information about the project.

        It analyzes the data and the model used in order to provide interesting
        insights that can be shared using the HTML format.

        It requires a project info yml file which can contain different information about the project.

        Parameters
        ----------
        output_file : str
            Path to the HTML file to write.
        project_info_file : str
            Path to the file used to display some information about the project in the report.
        x_train : pd.DataFrame, optional
            DataFrame used for training the model.
        y_train : pd.Series or pd.DataFrame, optional
            Series of labels in the training set.
        y_test : pd.Series or pd.DataFrame, optional
            Series of labels in the test set.
        title_story : str, optional
            Report title.
        title_description : str, optional
            Report title description (as written just below the title).
        metrics : list of dict, optional
            Metrics used in the model performance section. Each dict contains the following keys :
            'path' (path to the metric function, ex: 'sklearn.metrics.mean_absolute_error'),
            'name' (optional, name of the metric as displayed in the report),
            and 'use_proba_values' (optional, possible values are False (default) or True
            if the metric uses proba values instead of predicted values).
            For example, metrics=[{'name': 'F1 score', 'path': 'sklearn.metrics.f1_score'}]
        working_dir : str, optional
            Working directory in which will be generated the notebook used to create the report
            and where the objects used to execute it will be saved. This parameter can be useful
            if one wants to create its own custom report and debug the notebook used to generate
            the html report. If None, a temporary directory will be used and removed afterwards.
        notebook_path : str, optional
            Path to the notebook used to generate the report. If None, the Shapash base report
            notebook will be used.
        kernel_name : str, optional
            Name of the kernel used to generate the report. This parameter can be useful if
            you have multiple jupyter kernels and the method does not use the right kernel
            by default.

        Raises
        ------
        AssertionError
            If the explainer has no 'model' attribute, i.e. it was not compiled.

        Examples
        --------
        >>> xpl.generate_report(
                output_file='report.html',
                project_info_file='utils/project_info.yml',
                x_train=x_train,
                y_train=y_train,
                y_test=y_test,
                title_story="House prices project report",
                title_description="This document is a data science report of the kaggle house prices project.",
                metrics=[
                    {
                        'path': 'sklearn.metrics.mean_squared_error',
                        'name': 'Mean squared error',  # Optional : name that will be displayed next to the metric
                    },
                    {
                        'path': 'sklearn.metrics.mean_absolute_error',
                        'name': 'Mean absolute error',
                    }
                ]
            )
        """
        # Validate before touching the filesystem so that a call on an
        # uncompiled explainer does not leave an orphan temporary directory.
        if not hasattr(self, 'model'):
            raise AssertionError(
                "Explainer object was not compiled. Please compile the explainer "
                "object using .compile(...) method before generating the report."
            )

        # Only a directory created here is ours to delete; a caller-supplied
        # working_dir is kept so the generated notebook can be inspected.
        rm_working_dir = False
        if not working_dir:
            working_dir = tempfile.mkdtemp()
            rm_working_dir = True

        try:
            execute_report(working_dir=working_dir,
                           explainer=self,
                           project_info_file=project_info_file,
                           x_train=x_train,
                           y_train=y_train,
                           y_test=y_test,
                           config=dict(title_story=title_story,
                                       title_description=title_description,
                                       metrics=metrics),
                           notebook_path=notebook_path,
                           kernel_name=kernel_name)
            export_and_save_report(working_dir=working_dir,
                                   output_file=output_file)
        finally:
            # Clean up the temporary directory even when report generation fails.
            if rm_working_dir:
                shutil.rmtree(working_dir)