Code example #1
0
def binary_classification_panel(self,
                                model,
                                labels=None,
                                title_scale=1.0,
                                color_map="viridis",
                                random_state=1,
                                chart_scale=15,
                                save_objects=False):
    """
    Documentation:

        ---
        Description:
            Generate a panel of reports and visualizations summarizing the
            performance of a classification model on both the training data
            and the validation data.

        ---
        Parameters:
            model : model object
                Instantiated model object.
            labels : list, default=None
                Custom labels for confusion matrix axes. If left as none,
                will default to 0, 1, 2...
            title_scale : float, default=1.0
                Controls the scaling up (higher value) and scaling down (lower value) of the size
                of the main chart title, the x_axis title and the y_axis title.
            color_map : str specifying built-in matplotlib colormap, default="viridis"
                Color map applied to plots.
            random_state : int, default=1
                Random number seed. Not used by this method; retained for
                interface compatibility with the cross-validation variant.
            chart_scale : int or float, default=15
                Controls size and proportions of chart and chart elements. Higher value creates
                larger plots and increases visual elements proportionally.
            save_objects : boolean, default=False
                Controls whether visualizations and summary table are saved to the experiment directory.
    """
    # resolve report / confusion matrix labels once; both the training and
    # validation sections use the same label set derived from the training target
    display_labels = (labels if labels is not None
                      else np.unique(self.training_target.values))

    if not save_objects:
        print("*" * 55)
        print(f"* Estimator: {model.estimator_name}")
        print(f"* Parameter set: {model.model_iter}")
        print("*" * 55)

        print("\n" + "*" * 55)
        print("Training data evaluation\n")

    ## training data
    # fit model on training data and generate predictions using training data
    y_pred = model.fit(self.training_features,
                       self.training_target).predict(self.training_features)

    # generate classification_report using training data
    report = classification_report(
        self.training_target,
        y_pred,
        target_names=display_labels,
        output_dict=True,
    )

    df = pd.DataFrame(report).transpose()

    # save or display classification report
    if save_objects:
        csv_path = os.path.join(
            self.evaluation_classification_report_object_dir,
            f"{model.estimator_name}_train_classification_report.csv")
        # keep the index (the default): the transposed report's index holds
        # the class labels plus the accuracy / macro avg / weighted avg row
        # names; writing with index=False discarded them and made the saved
        # report unreadable
        df.to_csv(csv_path)

    else:
        display(df)

    # create prettierplot object
    p = PrettierPlot(chart_scale=chart_scale, plot_orientation="wide_narrow")

    # add canvas to prettierplot object
    ax = p.make_canvas(
        title=
        f"Confusion matrix - training data\nModel: {model.estimator_name}\nParameter set: {model.model_iter}",
        y_shift=0.4,
        x_shift=0.25,
        position=121,
        title_scale=title_scale,
    )

    # add confusion plot to canvas
    plot_confusion_matrix(
        estimator=model,
        X=self.training_features,
        y_true=self.training_target,
        display_labels=display_labels,
        cmap=color_map,
        values_format=".0f",
        ax=ax,
    )

    # add canvas to prettierplot object
    ax = p.make_canvas(
        title=
        f"ROC curve - training data\nModel: {model.estimator_name}\nParameter set: {model.model_iter}",
        x_label="False positive rate",
        y_label="True positive rate",
        y_shift=0.35,
        position=122,
        title_scale=title_scale,
    )
    # add ROC curve to canvas
    p.roc_curve_plot(
        model=model,
        X_train=self.training_features,
        y_train=self.training_target,
        linecolor=style.style_grey,
        ax=ax,
    )
    plt.subplots_adjust(wspace=0.3)

    # save plots or show
    if save_objects:
        plot_path = os.path.join(
            self.evaluation_plots_object_dir,
            f"{model.estimator_name}_train_visualization.jpg")
        plt.tight_layout()
        plt.savefig(plot_path)
        plt.close()
    else:
        plt.show()

    ## validation data
    if not save_objects:
        print("\n" + "*" * 55)
        print("Validation data evaluation\n")

    # fit model on training data and generate predictions using validation data
    y_pred = model.fit(self.training_features,
                       self.training_target).predict(self.validation_features)

    # generate classification_report using validation data
    report = classification_report(
        self.validation_target,
        y_pred,
        target_names=display_labels,
        output_dict=True,
    )

    df = pd.DataFrame(report).transpose()

    # save or display classification report
    if save_objects:
        csv_path = os.path.join(
            self.evaluation_classification_report_object_dir,
            f"{model.estimator_name}_validation_classification_report.csv")
        # keep the index for the same reason as the training report above
        df.to_csv(csv_path)

    else:
        display(df)

    # create prettierplot object
    p = PrettierPlot(chart_scale=chart_scale, plot_orientation="wide_narrow")

    # add canvas to prettierplot object
    ax = p.make_canvas(
        title=
        f"Confusion matrix - validation data\nModel: {model.estimator_name}\nParameter set: {model.model_iter}",
        y_shift=0.4,
        x_shift=0.25,
        position=121,
        title_scale=title_scale,
    )

    # add confusion matrix to canvas
    plot_confusion_matrix(
        estimator=model,
        X=self.validation_features,
        y_true=self.validation_target,
        display_labels=display_labels,
        cmap=color_map,
        values_format=".0f",
        ax=ax,
    )

    # add canvas to prettierplot object
    ax = p.make_canvas(
        title=
        f"ROC curve - validation data\nModel: {model.estimator_name}\nParameter set: {model.model_iter}",
        x_label="False positive rate",
        y_label="True positive rate",
        y_shift=0.35,
        position=122,
        title_scale=title_scale,
    )
    # add ROC curve to canvas
    p.roc_curve_plot(
        model=model,
        X_train=self.training_features,
        y_train=self.training_target,
        X_valid=self.validation_features,
        y_valid=self.validation_target,
        linecolor=style.style_grey,
        ax=ax,
    )
    plt.subplots_adjust(wspace=0.3)

    # save plots or show
    if save_objects:
        plot_path = os.path.join(
            self.evaluation_plots_object_dir,
            f"{model.estimator_name}_validation_visualization.jpg")
        plt.tight_layout()
        plt.savefig(plot_path)
        plt.close()
    else:
        plt.show()
Code example #2
0
def binary_classification_panel(self, model, X_train, y_train, X_valid=None, y_valid=None, labels=None,
                        n_folds=None, title_scale=1.0, color_map="viridis", random_state=1, chart_scale=15):
    """
    Documentation:

        ---
        Description:
            Generate a panel of reports and visualizations summarizing the
            performance of a classification model. Always evaluates on the
            training data; additionally evaluates on the validation data when
            X_valid/y_valid are provided, otherwise performs stratified
            cross-validation on the training data when n_folds is an int.

        ---
        Parameters:
            model : model object
                Instantiated model object.
            X_train : Pandas DataFrame
                Training data observations.
            y_train : Pandas Series
                Training target data.
            X_valid : Pandas DataFrame, default=None
                Validation data observations.
            y_valid : Pandas Series, default=None
                Validation target data.
            labels : list, default=None
                Custom labels for confusion matrix axes. If left as none,
                will default to 0, 1, 2...
            n_folds : int, default=None
                Number of cross-validation folds to use. If validation data is provided through
                X_valid/y_valid, n_folds is ignored.
            title_scale : float, default=1.0
                Controls the scaling up (higher value) and scaling down (lower value) of the size
                of the main chart title, the x_axis title and the y_axis title.
            color_map : str specifying built-in matplotlib colormap, default="viridis"
                Color map applied to plots.
            random_state : int, default=1
                Random number seed, used when shuffling the cross-validation folds.
            chart_scale : int or float, default=15
                Controls size and proportions of chart and chart elements. Higher value creates
                larger plots and increases visual elements proportionally.
    """
    # resolve report / confusion matrix labels once; every evaluation section
    # uses the same label set derived from the training target
    display_labels = labels if labels is not None else np.unique(y_train.values)

    print("*" * 55)
    print(f"* Estimator: {model.estimator_name}")
    print(f"* Parameter set: {model.model_iter}")
    print("*" * 55)

    print("\n" + "*" * 55)
    print("Training data evaluation\n")

    ## training panel
    # fit model on training data and generate predictions using training data
    y_pred = model.fit(X_train, y_train).predict(X_train)

    # print classification_report using training data
    print(
        classification_report(
            y_train,
            y_pred,
            target_names=display_labels,
        )
    )

    # create prettierplot object
    p = PrettierPlot(chart_scale=chart_scale, plot_orientation="wide_narrow")

    # add canvas to prettierplot object
    ax = p.make_canvas(
        title=f"Confusion matrix - training data\nModel: {model.estimator_name}\nParameter set: {model.model_iter}",
        y_shift=0.4,
        x_shift=0.25,
        position=121,
        title_scale=title_scale,
    )

    # add confusion plot to canvas
    plot_confusion_matrix(
        estimator=model,
        X=X_train,
        y_true=y_train,
        display_labels=display_labels,
        cmap=color_map,
        values_format=".0f",
        ax=ax,
    )

    # add canvas to prettierplot object
    ax = p.make_canvas(
        title=f"ROC curve - training data\nModel: {model.estimator_name}\nParameter set: {model.model_iter}",
        x_label="False positive rate",
        y_label="True positive rate",
        y_shift=0.35,
        position=122,
        title_scale=title_scale,
    )
    # add ROC curve to canvas
    p.roc_curve_plot(
        model=model,
        X_train=X_train,
        y_train=y_train,
        linecolor=style.style_grey,
        ax=ax,
    )
    plt.subplots_adjust(wspace=0.3)
    plt.show()

    # if validation data is provided
    if X_valid is not None:
        print("\n" + "*" * 55)
        print("Validation data evaluation\n")

        # fit model on training data and generate predictions using validation data
        y_pred = model.fit(X_train, y_train).predict(X_valid)

        # print classification_report using validation data
        print(
            classification_report(
                y_valid,
                y_pred,
                target_names=display_labels,
            )
        )

        # create prettierplot object
        p = PrettierPlot(chart_scale=chart_scale, plot_orientation="wide_narrow")

        # add canvas to prettierplot object
        ax = p.make_canvas(
            title=f"Confusion matrix - validation data\nModel: {model.estimator_name}\nParameter set: {model.model_iter}",
            y_shift=0.4,
            x_shift=0.25,
            position=121,
            title_scale=title_scale,
        )

        # add confusion matrix to canvas
        plot_confusion_matrix(
            estimator=model,
            X=X_valid,
            y_true=y_valid,
            display_labels=display_labels,
            cmap=color_map,
            values_format=".0f",
            ax=ax,
        )

        # add canvas to prettierplot object
        ax = p.make_canvas(
            title=f"ROC curve - validation data\nModel: {model.estimator_name}\nParameter set: {model.model_iter}",
            x_label="False positive rate",
            y_label="True positive rate",
            y_shift=0.35,
            position=122,
            title_scale=title_scale,
        )
        # add ROC curve to canvas
        p.roc_curve_plot(
            model=model,
            X_train=X_train,
            y_train=y_train,
            X_valid=X_valid,
            y_valid=y_valid,
            linecolor=style.style_grey,
            ax=ax,
        )
        plt.subplots_adjust(wspace=0.3)
        plt.show()

    # if n_folds are provided, indicating cross-validation
    elif isinstance(n_folds, int):
        print("\n" + "*" * 55)
        print("Cross validation evaluation\n")

        # generate cross-validation indices; shuffle is seeded with
        # random_state so folds are reproducible
        cv = list(
            StratifiedKFold(
                n_splits=n_folds, shuffle=True, random_state=random_state
            ).split(X_train, y_train)
        )

        # NOTE(review): a per-fold color list built with style.color_gen was
        # previously generated here but never used anywhere below, so the
        # dead assignment has been removed.

        # iterate through cross-validation indices
        for i, (train_ix, valid_ix) in enumerate(cv):
            print("\n" + "*" * 55)
            print(f"CV Fold {i + 1}\n")

            X_train_cv = X_train.iloc[train_ix]
            y_train_cv = y_train.iloc[train_ix]
            X_valid_cv = X_train.iloc[valid_ix]
            y_valid_cv = y_train.iloc[valid_ix]

            # fit model on training data and generate predictions using holdout observations
            y_pred = model.fit(X_train_cv, y_train_cv).predict(X_valid_cv)

            # print classification_report using holdout observations
            print(
                classification_report(
                    y_valid_cv,
                    y_pred,
                    target_names=display_labels,
                )
            )

            # create prettierplot object
            p = PrettierPlot(chart_scale=chart_scale, plot_orientation="wide_narrow")

            # add canvas to prettierplot object
            ax = p.make_canvas(
                title=f"Confusion matrix - CV Fold {i + 1}\nModel: {model.estimator_name}\nParameter set: {model.model_iter}",
                y_shift=0.4,
                x_shift=0.25,
                position=121,
                title_scale=title_scale,
            )

            # add confusion matrix to canvas
            plot_confusion_matrix(
                estimator=model,
                X=X_valid_cv,
                y_true=y_valid_cv,
                display_labels=display_labels,
                cmap=color_map,
                values_format=".0f",
                ax=ax,
            )

            # add canvas to prettierplot object
            ax = p.make_canvas(
                title=f"ROC curve - CV Fold {i + 1}\nModel: {model.estimator_name}\nParameter set: {model.model_iter}",
                x_label="False positive rate",
                y_label="True positive rate",
                y_shift=0.35,
                position=122,
                title_scale=title_scale,
            )

            # add ROC curve to canvas
            p.roc_curve_plot(
                model=model,
                X_train=X_train_cv,
                y_train=y_train_cv,
                X_valid=X_valid_cv,
                y_valid=y_valid_cv,
                linecolor=style.style_grey,
                ax=ax,
            )
            plt.subplots_adjust(wspace=0.3)
            plt.show()