Code example #1
def make_plots(y_test, y_pred, y_prob, algorithm, timestamp):
    def _save_and_close(plot_type):
        # plot_type is the output sub-folder: 'cm', 'precrec' or 'roc'
        plt.savefig('static/img/{}/{}-{}.png'.format(plot_type, algorithm,
                                                     timestamp),
                    dpi=200)
        plt.close('all')

    size = (20, 20)
    name = classifier_names[algorithm]

    plot_confusion_matrix(y_test,
                          y_pred,
                          normalize=True,
                          figsize=size,
                          title_fontsize=40,
                          text_fontsize=30,
                          title=name)
    _save_and_close('cm')

    if y_prob is not None:
        plot_precision_recall_curve(y_test,
                                    y_prob,
                                    figsize=size,
                                    title_fontsize=40,
                                    text_fontsize=25,
                                    title=name)
        _save_and_close('precrec')

        plot_roc_curve(y_test,
                       y_prob,
                       figsize=size,
                       title_fontsize=40,
                       text_fontsize=25,
                       title=name)
        _save_and_close('roc')
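The helper above is an excerpt and leans on context the listing does not show. A minimal sketch of that assumed context, under the assumption that the plotting functions come from older scikit-plot's plotters module (the classifier_names mapping and the backend choice below are illustrative, not from the original project):

import matplotlib
matplotlib.use('Agg')  # render off-screen, e.g. when called from a web request handler
import matplotlib.pyplot as plt
from scikitplot.plotters import (plot_confusion_matrix,
                                 plot_precision_recall_curve,
                                 plot_roc_curve)

# hypothetical lookup from the short algorithm key to a display name
classifier_names = {'rf': 'Random Forest', 'svm': 'Support Vector Machine'}

# the output folders static/img/cm, static/img/precrec and static/img/roc must already exist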
Code example #2
def main():
    args = parse_args()

    real_labels, predicted_labels = read_file(args.testlog)

    skplt.plot_confusion_matrix(real_labels,
                                predicted_labels,
                                normalize=True,
                                title=' ',
                                text_fontsize="large")
    plt.savefig("{}/{}/confusion_matrix.pdf".format(args.outputdir,
                                                    args.configname),
                bbox_inches='tight')

    cm = confusion_matrix(real_labels, predicted_labels)
    np.set_printoptions(precision=2)
    cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    diagonal = np.diag(cm)  # per-class accuracy: diagonal of the row-normalized matrix

    with open("{}/{}".format(args.outputdir, "confusion_matrix_all"),
              "a+") as f:
        f.write(args.configname + "|")
        for x in diagonal:
            f.write(str(x) + " ")
        f.write("\n")
Code example #3
    def plotConfusionMatrix(self, models_results):
        for key, values in models_results.items():
            fig, axes = plt.subplots(3, 3, figsize=(15, 15))

            indexes = [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2), (2, 0),
                       (2, 1), (2, 2)]

            for i, clfr in enumerate(values):
                skplt.plot_confusion_matrix(
                    y_true=clfr.y_test,
                    y_pred=clfr.predictions,
                    normalize=True,
                    ax=axes[indexes[i]],
                    title="Matrix de Confusão Normalizada")

                plt.sca(axes[indexes[i]])
                axes[indexes[i]].set_xlabel("Treinamento/Teste ({}/{})".format(
                    round(clfr.weight_train * 100, 0),
                    round(clfr.weight_test * 100, 0)))  # set x label
                axes[indexes[i]].get_xaxis().set_ticks(
                    [])  # hide x-axis tick labels
                axes[indexes[i]].get_yaxis().set_ticks([])  # hide y-axis tick labels

            plt.tight_layout()
            # fig.subplots_adjust(hspace=0.3)
            fig.subplots_adjust(top=0.95)
            #fig.suptitle(key, fontsize=16)
            plt.savefig("plots/confusion_matrix_{}.pdf".format(key))
Code example #4
def evaluate_features(X, y, clf=None):
    """General helper function for evaluating effectiveness of passed features in ML model
    
    Prints out Log loss, accuracy, and confusion matrix with 3-fold stratified cross-validation
    
    Args:
        X (array-like): Features array. Shape (n_samples, n_features)
        
        y (array-like): Labels array. Shape (n_samples,)
        
        clf: Classifier to use. If None, a default LogisticRegression is used.
    """
    if clf is None:
        clf = LogisticRegression()

    probas = cross_val_predict(clf,
                               X,
                               y,
                               # shuffle=True so that random_state takes effect
                               cv=StratifiedKFold(shuffle=True, random_state=8),
                               n_jobs=-1,
                               method='predict_proba',
                               verbose=2)
    pred_indices = np.argmax(probas, axis=1)
    classes = np.unique(y)
    preds = classes[pred_indices]
    print('Log loss: {}'.format(log_loss(y, probas)))
    print('Accuracy: {}'.format(accuracy_score(y, preds)))
    skplt.plot_confusion_matrix(y, preds)
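A hypothetical way to exercise evaluate_features on a toy dataset. The import paths are a plausible reconstruction matching the scikitplot.plotters API used in several other examples here; load_iris and the RandomForestClassifier call are illustrative choices, not from the original source:

import numpy as np
import matplotlib.pyplot as plt
import scikitplot.plotters as skplt
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss, accuracy_score
from sklearn.model_selection import cross_val_predict, StratifiedKFold

X, y = load_iris(return_X_y=True)
evaluate_features(X, y)  # falls back to LogisticRegression
evaluate_features(X, y, clf=RandomForestClassifier(n_estimators=200, random_state=0))
plt.show()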
Code example #5
def generate_confusion_matrix(real_labels, predicted_labels):
    skplt.plot_confusion_matrix(
        real_labels, predicted_labels,
        normalize=True,
        title='Normalized Confusion Matrix',
        text_fontsize="large"
    )
    plt.savefig('confusion_matrix.png', bbox_inches='tight')
Code example #6
def print_score(m, df, y):
    print('Accuracy:')
    res = m.score(df, y)
    print(res)
    print('Confusion Matrix')
    df_train_proba = m.predict_proba(df)
    df_train_pred_indices = np.argmax(df_train_proba, axis=1)
    classes_train = np.unique(y)
    preds_train = classes_train[df_train_pred_indices]
    skplt.plot_confusion_matrix(y, preds_train)
Code example #7
File: confusion_matrix.py  Project: Manerone/ML2017
def build_matrix(file_path, title, save_path, real_labels):
    with open(file_path) as f:
        predicted_labels = f.readlines()

    predicted_labels = [float(x.strip()) for x in predicted_labels]

    plt.figure()
    skplt.plot_confusion_matrix(real_labels,
                                predicted_labels,
                                text_fontsize="large",
                                normalize=True,
                                title=title)
    plt.savefig(save_path, bbox_inches='tight')
Code example #8
 def classify_all(self, filename):
     self.test_file = pd.read_csv(filename, sep=',', index_col=None)
     test = np.array(self.test_file.values[:, :3])
     test_data_class = self.test_file.Class
     self.output = self.NB.predict(test)
     probability = self.NB.predict_proba(test)
     cm = metrics.confusion_matrix(test_data_class, self.output)
     accuracy = accuracy_score(test_data_class, self.output)
     print("Accuracy for Naive Bayes")
     print(accuracy * 100)
     print("Confusion Matrix for Naive Bayes")
     #print(cm)
     skplt.plot_confusion_matrix(test_data_class, self.output)
     plt.show()
     return self.output, accuracy * 100
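The classify_all method is an excerpt from a larger class. A hypothetical sketch of the surrounding context it assumes (the class name, feature columns, and the GaussianNB choice are all assumptions made here for illustration):

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scikitplot.plotters as skplt
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import GaussianNB

class NaiveBayesModel:
    def __init__(self, train_filename):
        train_file = pd.read_csv(train_filename, sep=',', index_col=None)
        # first three columns as features, 'Class' column as the label
        self.NB = GaussianNB().fit(np.array(train_file.values[:, :3]),
                                   train_file.Class)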
Code example #9
def evaluate_features(X, y, clf=None):
    if clf is None:
        clf = LogisticRegression()

    probas = cross_val_predict(clf,
                               X,
                               y,
                               # shuffle=True so that random_state takes effect
                               cv=StratifiedKFold(shuffle=True, random_state=8),
                               n_jobs=-1,
                               method='predict_proba',
                               verbose=2)
    pred_indices = np.argmax(probas, axis=1)
    classes = np.unique(y)
    preds = classes[pred_indices]
    print('Log loss: {}'.format(log_loss(y, probas)))
    print('Accuracy: {}'.format(accuracy_score(y, preds)))
    skplt.plot_confusion_matrix(y, preds)
Code example #10
    def plotConfusionMatrix(self, models_results):
        for key, values in sorted(models_results.items()):
            fig, ax = plt.subplots()

            skplt.plot_confusion_matrix(y_true=values.y_test,
                                        y_pred=values.predictions,
                                        normalize=True,
                                        ax=ax,
                                        title="Matrix de Confusão Normalizada")

            plt.sca(ax)
            ax.set_xlabel("")  # set x label
            ax.get_xaxis().set_ticks([])  # hidden x axis text
            ax.get_yaxis().set_ticks([])

            plt.tight_layout()
            #fig.subplots_adjust(top=0.95)
            plt.savefig(self.description +
                        "/confusion_matrix_{}.pdf".format(key))
Code example #11
 def test_array_like(self):
     ax = skplt.plot_confusion_matrix([0, 1], [1, 0])
Code example #12
File: classifiers.py  Project: batermj/scikit-plot
def plot_confusion_matrix_with_cv(clf, X, y, labels=None, true_labels=None,
                                  pred_labels=None, title=None,
                                  normalize=False, hide_zeros=False,
                                  x_tick_rotation=0, do_cv=True, cv=None,
                                  shuffle=True, random_state=None, ax=None,
                                  figsize=None, cmap='Blues',
                                  title_fontsize="large",
                                  text_fontsize="medium"):
    """Generates the confusion matrix for a given classifier and dataset.

    Args:
        clf: Classifier instance that implements ``fit`` and ``predict``
            methods.

        X (array-like, shape (n_samples, n_features)):
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        y (array-like, shape (n_samples) or (n_samples, n_features)):
            Target relative to X for classification.

        labels (array-like, shape (n_classes), optional): List of labels to
            index the matrix. This may be used to reorder or select a subset of
            labels. If none is given, those that appear at least once in ``y``
            are used in sorted order.
            (new in v0.2.5)

        true_labels (array-like, optional): The true labels to display.
            If none is given, then all of the labels are used.

        pred_labels (array-like, optional): The predicted labels to display.
            If none is given, then all of the labels are used.

        title (string, optional): Title of the generated plot. Defaults to
            "Confusion Matrix" if ``normalize`` is False. Else, defaults to
            "Normalized Confusion Matrix".

        normalize (bool, optional): If True, normalizes the confusion matrix
            before plotting. Defaults to False.

        hide_zeros (bool, optional): If True, does not plot cells containing a
            value of zero. Defaults to False.

        x_tick_rotation (int, optional): Rotates x-axis tick labels by the
            specified angle. This is useful in cases where there are numerous
            categories and the labels overlap each other.

        do_cv (bool, optional): If True, the classifier is cross-validated on
            the dataset using the cross-validation strategy in `cv` to generate
            the confusion matrix. If False, the confusion matrix is generated
            without training or cross-validating the classifier. This assumes
            that the classifier has already been called with its `fit` method
            beforehand.

        cv (int, cross-validation generator, iterable, optional): Determines
            the cross-validation strategy to be used for splitting.

            Possible inputs for cv are:
              - None, to use the default 3-fold cross-validation,
              - integer, to specify the number of folds.
              - An object to be used as a cross-validation generator.
              - An iterable yielding train/test splits.

            For integer/None inputs, if ``y`` is binary or multiclass,
            :class:`StratifiedKFold` is used. If the estimator is not a classifier
            or if ``y`` is neither binary nor multiclass, :class:`KFold` is
            used.

        shuffle (bool, optional): Used when do_cv is set to True. Determines
            whether to shuffle the training data before splitting using
            cross-validation. Default set to True.

        random_state (int or :class:`RandomState`): Pseudo-random number generator
            state used for random sampling.

        ax (:class:`matplotlib.axes.Axes`, optional): The axes upon which to
            plot the learning curve. If None, the plot is drawn on a new set of
            axes.

        figsize (2-tuple, optional): Tuple denoting figure size of the plot
            e.g. (6, 6). Defaults to ``None``.

        cmap (string or :class:`matplotlib.colors.Colormap` instance, optional):
            Colormap used for plotting the projection. View Matplotlib Colormap
            documentation for available options.
            https://matplotlib.org/users/colormaps.html

        title_fontsize (string or int, optional): Matplotlib-style fontsizes.
            Use e.g. "small", "medium", "large" or integer-values. Defaults to
            "large".

        text_fontsize (string or int, optional): Matplotlib-style fontsizes.
            Use e.g. "small", "medium", "large" or integer-values. Defaults to
            "medium".


    Returns:
        ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was
            drawn.

    Example:
        >>> rf = classifier_factory(RandomForestClassifier())
        >>> rf.plot_confusion_matrix(X, y, normalize=True)
        <matplotlib.axes._subplots.AxesSubplot object at 0x7fe967d64490>
        >>> plt.show()

        .. image:: _static/examples/plot_confusion_matrix.png
           :align: center
           :alt: Confusion matrix
    """
    y = np.array(y)

    if not do_cv:
        y_pred = clf.predict(X)
        y_true = y

    else:
        if cv is None:
            cv = StratifiedKFold(shuffle=shuffle, random_state=random_state)
        elif isinstance(cv, int):
            cv = StratifiedKFold(n_splits=cv, shuffle=shuffle,
                                 random_state=random_state)
        else:
            pass

        clf_clone = clone(clf)

        preds_list = []
        trues_list = []
        for train_index, test_index in cv.split(X, y):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            clf_clone.fit(X_train, y_train)
            preds = clf_clone.predict(X_test)
            preds_list.append(preds)
            trues_list.append(y_test)
        y_pred = np.concatenate(preds_list)
        y_true = np.concatenate(trues_list)

    ax = plotters.plot_confusion_matrix(y_true=y_true, y_pred=y_pred,
                                        labels=labels, true_labels=true_labels,
                                        pred_labels=pred_labels,
                                        title=title, normalize=normalize,
                                        hide_zeros=hide_zeros,
                                        x_tick_rotation=x_tick_rotation, ax=ax,
                                        figsize=figsize, cmap=cmap,
                                        title_fontsize=title_fontsize,
                                        text_fontsize=text_fontsize)

    return ax
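A hypothetical call to the function above; the dataset and estimator are illustrative, and the function body additionally assumes module-level imports of numpy as np, sklearn.base.clone, sklearn.model_selection.StratifiedKFold and scikitplot.plotters as plotters:

import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier

X, y = load_digits(return_X_y=True)
plot_confusion_matrix_with_cv(RandomForestClassifier(n_estimators=100),
                              X, y,
                              cv=5,                 # 5-fold StratifiedKFold with shuffling
                              normalize=True,
                              x_tick_rotation=45,
                              figsize=(8, 8))
plt.show()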
Code example #13
cv = StratifiedKFold(n_splits=3, shuffle=True)

from sklearn.model_selection import cross_val_score
score_mse = cross_val_score(LogisticRegression(), X_all, y_all, scoring='neg_mean_squared_error', cv=cv).mean()
score_acc = cross_val_score(LogisticRegression(), X_all, y_all, scoring='accuracy', cv=cv).mean()

#### Learning Curve

from scikitplot import plotters as skplt
skplt.plot_learning_curve(LogisticRegression(), X_all, y_all)
plt.show()
skplt.plot_roc_curve(y_true=y_val, y_probas=y_proba)
plt.show()
skplt.plot_precision_recall_curve(y_true=y_val, y_probas=y_proba)
plt.show()
skplt.plot_confusion_matrix(y_true=y_val, y_pred=y_pred, normalize=True)
plt.show()

#### XGBoost

from xgboost import XGBRegressor
import xgboost as xgb
params = {
    'objective': 'binary:logistic',
    'eval_metric': 'logloss',
}
dtrain = xgb.DMatrix(X_all, label=y_all)
history = xgb.cv(params, dtrain, num_boost_round=1024, early_stopping_rounds=5, verbose_eval=20)

booster = xgb.train(params, dtrain)
xgb.plot_importance(booster=booster)
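One thing the snippet above leaves implicit: xgb.cv with early_stopping_rounds truncates its result at the best iteration, but the final xgb.train call does not reuse that information. A possible refinement of the last two lines, assuming one wants to train the booster for exactly the number of rounds the cross-validation kept (this is an addition, not part of the original code):

best_rounds = len(history)  # rows remaining in the CV result after early stopping
booster = xgb.train(params, dtrain, num_boost_round=best_rounds)
xgb.plot_importance(booster=booster)
plt.show()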
Code example #14
def plot_confusion_matrix(clf, X, y, title=None, normalize=False, do_cv=True, cv=None,
                          shuffle=True, random_state=None, ax=None):
    """Generates the confusion matrix for a given classifier and dataset.

    Args:
        clf: Classifier instance that implements ``fit`` and ``predict`` methods.

        X (array-like, shape (n_samples, n_features)):
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        y (array-like, shape (n_samples) or (n_samples, n_features)):
            Target relative to X for classification.

        title (string, optional): Title of the generated plot. Defaults to "Confusion Matrix" if
            ``normalize`` is False. Else, defaults to "Normalized Confusion Matrix".

        normalize (bool, optional): If True, normalizes the confusion matrix before plotting.
            Defaults to False.

        do_cv (bool, optional): If True, the classifier is cross-validated on the dataset using the
            cross-validation strategy in `cv` to generate the confusion matrix. If False, the
            confusion matrix is generated without training or cross-validating the classifier.
            This assumes that the classifier has already been called with its `fit` method beforehand.

        cv (int, cross-validation generator, iterable, optional): Determines the
            cross-validation strategy to be used for splitting.

            Possible inputs for cv are:
              - None, to use the default 3-fold cross-validation,
              - integer, to specify the number of folds.
              - An object to be used as a cross-validation generator.
              - An iterable yielding train/test splits.

            For integer/None inputs, if ``y`` is binary or multiclass,
            :class:`StratifiedKFold` is used. If the estimator is not a classifier
            or if ``y`` is neither binary nor multiclass, :class:`KFold` is used.

        shuffle (bool, optional): Used when do_cv is set to True. Determines whether to shuffle the
            training data before splitting using cross-validation. Default set to True.

        random_state (int or :class:`RandomState`): Pseudo-random number generator state used
            for random sampling.

        ax (:class:`matplotlib.axes.Axes`, optional): The axes upon which to plot
            the learning curve. If None, the plot is drawn on a new set of axes.

    Returns:
        ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was drawn.

    Example:
        >>> rf = classifier_factory(RandomForestClassifier())
        >>> rf.plot_confusion_matrix(X, y, normalize=True)
        <matplotlib.axes._subplots.AxesSubplot object at 0x7fe967d64490>
        >>> plt.show()

        .. image:: _static/examples/plot_confusion_matrix.png
           :align: center
           :alt: Confusion matrix
    """
    y = np.array(y)

    if not do_cv:
        y_pred = clf.predict(X)
        y_true = y

    else:
        if cv is None:
            cv = StratifiedKFold(shuffle=shuffle, random_state=random_state)
        elif isinstance(cv, int):
            cv = StratifiedKFold(n_splits=cv, shuffle=shuffle, random_state=random_state)
        else:
            pass

        clf_clone = clone(clf)

        preds_list = []
        trues_list = []
        for train_index, test_index in cv.split(X, y):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            clf_clone.fit(X_train, y_train)
            preds = clf_clone.predict(X_test)
            preds_list.append(preds)
            trues_list.append(y_test)
        y_pred = np.concatenate(preds_list)
        y_true = np.concatenate(trues_list)

    ax = plotters.plot_confusion_matrix(y_true=y_true, y_pred=y_pred,
                                        title=title, normalize=normalize, ax=ax)

    return ax
Code example #15
def plot_cmat(y_test, y_pred):
    skplt.plot_confusion_matrix(y_test,y_pred)
    plt.show()
Code example #16
def plot_cmat(yte, ypred):
    skplt.plot_confusion_matrix(yte, ypred)
    plt.show()
Code example #17
File: driver.py  Project: gr93/Fake_News_Detection
def plot_cmat(yte, ypred, title):
    '''Plotting confusion matrix'''
    skplt.plot_confusion_matrix(yte, ypred, normalize=True)
    plt.title(title)
    plt.show()
Code example #18
def plot_confusion_matrix(clf,
                          X,
                          y,
                          labels=None,
                          title=None,
                          normalize=False,
                          hide_zeros=False,
                          x_tick_rotation=0,
                          do_cv=True,
                          cv=None,
                          shuffle=True,
                          random_state=None,
                          ax=None,
                          figsize=None,
                          title_fontsize="large",
                          text_fontsize="medium"):
    """Generates the confusion matrix for a given classifier and dataset.

    Args:
        clf: Classifier instance that implements ``fit`` and ``predict`` methods.

        X (array-like, shape (n_samples, n_features)):
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        y (array-like, shape (n_samples) or (n_samples, n_features)):
            Target relative to X for classification.

        labels (array-like, shape (n_classes), optional): List of labels to
            index the matrix. This may be used to reorder or select a subset of labels.
            If none is given, those that appear at least once in ``y`` are used in sorted order.
            (new in v0.2.5)

        title (string, optional): Title of the generated plot. Defaults to "Confusion Matrix" if
            ``normalize`` is False. Else, defaults to "Normalized Confusion Matrix".

        normalize (bool, optional): If True, normalizes the confusion matrix before plotting.
            Defaults to False.

        hide_zeros (bool, optional): If True, does not plot cells containing a value of zero.
            Defaults to False.

        x_tick_rotation (int, optional): Rotates x-axis tick labels by the specified angle. This is
            useful in cases where there are numerous categories and the labels overlap each other.

        do_cv (bool, optional): If True, the classifier is cross-validated on the dataset using the
            cross-validation strategy in `cv` to generate the confusion matrix. If False, the
            confusion matrix is generated without training or cross-validating the classifier.
            This assumes that the classifier has already been called with its `fit` method beforehand.

        cv (int, cross-validation generator, iterable, optional): Determines the
            cross-validation strategy to be used for splitting.

            Possible inputs for cv are:
              - None, to use the default 3-fold cross-validation,
              - integer, to specify the number of folds.
              - An object to be used as a cross-validation generator.
              - An iterable yielding train/test splits.

            For integer/None inputs, if ``y`` is binary or multiclass,
            :class:`StratifiedKFold` is used. If the estimator is not a classifier
            or if ``y`` is neither binary nor multiclass, :class:`KFold` is used.

        shuffle (bool, optional): Used when do_cv is set to True. Determines whether to shuffle the
            training data before splitting using cross-validation. Default set to True.

        random_state (int or :class:`RandomState`): Pseudo-random number generator state used
            for random sampling.

        ax (:class:`matplotlib.axes.Axes`, optional): The axes upon which to plot
            the learning curve. If None, the plot is drawn on a new set of axes.

        figsize (2-tuple, optional): Tuple denoting figure size of the plot e.g. (6, 6). 
            Defaults to ``None``.

        title_fontsize (string or int, optional): Matplotlib-style fontsizes. 
            Use e.g. "small", "medium", "large" or integer-values. Defaults to "large".

        text_fontsize (string or int, optional): Matplotlib-style fontsizes. 
            Use e.g. "small", "medium", "large" or integer-values. Defaults to "medium".


    Returns:
        ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was drawn.

    Example:
        >>> rf = classifier_factory(RandomForestClassifier())
        >>> rf.plot_confusion_matrix(X, y, normalize=True)
        <matplotlib.axes._subplots.AxesSubplot object at 0x7fe967d64490>
        >>> plt.show()

        .. image:: _static/examples/plot_confusion_matrix.png
           :align: center
           :alt: Confusion matrix
    """
    y = np.array(y)

    if not do_cv:
        y_pred = clf.predict(X)
        y_true = y

    else:
        if cv is None:
            cv = StratifiedKFold(shuffle=shuffle, random_state=random_state)
        elif isinstance(cv, int):
            cv = StratifiedKFold(n_splits=cv,
                                 shuffle=shuffle,
                                 random_state=random_state)
        else:
            pass

        clf_clone = clone(clf)

        preds_list = []
        trues_list = []
        for train_index, test_index in cv.split(X, y):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            clf_clone.fit(X_train, y_train)
            preds = clf_clone.predict(X_test)
            preds_list.append(preds)
            trues_list.append(y_test)
        y_pred = np.concatenate(preds_list)
        y_true = np.concatenate(trues_list)

    ax = plotters.plot_confusion_matrix(y_true=y_true,
                                        y_pred=y_pred,
                                        labels=labels,
                                        title=title,
                                        normalize=normalize,
                                        hide_zeros=hide_zeros,
                                        x_tick_rotation=x_tick_rotation,
                                        ax=ax,
                                        figsize=figsize,
                                        title_fontsize=title_fontsize,
                                        text_fontsize=text_fontsize)

    return ax
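For contrast with the cross-validated path, a hypothetical sketch of the do_cv=False branch of the function above, using an already fitted estimator (dataset and classifier are illustrative; the same module-level imports the function body relies on are assumed):

import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression

X, y = load_digits(return_X_y=True)
clf = LogisticRegression(max_iter=1000).fit(X, y)

# do_cv=False skips refitting and plots the (optimistic) resubstitution predictions
plot_confusion_matrix(clf, X, y, do_cv=False, normalize=True,
                      title="Training-set confusion matrix")
plt.show()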
Code example #19
def plot_cmat(yte, ypred):
    '''Plotting confusion matrix'''
    skplt.plot_confusion_matrix(yte, ypred)
    plt.show()
Code example #20
 def test_array_like(self):
     ax = skplt.plot_confusion_matrix([0, 1], [1, 0])
Code example #21
 def plot_confusion_matrix(self, normalize=True):
     # add thresholding
     skplt.plot_confusion_matrix(self.y_test,
                                 self.y_pred,
                                 normalize=normalize)
     plt.show()
Code example #22
File: LSTM.py  Project: sparkingdark/fake-news-bot-
def plot_cmat(yte, ypred):
    '''Plotting confusion matrix'''
    skplt.plot_confusion_matrix(yte, ypred)
    plt.savefig('rnn.png')  # save before show(): show() closes the figure
    plt.show()
Code example #23
from sklearn.datasets import load_svmlight_file
import matplotlib.pyplot as plt
import scikitplot.plotters as skplt
import numpy as np
import csv

if __name__ == '__main__':
    X_test, y_test = load_svmlight_file('test.dat')

    with open('test.dat.predict') as f:
        predictions = f.readlines()

    predictions = [float(x.strip()) for x in predictions]

    skplt.plot_confusion_matrix(y_true=y_test,
                                y_pred=predictions,
                                normalize=True,
                                title="Matrix de Confusão Normalizada")

    plt.savefig('confusion-matrix_1.pdf', bbox_inches='tight')

    with open('prob/test.dat.prob_temp.predict') as textFile:
        predictions2 = [line.split() for line in textFile]

    plt.figure()
    plt.title("Distribuição de Probabilidade - SVM")

    positive = []
    negative = []

    for target, pr in zip(y_test, predictions2):
        predict = pr[0]
Code example #24
def main(X_data, y_data, test_size):
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X_data, y_data, test_size=test_size)

    # create a decision tree classifier
    clf = tree.DecisionTreeClassifier()

    clf.fit(X_train, y_train)

    # classifier prediction
    y_pred = clf.predict(X_test)

    return y_test, y_pred


if __name__ == "__main__":
    sizes = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    X_data, y_data = load_svmlight_file('./data')
    if not os.path.exists('./arvore/'):
        os.makedirs('./arvore/')
    for x in sizes:
        y_test, y_pred = main(X_data, y_data, x)
        skplt.plot_confusion_matrix(
            y_test,
            y_pred,
            normalize=True,
            title='Normalized Confusion Matrix (test size: ' + str(x) + ')',
            text_fontsize="large")
        plt.savefig('./arvore/' + str(int(x * 10)) + '.png',
                    bbox_inches='tight')
Code example #25
"""An example showing the plot_confusion_matrix method used by a scikit-learn classifier"""
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_digits as load_data
import matplotlib.pyplot as plt
from scikitplot import classifier_factory

X, y = load_data(return_X_y=True)
rf = classifier_factory(RandomForestClassifier())
rf.plot_confusion_matrix(X, y, normalize=True)
plt.show()

# Using the more flexible functions API
from scikitplot import plotters as skplt
rf = RandomForestClassifier()
rf = rf.fit(X, y)
preds = rf.predict(X)
skplt.plot_confusion_matrix(y_true=y, y_pred=preds)
plt.show()
Code example #26
File: lstm2.py  Project: iamtpb/PersonalisedMedicine
          Y_train,
          epochs=10,
          batch_size=batch_size,
          validation_split=0.2,
          callbacks=[ckpt_callback])

model = load_model('lstm_model_x')

probas = model.predict(X_test)
pred_indices = np.argmax(probas, axis=1)
classes = np.array(range(1, 10))
preds = classes[pred_indices]

print('Log loss: {}'.format(
    log_loss(classes[np.argmax(Y_test, axis=1)], probas)))
print('Accuracy: {}'.format(
    accuracy_score(classes[np.argmax(Y_test, axis=1)], preds)))
skplt.plot_confusion_matrix(classes[np.argmax(Y_test, axis=1)], preds)

Xtest = tokenizer.texts_to_sequences(testx['Text'].values)
Xtest = pad_sequences(Xtest, maxlen=2000)

probas = model.predict(Xtest)

submission_df = pd.DataFrame(probas,
                             columns=['class' + str(c + 1) for c in range(9)])
submission_df['ID'] = df_test['ID']
submission_df.head()

#submission_df.to_csv('submissionX.csv', index=False)
Code example #27
def plot_cmat(yte, ypred):
    '''Plotting confusion matrix'''
    warnings.filterwarnings("ignore")  # silence plotting/estimator warnings
    skplt.plot_confusion_matrix(yte, ypred)
    plt.show()