コード例 #1
0
ファイル: fscore.py プロジェクト: simke9445/pollen-classifier
def fscore(y_test, y_score):
    """Plot precision-recall curves for a multi-class problem.

    The test outputs are passed through ``binarize``; a curve and an
    average-precision score are then computed for every binarized column,
    followed by a micro-average over all columns at once. Two figures are
    shown one after the other: the curve of column 0 on its own, then the
    micro-average curve together with the curve of every column.

    :param y_test: output vector for the test set; converted with
        ``binarize`` and used as the reference side of every curve
    :param y_score: score array, read column-wise as ``y_score[:, i]`` for
        binarized column ``i``
    :return: the ``plt`` object the figures were drawn with
    """

    # Turn the output vector into one indicator column per class.
    y_test = binarize(y_test)
    binarized_shape = np.shape(y_test)
    print('y_test binarized shape = ', binarized_shape)
    n_classes = binarized_shape[1]

    # Results are keyed by class index, plus the extra key "micro".
    precision, recall, average_precision = {}, {}, {}

    # Per-class precision-recall curve and average precision.
    for cls in range(n_classes):
        class_truth = y_test[:, cls]
        class_score = y_score[:, cls]
        precision[cls], recall[cls], _ = precision_recall_curve(class_truth,
                                                                class_score)
        average_precision[cls] = average_precision_score(class_truth,
                                                         class_score)

    # Micro-average: every (sample, class) cell is treated as one decision.
    precision["micro"], recall["micro"], _ = precision_recall_curve(
        y_test.ravel(), y_score.ravel())
    average_precision["micro"] = average_precision_score(
        y_test, y_score, average="micro")

    # First figure: precision-recall curve of class 0 only.
    plt.clf()
    plt.plot(recall[0], precision[0], label='Precision-Recall curve')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    first_title = 'Precision-Recall example: AUC={0:0.2f}'
    plt.title(first_title.format(average_precision[0]))
    plt.legend(loc="lower left")
    plt.show()

    # Second figure: micro-average curve first, then one curve per class.
    plt.clf()
    micro_label = 'micro-average Precision-recall curve (area = {0:0.2f})'
    plt.plot(recall["micro"], precision["micro"],
             label=micro_label.format(average_precision["micro"]))

    class_label = 'Precision-recall curve of class {0} (area = {1:0.2f})'
    for cls in range(n_classes):
        plt.plot(recall[cls], precision[cls],
                 label=class_label.format(cls, average_precision[cls]))

    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Extension of Precision-Recall curve to multi-class')
    plt.legend(loc="lower right")
    plt.show()

    return plt
コード例 #2
0
ファイル: main.py プロジェクト: simke9445/pollen-classifier
# Output vector: read the concentration column, then remove it from the frame
# so that it does not end up in the feature matrix built from `df` below.
# `.values` is used instead of `.as_matrix()`, which was deprecated in
# pandas 0.23 and removed in pandas 1.0.
y = df['KONCENTRACIJA'].values
del df['KONCENTRACIJA']

# output the column names
print(df.columns.values.tolist())

# Id columns are taken out of the frame and binarized separately.
id_cols = ["vrstaBiljke", "ID_VRSTE", "ID_LOKACIJE"]
id_columns = df[id_cols].values

for col in id_cols:
    del df[col]

# Binarize each id column on its own (`binarize` is defined elsewhere) and
# lay the results side by side. Rows of the transpose are the columns of
# `id_columns`, so this visits them in the same order as `id_cols`.
id_columns_binarized = np.column_stack(
    [binarize(column) for column in id_columns.T])

# Whatever is left in the frame becomes the base feature matrix.
X = df.values

# testing the shapes before and after the binarization
print('before binarization X = ', X.shape)
print('id_columns_binazried = ', id_columns_binarized.shape)
X = np.column_stack((X, id_columns_binarized))
print('after binarization X = ', X.shape)

# Boolean mask that is True for rows of X containing no NaN.
clear_rows = ~np.isnan(X).any(axis=1)
コード例 #3
0
ファイル: main.py プロジェクト: simke9445/pollen-classifier
# Output vector: read the concentration column, then remove it from the frame
# so that it does not end up in the feature matrix built from `df` below.
# `.values` is used instead of `.as_matrix()`, which was deprecated in
# pandas 0.23 and removed in pandas 1.0.
y = df['KONCENTRACIJA'].values
del df['KONCENTRACIJA']

# output the column names
print(df.columns.values.tolist())

# Id columns are taken out of the frame and binarized separately.
id_cols = ["vrstaBiljke", "ID_VRSTE", "ID_LOKACIJE"]
id_columns = df[id_cols].values

for col in id_cols:
    del df[col]

# Binarize each id column on its own (`binarize` is defined elsewhere) and
# lay the results side by side. Rows of the transpose are the columns of
# `id_columns`, so this visits them in the same order as `id_cols`.
id_columns_binarized = np.column_stack(
    [binarize(column) for column in id_columns.T])

# Whatever is left in the frame becomes the base feature matrix.
X = df.values

# testing the shapes before and after the binarization
print('before binarization X = ', X.shape)
print('id_columns_binazried = ', id_columns_binarized.shape)
X = np.column_stack((X, id_columns_binarized))
print('after binarization X = ', X.shape)

# clear from n/a's