def fscore(y_test, y_score):
    """
    Draw precision-recall curves for every class plus their micro-average.

    Two figures are shown one after the other: first the curve of class 0 alone,
    then the micro-averaged curve together with one curve per class.

    :param y_test: output vector for the test set; it is passed through ``binarize`` and the
        result is indexed as a 2-D array with one column per class
    :param y_score: scores per class, indexed column-wise alongside the binarized ``y_test``
    :return: the ``plt`` object the figures were drawn with
    """
    # binarize output vector
    y_test = binarize(y_test)
    binarized_shape = np.shape(y_test)
    print('y_test binarized shape = ', binarized_shape)
    class_ids = range(binarized_shape[1])

    # Per-class precision-recall curve and average precision
    prec, rec, avg_prec = {}, {}, {}
    for cls in class_ids:
        labels_col = y_test[:, cls]
        scores_col = y_score[:, cls]
        prec[cls], rec[cls], _ = precision_recall_curve(labels_col, scores_col)
        avg_prec[cls] = average_precision_score(labels_col, scores_col)

    # Micro-average: every (sample, class) cell is treated as one binary decision
    prec["micro"], rec["micro"], _ = precision_recall_curve(y_test.ravel(), y_score.ravel())
    avg_prec["micro"] = average_precision_score(y_test, y_score, average="micro")

    # First figure: precision-recall curve of class 0 only
    plt.clf()
    plt.plot(rec[0], prec[0], label='Precision-Recall curve')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    first_title = 'Precision-Recall example: AUC={0:0.2f}'.format(avg_prec[0])
    plt.title(first_title)
    plt.legend(loc="lower left")
    plt.show()

    # Second figure: micro-average curve followed by one curve per class
    plt.clf()
    micro_label = 'micro-average Precision-recall curve (area = {0:0.2f})'.format(avg_prec["micro"])
    plt.plot(rec["micro"], prec["micro"], label=micro_label)
    for cls in class_ids:
        class_label = 'Precision-recall curve of class {0} (area = {1:0.2f})'.format(cls, avg_prec[cls])
        plt.plot(rec[cls], prec[cls], label=class_label)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Extension of Precision-Recall curve to multi-class')
    plt.legend(loc="lower right")
    plt.show()

    return plt
# Split the target column off `df`, binarize the id columns, and assemble the
# feature matrix X with the binarized id columns appended on the right.

# output vector
# NOTE: as_matrix() was removed in pandas 1.0; .values yields the same ndarray
# on both old and new pandas versions.
y = df['KONCENTRACIJA'].values
del df['KONCENTRACIJA']

# output the column names
print(df.columns.values.tolist())

# binarize id columns
id_cols = ["vrstaBiljke", "ID_VRSTE", "ID_LOKACIJE"]
id_columns = df[id_cols].values
# drop the id columns in place so that only the remaining features are left in df
for col in id_cols:
    del df[col]

# binarize each id column separately, then join the results side by side
id_columns_binarized = np.column_stack(
    [binarize(id_columns[:, i]) for i in range(len(id_cols))]
)

# transform to suitable representation
X = df.values

# testing the shapes before and after the binarization
print('before binarization X = ', X.shape)
print('id_columns_binazried = ', id_columns_binarized.shape)
X = np.column_stack((X, id_columns_binarized))
print('after binarization X = ', X.shape)

# clear from n/a's: boolean mask that is True for rows of X without any NaN
clear_rows = ~np.isnan(X).any(axis=1)
# Split the target column off `df`, binarize the id columns, and assemble the
# feature matrix X with the binarized id columns appended on the right.

# output vector
# NOTE: as_matrix() was removed in pandas 1.0; .values yields the same ndarray
# on both old and new pandas versions.
y = df['KONCENTRACIJA'].values
del df['KONCENTRACIJA']

# output the column names
print(df.columns.values.tolist())

# binarize id columns
id_cols = ["vrstaBiljke", "ID_VRSTE", "ID_LOKACIJE"]
id_columns = df[id_cols].values
# drop the id columns in place so that only the remaining features are left in df
for col in id_cols:
    del df[col]

# binarize each id column separately, then join the results side by side
id_columns_binarized = np.column_stack(
    [binarize(id_columns[:, i]) for i in range(len(id_cols))]
)

# transform to suitable representation
X = df.values

# testing the shapes before and after the binarization
print('before binarization X = ', X.shape)
print('id_columns_binazried = ', id_columns_binarized.shape)
X = np.column_stack((X, id_columns_binarized))
print('after binarization X = ', X.shape)

# clear from n/a's