コード例 #1
0
from load_breast_cancer_data import load_data
from matplotlib import pyplot as plt
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.metrics import roc_curve, auc			# auc: area under curve
from scipy import interp							# roc_curve: Receiver Operator characteristic curve 
from sklearn.metrics import roc_auc_score, accuracy_score

# Load and split the dataset (load_data() hands back the four train/test
# partitions unpacked below).
X_train, X_test, y_train, y_test = load_data()

# Build the classification pipeline: standardize the features, then fit an
# L2-penalized logistic regression with a fixed seed.
pipe_lr = Pipeline([
    ('scl', StandardScaler()),
    ('clf', LogisticRegression(penalty='l2', random_state=0)),
])

# Drawing the ROC curve
# Only the feature columns at indices 4 and 14 are used here.
X_train2 = X_train[:, [4, 14]]

# StratifiedKFold defaults to shuffle=False, in which case random_state has
# no effect. scikit-learn >= 0.24 raises ValueError when random_state is
# passed without shuffle=True, so it is omitted; the unshuffled folds are
# deterministic either way.
cv = list(StratifiedKFold(n_splits=3).split(X_train2, y_train))

fig = plt.figure(figsize=(7, 5))

# Common grid of 100 false-positive rates in [0, 1], plus accumulators for
# the true-positive rates (presumably filled in by code past this excerpt).
mean_tpr = 0.0
mean_fpr = np.linspace(0, 1, 100)
all_tpr = []
for i, (train, test) in enumerate(cv):
    # Refit the pipeline on this fold's training rows, then predict class
    # probabilities for the held-out rows.
    probas = pipe_lr.fit(X_train2[train],
                         y_train[train]).predict_proba(X_train2[test])
コード例 #2
0
from load_breast_cancer_data import load_data
import matplotlib.pyplot as plt
import numpy as np
import os
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import learning_curve

# Load the dataset; load_data() hands back the train/test partitions.
X_train, X_test, y_train, y_test = load_data()

# Two-step pipeline: feature standardization followed by an L2-penalized
# logistic regression with a fixed seed.
pipe_lr = Pipeline([
    ('scl', StandardScaler()),
    ('clf', LogisticRegression(penalty='l2', random_state=0)),
])

# Evaluate the pipeline with 10-fold cross-validation at ten relative
# training-set sizes, evenly spaced from 10% to 100% of the training data.
train_sizes, train_scores, test_scores = learning_curve(
    estimator=pipe_lr,
    X=X_train,
    y=y_train,
    train_sizes=np.linspace(0.1, 1.0, 10),
    cv=10,
    n_jobs=1,
)

# Reduce the scores along axis 1 (the cross-validation folds) to a mean and
# a standard deviation per training-set size.
train_mean, train_std = train_scores.mean(axis=1), train_scores.std(axis=1)
test_mean, test_std = test_scores.mean(axis=1), test_scores.std(axis=1)

########### plotting for train_data #############
plt.plot(train_sizes,