# Cross-validated class-probability estimation for a ROC analysis: a scaled
# logistic-regression pipeline is refit on each stratified fold of a
# two-feature training subset and scored on the held-out part of that fold.
from load_breast_cancer_data import load_data
from matplotlib import pyplot as plt
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import Pipeline
# roc_curve: receiver operating characteristic curve; auc: area under curve
from sklearn.metrics import roc_curve, auc
# scipy.interp was only a deprecated alias of numpy.interp and can no longer
# be imported from recent SciPy releases, so bind the same function from NumPy.
from numpy import interp
from sklearn.metrics import roc_auc_score, accuracy_score

# load and split the dataset
X_train, X_test, y_train, y_test = load_data()

# Build the pipeline: standardize the features, then fit an L2-regularized
# logistic regression.
pipe_lr = Pipeline([
    ('scl', StandardScaler()),
    ('clf', LogisticRegression(penalty='l2', random_state=0)),
])

# Drawing the roc curve
# Only the feature columns at index 4 and 14 are used.
X_train2 = X_train[:, [4, 14]]

# random_state is intentionally not passed: without shuffle=True it never
# influenced the folds, and current scikit-learn raises ValueError when a
# seed is combined with shuffle=False. The folds stay deterministic.
cv = list(StratifiedKFold(n_splits=3).split(X_train2, y_train))

fig = plt.figure(figsize=(7, 5))

mean_tpr = 0.0
mean_fpr = np.linspace(0, 1, 100)  # common grid of 100 points on [0, 1]
all_tpr = []

for i, (train, test) in enumerate(cv):
    # Refit on the fold's training indices, then predict class
    # probabilities for the fold's held-out indices.
    probas = pipe_lr.fit(X_train2[train], y_train[train]).predict_proba(X_train2[test])
# Learning-curve script: evaluates a scaled logistic-regression pipeline at
# increasing training-set sizes and summarizes the per-size scores.
from load_breast_cancer_data import load_data
import matplotlib.pyplot as plt
import numpy as np
import os
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import learning_curve

# load and split the dataset
X_train, X_test, y_train, y_test = load_data()

# Pipeline: standardize the features, then fit an L2-regularized logistic
# regression.
pipe_lr = Pipeline([('scl', StandardScaler()),
                    ('clf', LogisticRegression(penalty='l2', random_state=0))])

train_sizes, train_scores, test_scores = \
    learning_curve(estimator=pipe_lr,
                   X=X_train,
                   y=y_train,
                   # use 10 evenly spaced relative fractions (0.1 .. 1.0)
                   # for the training set sizes.
                   train_sizes=np.linspace(0.1, 1.0, 10),
                   cv=10,
                   n_jobs=1)

# Mean and standard deviation along axis 1 of the score arrays.
# NOTE(review): axis 1 is presumably the CV-fold axis, giving one value per
# training-set size - confirm against the learning_curve documentation.
train_mean = np.mean(train_scores, axis=1)
train_std = np.std(train_scores, axis=1)
test_mean = np.mean(test_scores, axis=1)
test_std = np.std(test_scores, axis=1)

########### plotting for train_data #############
plt.plot(train_sizes,