from sklearn.decomposition import PCA
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import datasets_mglearn as dsets

# Expected inputs for this script: the breast-cancer data, standardized feature-wise
cancer = load_breast_cancer()
X_scaled = StandardScaler().fit_transform(cancer.data)
verbose = True

pca = PCA(n_components=2)  # Keep only 2 components here
pca.fit(X_scaled)
# Or equivalently: pca = PCA(n_components=2).fit(X_scaled)
X_pca = pca.transform(X_scaled)
print("Original shape {}".format(X_scaled.shape))
print("PCA shape {}".format(X_pca.shape))
# In [70]: run UNS_PCA_cancer.py
# Original shape (569, 30)
# PCA shape (569, 2), meaning that we only kept 2 features

# Plot first vs second component
if verbose:
    plt.figure(figsize=(8, 8))
    dsets.discrete_scatter(X_pca[:, 0], X_pca[:, 1], cancer.target)
    plt.legend(cancer.target_names, loc="best")
    plt.gca().set_aspect("equal")
    plt.xlabel("First principal component")
    plt.ylabel("Second principal component")

print("PCA components:\n {}".format(pca.components_))

# Heat map of the component loadings
plt.matshow(pca.components_, cmap='viridis')
plt.yticks([0, 1], ['First component', 'Second component'])
plt.colorbar()
plt.xticks(range(len(cancer.feature_names)), cancer.feature_names,
           rotation=60, ha='left')
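# A short follow-up sketch (not part of the original script): explained_variance_ratio_
# reports how much of the total variance each kept component captures, which helps
# justify keeping only 2 of the 30 features.
print("Explained variance ratio: {}".format(pca.explained_variance_ratio_))
print("Total variance kept: {:.2f}".format(pca.explained_variance_ratio_.sum()))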
# Output
# Predicted probabilities:
# [[ 0.01573626  0.98426374]
#  [ 0.84335828  0.15664172]
#  [ 0.98112869  0.01887131]
#  [ 0.97407199  0.02592801]
#  [ 0.01352142  0.98647858]
#  [ 0.02504637  0.97495363]]
# This tells us which predictions are more or less certain.
# Beware, though: with a model that overfits, confident predictions can more
# often turn out to be false positives or false negatives.
# In that case the model needs to be calibrated.
# We then check on a plot whether the predictions match the probabilities.
# (Assumes gbrt is a fitted binary classifier and X, X_train, X_test,
#  y_train, y_test come from the corresponding train/test split.)
fig, axes = plt.subplots(1, 2, figsize=(13, 5))
dsets.plot_2d_separator(gbrt, X, ax=axes[0], fill=True, alpha=.4, cm=dsets.cm2)
score_images = dsets.plot_2d_scores(gbrt, X, ax=axes[1], cm=dsets.ReBl,
                                    function='predict_proba')
for ax in axes:
    dsets.discrete_scatter(X_test[:, 0], X_test[:, 1], y_test, markers='^', ax=ax)
    dsets.discrete_scatter(X_train[:, 0], X_train[:, 1], y_train, markers='o', ax=ax)
    ax.set_xlabel("Feature 0")
    ax.set_ylabel("Feature 1")
cbar = plt.colorbar(score_images, ax=axes.tolist())
axes[0].legend(["Test Class 0", "Test Class 1", "Train Class 0", "Train Class 1"],
               ncol=4, loc=(.1, 1.1))
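# A minimal sketch of the calibration step mentioned above (an assumption, not
# from the original notes), reusing the same gbrt / X_train / X_test / y_train /
# y_test names and assuming a binary problem: calibration_curve bins the predicted
# probabilities and compares them with the observed fraction of positives, and
# CalibratedClassifierCV refits the estimator with probability recalibration.
from sklearn.calibration import CalibratedClassifierCV, calibration_curve

prob_pos = gbrt.predict_proba(X_test)[:, 1]  # predicted probability of class 1
frac_true, mean_pred = calibration_curve(y_test, prob_pos, n_bins=10)
plt.figure("Calibration curve")
plt.plot(mean_pred, frac_true, marker='o', label="gbrt")
plt.plot([0, 1], [0, 1], linestyle='--', label="perfectly calibrated")
plt.xlabel("Mean predicted probability")
plt.ylabel("Fraction of positives")
plt.legend(loc="best")

# Recalibrated model (sigmoid / Platt scaling), fitted with internal cross-validation
calibrated = CalibratedClassifierCV(gbrt, method='sigmoid', cv=5).fit(X_train, y_train)
print("Calibrated test accuracy: {}".format(calibrated.score(X_test, y_test)))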
import numpy as np
import matplotlib.pyplot as plt
from importlib import reload
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier as GBC
import datasets_mglearn as dsets
dsets = reload(dsets)

iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

gbrt = GBC(max_depth=3, learning_rate=0.01, random_state=0).fit(X_train, y_train)
print("Train accuracy: {}".format(gbrt.score(X_train, y_train)))
print("Test accuracy: {}".format(gbrt.score(X_test, y_test)))
print("Prediction probabilities, useful to spot uncertain predictions"
      " (possible false positives or false negatives): \n{}".format(
          gbrt.predict_proba(X_test)[:6, :]))
print("The probabilities in each row should sum to one: \n{}".format(
      gbrt.predict_proba(X_test)[:6].sum(axis=1)))
print("We compare the known y_test with the predictions of the model: \n{}".format(
      y_test == gbrt.predict(X_test)))
print("{}".format({k: v for k, v in zip(["False", "True"],
                                        np.bincount(y_test == gbrt.predict(X_test)))}))

dsets.discrete_scatter(X[:, 0], X[:, 1], y, markers=['o', '^', 'v'])
# The three marker groups are the three classes, so label them with the target names
plt.legend(iris.target_names, loc=(0.1, 1.1), ncol=4)
plt.ion()
dsets.plot_feature_importances(gbrt, iris)
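# A small sketch (not from the original script), reusing the gbrt / X_test names
# above: the predicted class is simply the argmax of predict_proba, mapped through
# gbrt.classes_, so the two outputs should agree for every test sample.
proba = gbrt.predict_proba(X_test)
argmax_pred = gbrt.classes_[np.argmax(proba, axis=1)]
print("argmax(predict_proba) == predict: {}".format(
      np.all(argmax_pred == gbrt.predict(X_test))))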
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from importlib import reload
import datasets_mglearn as dsets
dsets = reload(dsets)
plt.ion()

# Generate dataset
X, y = dsets.make_forge()
plt.figure("Blobs-Classification example")
dsets.discrete_scatter(X[:, 0], X[:, 1], y)
plt.legend(["Class 0", "Class 1"], loc=4)
plt.xlabel("First feature")
plt.ylabel("Second feature")
print("X.shape: {}".format(X.shape))

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

from sklearn.neighbors import KNeighborsClassifier
clf = KNeighborsClassifier(n_neighbors=3)
clf.fit(X_train, y_train)
print("Test set prediction: {}".format(clf.predict(X_test)))
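# A small follow-up sketch (not from the original script), reusing the split
# above: clf.score gives the test accuracy directly, and looping over a few
# n_neighbors values shows how the neighborhood size trades off training fit
# against generalization.
print("Test set accuracy: {:.2f}".format(clf.score(X_test, y_test)))
for n_neighbors in [1, 3, 9]:
    knn = KNeighborsClassifier(n_neighbors=n_neighbors).fit(X_train, y_train)
    print("n_neighbors={}: train {:.2f}, test {:.2f}".format(
          n_neighbors, knn.score(X_train, y_train), knn.score(X_test, y_test)))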
import matplotlib.pyplot as plt
from importlib import reload
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
import datasets_mglearn as dsets
dsets = reload(dsets)
plt.ion()

X, y = make_moons(n_samples=100, noise=0.25, random_state=3)
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

mlp = MLPClassifier(solver='lbfgs', random_state=0,
                    hidden_layer_sizes=[10, 10]).fit(X_train, y_train)
plt.figure("[10,10]")
dsets.plot_2d_separator(mlp, X_train, fill=True, alpha=.3)
dsets.discrete_scatter(X_train[:, 0], X_train[:, 1], y_train)
plt.xlabel("Feature 0")
plt.ylabel("Feature 1")
print("hidden_layer_sizes: [10] = 1 hidden layer with 10 nodes\n"
      "\t\t[10, 10] = 2 hidden layers with 10 nodes each")

mlp = MLPClassifier(solver='lbfgs', random_state=0,
                    hidden_layer_sizes=[10, 10, 10]).fit(X_train, y_train)
plt.figure("{}".format(mlp.hidden_layer_sizes))
dsets.plot_2d_separator(mlp, X_train, fill=True, alpha=.3)
dsets.discrete_scatter(X_train[:, 0], X_train[:, 1], y_train)
plt.xlabel("Feature 0")
plt.ylabel("Feature 1")

# Same architecture, but with tanh activation instead of the default relu
mlp = MLPClassifier(solver='lbfgs', activation='tanh', random_state=0,
                    hidden_layer_sizes=[10, 10, 10]).fit(X_train, y_train)
plt.figure("{}, {}".format(mlp.hidden_layer_sizes, mlp.activation))
dsets.plot_2d_separator(mlp, X_train, fill=True, alpha=.3)
dsets.discrete_scatter(X_train[:, 0], X_train[:, 1], y_train)
plt.xlabel("Feature 0")
plt.ylabel("Feature 1")
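# A further sketch in the same spirit (an assumption, not from the original
# notes): MLPClassifier's alpha parameter adds L2 regularization, and comparing
# train/test accuracy for a few values shows how it reins in overfitting.
for alpha in [0.0001, 0.01, 1.0]:
    mlp = MLPClassifier(solver='lbfgs', random_state=0, alpha=alpha,
                        hidden_layer_sizes=[10, 10]).fit(X_train, y_train)
    print("alpha={}: train {:.2f}, test {:.2f}".format(
          alpha, mlp.score(X_train, y_train), mlp.score(X_test, y_test)))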