import matplotlib.pyplot as plt
import mglearn


def load_data():
    # y = f(X)
    X, y = mglearn.datasets.make_forge()
    print("X.shape: {}".format(X.shape))
    # plot the dataset as a scatter plot
    mglearn.discrete_scatter(X[:, 0], X[:, 1], y)
    plt.xlabel("First feature")
    plt.ylabel("Second feature")
    plt.legend(["Class 0", "Class 1"], loc='lower right')
    plt.show()
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import mglearn

X, y = mglearn.datasets.make_forge()

fig, axes = plt.subplots(1, 2, figsize=(10, 3))
for model, ax in zip([LinearSVC(), LogisticRegression()], axes):
    clf = model.fit(X, y)
    mglearn.plots.plot_2d_separator(clf, X, fill=False, eps=0.5, ax=ax, alpha=.7)
    mglearn.discrete_scatter(X[:, 0], X[:, 1], y, ax=ax)
    ax.set_title("{}".format(clf.__class__.__name__))
    ax.set_xlabel("Feature 0")
    ax.set_ylabel("Feature 1")
axes[0].legend()
plt.show()
%cd C:\Users\bama6012\Desktop\desk\Python My study\Py Codes-Introduction to Machine Learning Book
data = 'C:/Users/bama6012/Desktop/desk/Python My study/data/'

# import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import mglearn

# Some Sample Datasets ---------------------------------------------------------

# generate dataset
X, y = mglearn.datasets.make_forge()

# plot dataset
mglearn.discrete_scatter(X[:, 0], X[:, 1], y)
plt.legend(["Class 0", "Class 1"], loc=4)
plt.xlabel("First feature")
plt.ylabel("Second feature")
print("X.shape: {}".format(X.shape))

X, y = mglearn.datasets.make_wave(n_samples=40)
plt.plot(X, y, 'o')
plt.ylim(-3, 3)
plt.xlabel("Feature")
plt.ylabel("Target")

from sklearn.datasets import load_breast_cancer
cancer = load_breast_cancer()
print('cancer.keys() : \n{}'.format(cancer.keys()))
print('shape of cancer data : {}'.format(cancer['data'].shape))
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import mglearn

X, y = mglearn.tools.make_handcrafted_dataset()
svm = SVC(kernel='rbf', C=10, gamma=0.1).fit(X, y)

mglearn.plots.plot_2d_separator(svm, X, eps=.5)
mglearn.discrete_scatter(X[:, 0], X[:, 1], y)

# plot the support vectors; the sign of dual_coef_ gives each one's class
sv = svm.support_vectors_
sv_labels = svm.dual_coef_.ravel() > 0
mglearn.discrete_scatter(sv[:, 0], sv[:, 1], sv_labels, s=15, markeredgewidth=3)
plt.xlabel('Feature 0')
plt.ylabel('Feature 1')
plt.show()
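# A follow-up sketch, adapted from the book's RBF-kernel discussion, showing
# how the C and gamma parameters reshape the decision boundary on the same
# toy data. The parameter grid is the book's illustrative choice.
fig, axes = plt.subplots(3, 3, figsize=(15, 10))
for ax, C in zip(axes, [-1, 0, 3]):
    for a, gamma in zip(ax, range(-1, 2)):
        mglearn.plots.plot_svm(log_C=C, log_gamma=gamma, ax=a)
axes[0, 0].legend(["class 0", "class 1", "sv class 0", "sv class 1"],
                  ncol=4, loc=(.9, 1.2))
plt.show()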
from sklearn.decomposition import PCA

# keep the first two principal components of the data
pca = PCA(n_components=2)
# fit PCA model to breast cancer data
pca.fit(X_scaled)

# transform data onto the first two principal components
X_pca = pca.transform(X_scaled)
print("Original shape: {}".format(str(X_scaled.shape)))
print("Reduced shape: {}".format(str(X_pca.shape)))

# plot first vs. second principal component, colored by class
plt.figure(figsize=(8, 8))
mglearn.discrete_scatter(X_pca[:, 0], X_pca[:, 1], cancer.target)
plt.legend(cancer.target_names, loc="best")
plt.gca().set_aspect("equal")
plt.xlabel("First principal component")
plt.ylabel("Second principal component")

"""The principal components themselves are stored in the components_
attribute of the PCA object during fitting."""
print("PCA component shape: {}".format(pca.components_.shape))

"""Each row in components_ corresponds to one principal component, and the rows
are sorted by importance (the first principal component comes first, and so on).
The columns correspond to the original feature attributes of the PCA in this
example: "mean radius," "mean texture," and so on. Let's have a look at the
content of components_."""
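# A minimal sketch (following the book's convention) of one way to look at the
# content of components_: a heatmap with one row per component and one column
# per original feature. It assumes pca, cancer, and plt from the snippet above
# are still in scope.
plt.matshow(pca.components_, cmap='viridis')
plt.yticks([0, 1], ["First component", "Second component"])
plt.colorbar()
plt.xticks(range(len(cancer.feature_names)),
           cancer.feature_names, rotation=60, ha='left')
plt.xlabel("Feature")
plt.ylabel("Principal components")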
# K-Means for temp anomalies
# -----------------------------------------------------------------------------
from sklearn.cluster import KMeans

# initialize the algorithm and fit it with the data
kmeans = KMeans(n_clusters=5)
X = Var_frommean.to_numpy().reshape(-1, 1)
kmeans.fit(X)
kmeans.cluster_centers_
print("Cluster memberships:\n{}".format(kmeans.labels_))

# assign classes to each data point based on the model
classes = kmeans.predict(X)

# inspect the centroids of the clusters
print(kmeans.cluster_centers_)
kmeans_clusters = kmeans.cluster_centers_

# shortcut to visualize the data points and the range of each cluster
mglearn.discrete_scatter(X, X, kmeans.labels_, markers='o')

# Volcanic activity is expected to have the largest impact of all forcings,
# so look for the time points in the cluster with the lowest centroid
dip = np.argwhere(classes == np.argmin(kmeans_clusters))

# look for the years which have the biggest dips
dipinyear = list(int(timelist[i][0] / 10000) for i in dip)
len(dipinyear)

# -----------------------------------------------------------------------------
# K-Means for filtered temp anomalies
# -----------------------------------------------------------------------------
# initialize the algorithm and fit it with the data
kmeans = KMeans(n_clusters=5)
X = Var_frommean.to_numpy().reshape(-1, 1)

# apply the filter: define the window to filter
lowcut = 1 / (365 * 86400 * 10)
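# The script above stops after defining 'lowcut' (a 10-year period expressed
# in Hz, assuming time is measured in seconds). A hedged sketch of what the
# filtering step could look like with a scipy.signal Butterworth band-pass;
# 'fs' (daily sampling) and 'highcut' (a 1-year period) are illustrative
# assumptions, not values from the original script.
from scipy import signal

fs = 1 / 86400                    # assumed sampling rate: one sample per day
highcut = 1 / (365 * 86400)       # hypothetical upper band edge: 1-year period
nyq = 0.5 * fs
b, a = signal.butter(4, [lowcut / nyq, highcut / nyq], btype='band')
X_filtered = signal.filtfilt(b, a, X.ravel()).reshape(-1, 1)
kmeans.fit(X_filtered)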
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt
import mglearn

# evaluate (X, y and classifier come from the earlier forge-dataset code)
print("Accuracy: {:.2f}".format(classifier.score(X_test, y_test)))

# decision boundaries for different numbers of neighbors
fig, axes = plt.subplots(1, 3, figsize=(10, 3))
for n_neighbors, ax in zip([1, 3, 9], axes):
    classifier = KNeighborsClassifier(n_neighbors=n_neighbors).fit(X, y)
    mglearn.plots.plot_2d_separator(classifier, X, fill=True, eps=0.5, ax=ax, alpha=.4)
    mglearn.discrete_scatter(X[:, 0], X[:, 1], y, ax=ax)
    ax.set_title("{} neighbor(s)".format(n_neighbors))
    ax.set_xlabel("Feature 0")
    ax.set_ylabel("Feature 1")
axes[0].legend(loc=3)
plt.show()

# complexity/generalization of the model with the cancer dataset
cancer = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, stratify=cancer.target, random_state=66)
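# A sketch of the complexity study this split sets up: record training and
# test accuracy while varying n_neighbors. This mirrors the book's standard
# approach; the exact range of neighbor settings is an illustrative choice.
training_accuracy = []
test_accuracy = []
neighbors_settings = range(1, 11)
for n_neighbors in neighbors_settings:
    clf = KNeighborsClassifier(n_neighbors=n_neighbors)
    clf.fit(X_train, y_train)
    training_accuracy.append(clf.score(X_train, y_train))
    test_accuracy.append(clf.score(X_test, y_test))
plt.plot(neighbors_settings, training_accuracy, label="training accuracy")
plt.plot(neighbors_settings, test_accuracy, label="test accuracy")
plt.xlabel("n_neighbors")
plt.ylabel("Accuracy")
plt.legend()
plt.show()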
""" Created on Thu Mar 15 10:18:39 2018 @author: Yuan-Ray Chang """ import numpy as np import matplotlib.pyplot as plt import pandas as pd import mglearn from sklearn.datasets import make_blobs X, y = make_blobs(centers=4, random_state=8) y = y % 2 mglearn.discrete_scatter(X[:, 0], X[:, 1], y) plt.xlabel("Feature 0") plt.ylabel("Feature 1") from sklearn.svm import LinearSVC linear_svm = LinearSVC().fit(X, y) mglearn.plots.plot_2d_separator(linear_svm, X) mglearn.discrete_scatter(X[:, 0], X[:, 1], y) plt.xlabel("Feature 0") plt.ylabel("Feature 1") X_new = np.hstack([X, X[:, 1:]**2]) from mpl_toolkits.mplot3d import Axes3D, axes3d figure = plt.figure() ax = Axes3D(figure, elev=-152, azim=-26)
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# import the mglearn module
import sys
sys.path.append("../")
import mglearn

X, y = make_moons(n_samples=100, noise=0.25, random_state=3)
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

fig, axes = plt.subplots(2, 4, figsize=(20, 8))
for i, ax in enumerate(axes.ravel()):
    mlp = MLPClassifier(solver='lbfgs', random_state=i, hidden_layer_sizes=[100, 100])
    mlp.fit(X_train, y_train)
    mglearn.plots.plot_2d_separator(mlp, X_train, fill=True, alpha=.3, ax=ax)
    mglearn.discrete_scatter(X_train[:, 0], X_train[:, 1], y_train, ax=ax)
plt.show()
import matplotlib.pyplot as plt
import numpy as np
import mglearn

fig, axes = plt.subplots(2, 3, figsize=(20, 10))
for i, (ax, tree) in enumerate(zip(axes.ravel(), forest.estimators_)):
    ax.set_title("tree {}".format(i))
    mglearn.plots.plot_tree_partition(x, y, tree, ax=ax)

mglearn.plots.plot_2d_separator(forest, x, fill=True, ax=axes[-1, -1], alpha=.4)
axes[-1, -1].set_title("Random Forest")
mglearn.discrete_scatter(x[:, 0], x[:, 1], y)

# In[42]:
import matplotlib.pyplot as plt
import mglearn

forest.fit(X, y)
importances = forest.feature_importances_
std = np.std([tree.feature_importances_ for tree in forest.estimators_], axis=0)
indices = np.argsort(importances)

plt.figure()
plt.title("Feature importances")
# (the original line was truncated; the xerr/align arguments follow the
# standard feature-importance bar-plot pattern that std was computed for)
plt.barh(range(X.shape[1]), importances[indices], color="r",
         xerr=std[indices], align="center")
import pandas as pd
from sklearn.model_selection import train_test_split
# (knn_c is assumed to be an alias for KNeighborsClassifier)
from sklearn.neighbors import KNeighborsClassifier as knn_c

# pca.components_:
# array([[ 0.37649644, -0.06637905,  0.85134571,  0.35924188],
#        [ 0.6240207 ,  0.75538031, -0.18479376, -0.07648543]])
# Each principal component is a linear combination of the original features:
C1 = 0.37649644 * X1 + -0.06637905 * X2 + 0.85134571 * X3 + 0.35924188 * X4
C2 = 0.6240207 * X1 + 0.75538031 * X2 + -0.18479376 * X3 + -0.07648543 * X4

# PCA + kNN (ensemble) for iris data
# 4) apply the derived artificial variables to a kNN model
m_knn = knn_c(5)
m_knn.fit(train_x_pca, train_y)
m_knn.score(test_x_pca, test_y)   # 0.973

# 5) check the distribution of the data points (scatter plot)
import mglearn
mglearn.discrete_scatter(train_x_pca[:, 0], train_x_pca[:, 1], train_y)

# -------------------------------- Exercises --------------------------------- #
# Exercise 7. apply PCA + SVM to the cancer data
# 1) load the data
df1 = pd.read_csv('cancer.csv')
Y = df1.iloc[:, 1]
X = df1.iloc[:, 2:]
train_x, test_x, train_y, test_y = train_test_split(X, Y, random_state=0)

# 2) scaling
from sklearn.preprocessing import MinMaxScaler as minmax
m_sc = minmax()
m_sc.fit(train_x)
train_x_sc = m_sc.transform(train_x)
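# A hedged sketch of the remaining exercise steps (3: PCA, 4: SVM), in the
# same m_-prefixed style as above; SVC with default parameters is an
# illustrative choice, not the author's solution.
test_x_sc = m_sc.transform(test_x)   # reuse the scaler fit on the training set

# 3) PCA on the scaled data
from sklearn.decomposition import PCA
m_pca = PCA(n_components=2)
m_pca.fit(train_x_sc)
train_x_pca = m_pca.transform(train_x_sc)
test_x_pca = m_pca.transform(test_x_sc)

# 4) fit and score an SVM on the PCA-transformed data
from sklearn.svm import SVC
m_svm = SVC()
m_svm.fit(train_x_pca, train_y)
m_svm.score(test_x_pca, test_y)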
mglearn.tools.plot_2d_separator(gbrt, X, ax=axes[0], alpha=0.4,
                                fill=True, cm=mglearn.cm2)
scores_image = mglearn.tools.plot_2d_scores(gbrt, X, ax=axes[1], alpha=0.5,
                                            cm=mglearn.ReBl, function='predict_proba')

for ax in axes:
    mglearn.discrete_scatter(X_test[:, 0], X_test[:, 1], y_test, markers='^', ax=ax)
    mglearn.discrete_scatter(X_train[:, 0], X_train[:, 1], y_train, markers='o', ax=ax)
    ax.set_xlabel('Feature 0')
    ax.set_ylabel('Feature 1')

# color bar: darker colors indicate higher confidence
cbar = plt.colorbar(scores_image, ax=axes.tolist())
axes[0].legend(['Test class 0', 'Test class 1', 'Train class 0', 'Train class 1'],
               ncol=4, loc=(0.1, 1.1))
plt.show()