# Kernelized SVM on the handcrafted dataset: draw the RBF decision boundary
# with the support vectors highlighted, then sweep C and gamma on a 3x3 grid.
from main import mglearn, train_test_split, plt, np
from sklearn.svm import SVC

X, y = mglearn.tools.make_handcrafted_dataset()
svm = SVC(kernel='rbf', C=10, gamma=0.1).fit(X, y)

# First figure: decision boundary plus the raw data points.
mglearn.plots.plot_2d_separator(svm, X, eps=.5)
mglearn.discrete_scatter(X[:, 0], X[:, 1], y)
# BUG FIX: the support-vector overlay must be drawn on THIS figure. The
# original ran these lines after plt.subplots() below, so the markers ended
# up on the 3x3 parameter-sweep figure instead.
sv = svm.support_vectors_
# The sign of the dual coefficient encodes each support vector's class.
sv_labels = svm.dual_coef_.ravel() > 0
mglearn.discrete_scatter(sv[:, 0], sv[:, 1], sv_labels, s=15,
                         markeredgewidth=3)
plt.xlabel("Feature 0")
plt.ylabel("Feature 1")

# Second figure: vary log10(C) over rows and log10(gamma) over columns.
fig, axes = plt.subplots(3, 3, figsize=(15, 10))
for ax, C in zip(axes, [-1, 0, 3]):
    for a, gamma in zip(ax, range(-1, 2)):
        mglearn.plots.plot_svm(log_C=C, log_gamma=gamma, ax=a)
axes[0, 0].legend(["Class 0", "Class 1", "sv class 0", "sv class 1"],
                  ncol=4, loc=(.9, 1.2))
plt.show()
# Extrapolation demo on historical RAM prices: train a regression tree and a
# linear model on data before the year 2000 and predict all years; the tree
# cannot extrapolate beyond the training range, the linear model can.
from main import mglearn, train_test_split, pd, plt, np
import os

ram_prices = pd.read_csv(
    os.path.join(mglearn.datasets.DATA_PATH, "ram_price.csv"))
plt.semilogy(ram_prices.date, ram_prices.price)
plt.xlabel("year")
plt.ylabel("Price in $/Mbyte")  # BUG FIX: label typo was "Mbtype"

from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression

# Split on the year 2000: past = train, future = test.
data_train = ram_prices[ram_prices.date < 2000]
data_test = ram_prices[ram_prices.date >= 2000]

# BUG FIX: Series[:, np.newaxis] multi-dimensional indexing was removed from
# pandas; convert to a NumPy array before adding the feature axis.
X_train = data_train.date.to_numpy()[:, np.newaxis]
# Log-transform the target so the roughly exponential price decay is linear.
y_train = np.log(data_train.price)

tree = DecisionTreeRegressor().fit(X_train, y_train)
linear_reg = LinearRegression().fit(X_train, y_train)

# Predict over the full date range, then undo the log transform.
X_all = ram_prices.date.to_numpy()[:, np.newaxis]
pred_tree = tree.predict(X_all)
pred_lr = linear_reg.predict(X_all)
price_tree = np.exp(pred_tree)
price_lr = np.exp(pred_lr)

plt.semilogy(data_train.date, data_train.price, label="Training data")
plt.semilogy(data_test.date, data_test.price, label="Test data")
# BUG FIX: the predictions were computed but never plotted.
plt.semilogy(ram_prices.date, price_tree, label="Tree prediction")
plt.semilogy(ram_prices.date, price_lr, label="Linear prediction")
plt.legend()
# Binning demo: fit a depth-limited regression tree to the 1-D wave dataset,
# then discretize the feature into 10 equal-width bins and one-hot-encode
# the bin membership.
from main import mglearn, train_test_split, plt, np, pd
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor

X, y = mglearn.datasets.make_wave(n_samples=100)
# Dense grid over the feature range for plotting the prediction curve.
line = np.linspace(-3, 3, 1000, endpoint=False).reshape(-1, 1)

reg = DecisionTreeRegressor(min_samples_split=3).fit(X, y)
# BUG FIX: the label said "linear regression" but the curve is the tree's.
plt.plot(line, reg.predict(line), label="decision tree")
plt.plot(X[:, 0], y, 'o', c='k')
plt.ylabel("R")
plt.xlabel("I")
plt.legend(loc='best')

# 10 equal-width bins over [-3, 3] (11 edges).
bins = np.linspace(-3, 3, 11)
print("{}".format(bins))
which_bin = np.digitize(X, bins=bins)
print(X[:5])
print(which_bin[:5])

from sklearn.preprocessing import OneHotEncoder
# BUG FIX: the `sparse` parameter was removed in scikit-learn 1.4;
# `sparse_output` is the replacement (available since 1.2).
encoder = OneHotEncoder(sparse_output=False)
encoder.fit(which_bin)
X_binned = encoder.transform(which_bin)
print(X_binned[:5])
print(X[:, 0])
# plt.show()
# Dataset preview (disabled):
# fig, axes = plt.subplots(2, 5, figsize=(10, 5), subplot_kw={"xticks":(), 'yticks':()})
# for ax, img in zip(axes.ravel(), digits['images']):
#     ax.imshow(img)

# Embed the digits dataset into 2-D with t-SNE and render every sample as
# its digit character, colored per class.
colors = ['#476A2A', '#7851B8', '#BD3430', '#4A2D4E', '#875525',
          '#A83683', '#4E655E', '#853541', '#3A3120', '#535D8E']
plt.figure(figsize=(10, 10))

# t-SNE has no separate transform(); fit and embed in one call.
digits_tsne = TSNE(random_state=42).fit_transform(digits['data'])

plt.xlim(digits_tsne[:, 0].min(), digits_tsne[:, 0].max())
plt.ylim(digits_tsne[:, 1].min(), digits_tsne[:, 1].max())

# Place each sample's class digit at its embedded coordinates.
for point, target in zip(digits_tsne, digits['target']):
    plt.text(point[0], point[1], str(target),
             color=colors[target], fontdict={"weight": 'bold', 'size': 9})

# PCA variant kept for comparison (disabled):
# pca = PCA(n_components=2)
# pca.fit(digits['data'])
# digits_pca = pca.transform(digits['data'])
# plt.xlim(digits_pca[:, 0].min(), digits_pca[:, 0].max())
# plt.ylim(digits_pca[:, 1].min(), digits_pca[:, 1].max())
# for i in range(len(digits['data'])):
#     plt.text(digits_pca[i, 0], digits_pca[i, 1], str(digits['target'][i]),
#              color=colors[digits['target'][i]],
#              fontdict={"weight": 'bold', 'size': 9})

plt.xlabel("First")
plt.ylabel("Second")
plt.show()
# Leftovers from a previous plot (disabled):
# ax.set_title("{} neighboor(s)".format(n_neighbors))
# ax.set_xlabel("feature 0")
# ax.set_ylabel("feature 0")
# axes[0].legend(loc=3)
# plt.show()

# How many neighbors give the best accuracy? (k-nearest neighbors)
# Sweep n_neighbors from 1 to 10 on the breast-cancer dataset and plot
# training vs. test accuracy for each setting.
from sklearn.datasets import load_breast_cancer

cancer = load_breast_cancer()
# Stratified split keeps the class balance identical in both halves.
X_train, X_test, y_train, y_test = train_test_split(
    cancer['data'], cancer['target'],
    stratify=cancer['target'], random_state=66)

neighbors_settings = range(1, 11)
training_accuracy = []
test_accuracy = []
for k in neighbors_settings:
    model = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)
    training_accuracy.append(model.score(X_train, y_train))
    test_accuracy.append(model.score(X_test, y_test))

plt.plot(neighbors_settings, training_accuracy, label="training accuracy")
plt.plot(neighbors_settings, test_accuracy, label="test accuracy")
plt.ylabel("Accuracy")
plt.xlabel("n_neighbors")
plt.legend()
plt.show()