Code Example #1
from main import mglearn, train_test_split, plt, np

from sklearn.svm import SVC

X, y = mglearn.tools.make_handcrafted_dataset()
svm = SVC(kernel='rbf', C=10, gamma=0.1).fit(X, y)
mglearn.plots.plot_2d_separator(svm, X, eps=.5)
mglearn.discrete_scatter(X[:, 0], X[:, 1], y)

# Highlight the support vectors on the plot above.
sv = svm.support_vectors_
# The class labels of the support vectors are given by the sign of the dual coefficients.
sv_labels = svm.dual_coef_.ravel() > 0
mglearn.discrete_scatter(sv[:, 0],
                         sv[:, 1],
                         sv_labels,
                         s=15,
                         markeredgewidth=3)
plt.xlabel("Feature 0")
plt.ylabel("Feature 1")

# Effect of the C and gamma parameters: a 3x3 grid of decision boundaries.
fig, axes = plt.subplots(3, 3, figsize=(15, 10))
for ax, C in zip(axes, [-1, 0, 3]):
    for a, gamma in zip(ax, range(-1, 2)):
        mglearn.plots.plot_svm(log_C=C, log_gamma=gamma, ax=a)
axes[0, 0].legend(["Class 0", "Class 1", "sv class 0", "sv class 1"],
                  ncol=4,
                  loc=(.9, 1.2))
plt.show()
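
A possible follow-up (not in the original notes): instead of eyeballing the 3x3 grid, a small cross-validated grid search can score the C/gamma combinations directly. GridSearchCV and the value ranges below are my assumptions, chosen to roughly match the log ranges plotted above.

from sklearn.model_selection import GridSearchCV

# Hypothetical sketch: cross-validate over a C/gamma grid like the one plotted.
param_grid = {'C': [0.1, 1, 1000], 'gamma': [0.1, 1, 10]}
grid = GridSearchCV(SVC(kernel='rbf'), param_grid, cv=5)
grid.fit(X, y)
print("Best parameters:", grid.best_params_)
print("Best cross-validation score: {:.2f}".format(grid.best_score_))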
Code Example #2
from main import mglearn, train_test_split, pd, plt, np

import os
ram_prices = pd.read_csv(
    os.path.join(mglearn.datasets.DATA_PATH, "ram_price.csv"))
plt.semilogy(ram_prices.date, ram_prices.price)
plt.xlabel("year")
plt.ylabel("Price in $/Mbtype")

from sklearn.tree import DecisionTreeRegressor
data_train = ram_prices[ram_prices.date < 2000]
data_test = ram_prices[ram_prices.date >= 2000]

# Predict prices based on date (reshape the dates into a 2D feature array;
# newer pandas no longer allows [:, np.newaxis] directly on a Series).
X_train = data_train.date.to_numpy()[:, np.newaxis]
y_train = np.log(data_train.price)

tree = DecisionTreeRegressor().fit(X_train, y_train)

from sklearn.linear_model import LinearRegression
linear_reg = LinearRegression().fit(X_train, y_train)

# Predict on all dates, including the test period after 2000.
X_all = ram_prices.date.to_numpy()[:, np.newaxis]

pred_tree = tree.predict(X_all)
pred_lr = linear_reg.predict(X_all)

price_tree = np.exp(pred_tree)
price_lr = np.exp(pred_lr)

plt.semilogy(data_train.date, data_train.price, label="Training data")
plt.semilogy(data_test.date, data_test.price, label="Test data")
plt.semilogy(ram_prices.date, price_tree, label="Tree prediction")
plt.semilogy(ram_prices.date, price_lr, label="Linear prediction")
plt.legend()
plt.show()
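
A quick sanity check, added here as my own sketch (it is not in the original notes): a decision tree cannot extrapolate, so every prediction outside the training range repeats the value of the outermost leaf.

# Hypothetical check: all post-2000 predictions collapse to one constant.
X_test = data_test.date.to_numpy()[:, np.newaxis]
print("distinct tree predictions after 2000:", np.unique(tree.predict(X_test)))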
Code Example #3
from main import mglearn, train_test_split, plt, np, pd

from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor

X, y = mglearn.datasets.make_wave(n_samples=100)
line = np.linspace(-3, 3, 1000, endpoint=False).reshape(-1, 1)
reg = DecisionTreeRegressor(min_samples_split=3).fit(X, y)
plt.plot(line, reg.predict(line), label="decision tree")

plt.plot(X[:, 0], y, 'o', c='k')
plt.ylabel("R")
plt.xlabel("I")
plt.legend(loc='best')

bins = np.linspace(-3, 3, 11)
print("{}".format(bins))

which_bin = np.digitize(X, bins=bins)
print("\nData points:\n", X[:5])
print("\nBin membership for data points:\n", which_bin[:5])

from sklearn.preprocessing import OneHotEncoder
encoder = OneHotEncoder(sparse_output=False)  # use sparse=False on scikit-learn < 1.2
encoder.fit(which_bin)
X_binned = encoder.transform(which_bin)
print(X_binned[:5])

print("Original feature values:\n", X[:, 0])

plt.show()
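
As a hedged continuation of the binning idea (my sketch; the names line_binned and reg_binned are mine): a linear regression fit on the one-hot bin features becomes piecewise constant, with one level per bin.

# Hypothetical continuation: linear regression on the binned features.
line_binned = encoder.transform(np.digitize(line, bins=bins))
reg_binned = LinearRegression().fit(X_binned, y)
plt.plot(line, reg_binned.predict(line_binned),
         label='linear regression binned')
plt.plot(X[:, 0], y, 'o', c='k')
plt.vlines(bins, -3, 3, linewidth=1, alpha=.2)
plt.legend(loc='best')
plt.show()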
Code Example #4
from main import plt

from sklearn.datasets import load_digits
from sklearn.manifold import TSNE

digits = load_digits()

# Dataset preview (commented out): show the first ten digit images.
# fig, axes = plt.subplots(2, 5, figsize=(10, 5), subplot_kw={"xticks":(), 'yticks':()})
# for ax, img in zip(axes.ravel(), digits['images']):
#     ax.imshow(img)

colors = ['#476A2A', '#7851B8', '#BD3430', '#4A2D4E', '#875525',
          '#A83683', '#4E655E', '#853541', '#3A3120', '#535D8E']
plt.figure(figsize=(10, 10))

# t-SNE: embed the 64-dimensional digit images into two dimensions.
tsne = TSNE(random_state=42)
digits_tsne = tsne.fit_transform(digits['data'])
plt.xlim(digits_tsne[:, 0].min(), digits_tsne[:, 0].max())
plt.ylim(digits_tsne[:, 1].min(), digits_tsne[:, 1].max())
for i in range(len(digits['data'])):
    # Draw each digit as colored text at its embedded coordinates.
    plt.text(digits_tsne[i, 0], digits_tsne[i, 1], str(digits['target'][i]),
             color=colors[digits['target'][i]],
             fontdict={'weight': 'bold', 'size': 9})

# pca
# pca = PCA(n_components=2)
# pca.fit(digits['data'])
# digits_pca = pca.transform(digits['data'])
# plt.xlim(digits_pca[:, 0].min(), digits_pca[:, 0].max())
# plt.ylim(digits_pca[:, 1].min(), digits_pca[:, 1].max())
# for i in range(len(digits['data'])):
#     plt.text(digits_pca[i, 0], digits_pca[i, 1], str(digits['target'][i]), color = colors[digits['target'][i]], fontdict={"weight": 'bold', 'size': 9})


plt.xlabel("First")
plt.ylabel("Second")
plt.show()
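
The PCA block above is commented out and is missing its import; below is a minimal runnable version of the same comparison, written as my own sketch rather than the original code.

from sklearn.decomposition import PCA

# Hypothetical sketch: the same text-scatter plot on the first two principal components.
pca = PCA(n_components=2)
digits_pca = pca.fit_transform(digits['data'])
plt.figure(figsize=(10, 10))
plt.xlim(digits_pca[:, 0].min(), digits_pca[:, 0].max())
plt.ylim(digits_pca[:, 1].min(), digits_pca[:, 1].max())
for i in range(len(digits['data'])):
    plt.text(digits_pca[i, 0], digits_pca[i, 1], str(digits['target'][i]),
             color=colors[digits['target'][i]],
             fontdict={'weight': 'bold', 'size': 9})
plt.xlabel("First principal component")
plt.ylabel("Second principal component")
plt.show()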

Code Example #5
from main import train_test_split, plt, np

from sklearn.neighbors import KNeighborsClassifier

# How many neighbors give the best accuracy? (k-nearest neighbors)
from sklearn.datasets import load_breast_cancer
cancer = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(cancer['data'],
                                                    cancer['target'],
                                                    stratify=cancer['target'],
                                                    random_state=66)
training_accuracy = []
test_accuracy = []
neighbors_settings = range(1, 11)

for n_neighbors in neighbors_settings:
    clf = KNeighborsClassifier(n_neighbors=n_neighbors)
    clf.fit(X_train, y_train)

    training_accuracy.append(clf.score(X_train, y_train))
    test_accuracy.append(clf.score(X_test, y_test))

plt.plot(neighbors_settings, training_accuracy, label="training accuracy")
plt.plot(neighbors_settings, test_accuracy, label="test accuracy")
plt.ylabel("Accuracy")
plt.xlabel("n_neighbors")
plt.legend()
plt.show()
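
A small follow-up sketch (my addition, not in the original notes; np comes from the shared main import above): read the best k off the curves programmatically.

# Hypothetical follow-up: pick the k with the highest test accuracy.
best_k = neighbors_settings[int(np.argmax(test_accuracy))]
print("best n_neighbors: {} (test accuracy {:.3f})".format(
    best_k, max(test_accuracy)))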