Code example #1
import numpy as np
import matplotlib.pyplot as plt
import mglearn
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor


def knnWaveEG():
    X, y = mglearn.datasets.make_wave(n_samples=40)
    XTrain, XTest, yTrain, yTest = train_test_split(X, y, random_state=0)
    reg = KNeighborsRegressor(n_neighbors=3)
    reg.fit(XTrain, yTrain)
    print("Test set predictions:\n{}".format(reg.predict(XTest)))
    print("Test set R^2: {:.2f}".format(reg.score(XTest, yTest)))
    fig, axes = plt.subplots(1, 3, figsize=(15, 4))
    # reshape(-1, 1): -1 lets NumPy infer the row count, giving one column
    line = np.linspace(-3, 3, 1000).reshape(-1, 1)
    for nNeighbors, ax in zip([1, 3, 8], axes):
        reg = KNeighborsRegressor(n_neighbors=nNeighbors)
        reg.fit(XTrain, yTrain)
        ax.plot(line, reg.predict(line))
        ax.plot(XTrain, yTrain, '^', c=mglearn.cm2(0), markersize=8)
        ax.plot(XTest, yTest, 'v', c=mglearn.cm2(1), markersize=8)
        ax.set_title(("{} neighbor(s)\n train score: {:.2f} "
                      "test score: {:.2f}").format(nNeighbors,
                                                   reg.score(XTrain, yTrain),
                                                   reg.score(XTest, yTest)))
        ax.set_xlabel("Feature")
        ax.set_ylabel("Target")
    # add the legend once, after the loop
    axes[0].legend(["Model predictions", "Training data/target",
                    "Test data/target"], loc="best")
    plt.show()
Code example #2
File: kmeans_clustering.py  Project: honchardev/Fun
import matplotlib.pyplot as plt
import mglearn
from sklearn.cluster import KMeans
from sklearn.datasets import make_moons


def kmeans_failure_3():  # moon
    # k-means also performs poorly if the clusters have more complex shapes,
    # like the two_moons data
    X, y = make_moons(n_samples=200, noise=0.05, random_state=0)
    kmeans = KMeans(n_clusters=2)
    kmeans.fit(X)
    y_pred = kmeans.predict(X)
    # plot the cluster assignments and cluster centers
    plt.scatter(X[:, 0], X[:, 1], c=y_pred, s=60)
    plt.scatter(
        kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
        c=[mglearn.cm2(0), mglearn.cm2(1)],
        marker='^', s=100, linewidth=2
    )
    plt.xlabel("Feature 0")
    plt.ylabel("Feature 1")
    plt.show()
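# A sketch (not in the original) that quantifies the failure described above:
# compare the k-means assignment with the true two_moons labels using the
# adjusted Rand index (1.0 = perfect agreement).
from sklearn.cluster import KMeans
from sklearn.datasets import make_moons
from sklearn.metrics import adjusted_rand_score

X, y = make_moons(n_samples=200, noise=0.05, random_state=0)
y_pred = KMeans(n_clusters=2, random_state=0).fit_predict(X)
print("ARI of k-means vs. true moons: {:.2f}".format(adjusted_rand_score(y, y_pred)))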
Code example #3
def analyze(X_train, X_test, y_train, y_test):
	# analyze
	fig, axes = plt.subplots(1, 3, figsize=(15, 4))
	# create 1,000 data points, evenly spaced between -3 and 3
	line = np.linspace(-3, 3, 1000).reshape(-1, 1)
	for n_neighbors, ax in zip([1, 3, 9], axes):
		# make predictions using 1, 3 or 9 neighbors
		reg = KNeighborsRegressor(n_neighbors=n_neighbors)
		reg.fit(X_train, y_train)
		ax.plot(line, reg.predict(line))
		ax.plot(X_train, y_train, '^', c=mglearn.cm2(0), markersize=8)
		ax.plot(X_test, y_test, 'v', c=mglearn.cm2(1), markersize=8)
		train_score = reg.score(X_train, y_train)
		test_score = reg.score(X_test, y_test)
		ax.set_title("{} neighbor(s)\n train score: {:.2f} test score: {:.2f}".format(n_neighbors, train_score, test_score))
		ax.set_xlabel("Feature")
		ax.set_ylabel("Target")

	axes[0].legend(["Model Predictions", "Training data/target", "Test data/target"], loc="best")
	plt.show()
Code example #4
def compare_n_neighbors():
    fig, axes = plt.subplots(1, 3, figsize=(15, 4))
    # create 1000 data points, evenly spaced between -3 and 3
    line = np.linspace(-3, 3, 1000).reshape(-1, 1)
    for n_neighbors, ax in zip([1, 3, 9], axes):
        # make predictions using 1, 3, or 9 neighbors
        reg = KNeighborsRegressor(n_neighbors=n_neighbors)
        reg.fit(X_train, y_train)
        ax.plot(line, reg.predict(line))
        ax.plot(X_train, y_train, '^', c=mglearn.cm2(0), markersize=8)
        ax.plot(X_test, y_test, 'v', c=mglearn.cm2(1), markersize=8)

        ax.set_title(
            '{} neighbor(s)\ntrain score: {:.2f} test score: {:.2f}'.format(
                n_neighbors, reg.score(X_train, y_train),
                reg.score(X_test, y_test)))
        ax.set_xlabel('Feature')
        ax.set_ylabel('Target')
    axes[0].legend(
        ['Model predictions', 'Training data/target', 'Test data/target'],
        loc='best')
    plt.show()
Code example #5
mglearn.discrete_scatter(X[:, 0], X[:, 1], kmeans.labels_, markers='o')
mglearn.discrete_scatter(
    kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], [0, 1, 2],
    markers='^', markeredgewidth=2)
plt.xlabel("Feature 0")
plt.ylabel("Feature 1")
# generate the two_moons data (this time with only a little noise)
from sklearn.datasets import make_moons
X, y = make_moons(n_samples=200, noise=0.05, random_state=0)

# apply the KMeans algorithm to the data with two clusters
kmeans = KMeans(n_clusters=2)
kmeans.fit(X)
y_pred = kmeans.predict(X)

# plot the cluster assignments and cluster centers
plt.scatter(X[:, 0], X[:, 1], c=y_pred, cmap=mglearn.cm2, s=60, edgecolors='k')
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
            marker='^', c=[mglearn.cm2(0), mglearn.cm2(1)], s=100, linewidth=2, edgecolors='k')
plt.xlabel("특성 0")
plt.ylabel("특성 1")
# X_people / y_people: the LFW face data (see code example #19)
X_train, X_test, y_train, y_test = train_test_split(
    X_people, y_people, stratify=y_people, random_state=42)
nmf = NMF(n_components=100, random_state=0)
nmf.fit(X_train)
pca = PCA(n_components=100, random_state=0)
pca.fit(X_train)
kmeans = KMeans(n_clusters=100, random_state=0)
kmeans.fit(X_train)

X_reconstructed_pca = pca.inverse_transform(pca.transform(X_test))
X_reconstructed_kmeans = kmeans.cluster_centers_[kmeans.predict(X_test)]
X_reconstructed_nmf = np.dot(nmf.transform(X_test), nmf.components_)
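# A sketch (not in the original) for viewing the three reconstructions side by
# side, in the spirit of the book's comparison figure; image_shape comes from
# the LFW loading code shown in code example #19.
import matplotlib.pyplot as plt

fig, axes = plt.subplots(4, 5, figsize=(10, 8),
                         subplot_kw={'xticks': (), 'yticks': ()})
for ax_col, orig, rec_km, rec_pca, rec_nmf in zip(
        axes.T, X_test, X_reconstructed_kmeans,
        X_reconstructed_pca, X_reconstructed_nmf):
    ax_col[0].imshow(orig.reshape(image_shape))
    ax_col[1].imshow(rec_km.reshape(image_shape))
    ax_col[2].imshow(rec_pca.reshape(image_shape))
    ax_col[3].imshow(rec_nmf.reshape(image_shape))
axes[0, 0].set_ylabel("original")
axes[1, 0].set_ylabel("kmeans")
axes[2, 0].set_ylabel("pca")
axes[3, 0].set_ylabel("nmf")
plt.show()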
Code example #6
# cancer here is the breast-cancer dataset loaded in an earlier cell (assumed)
X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target,
                                                    random_state=1)
scaler = MinMaxScaler()
scaler.fit(X_train)

X_train_scaled = scaler.transform(X_train)
# print(X_train.min(), X_train.max())
# print(X_train_scaled.min(), X_train_scaled.max())

X, _ = make_blobs(n_samples=50, centers=5, random_state=4, cluster_std=2)
X_train, X_test = train_test_split(X, random_state=5, test_size=.1)

# plot the comparison
fig, axes = plt.subplots(1, 3, figsize=(13, 4))
axes[0].scatter(X_train[:, 0],
                X_train[:, 1],
                c=mglearn.cm2(0),
                label='Training set',
                s=60)
axes[0].scatter(X_test[:, 0],
                X_test[:, 1],
                c=mglearn.cm2(1),
                marker='^',
                label='Testing set',
                s=60)
axes[0].legend(loc='upper left')

scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
Code example #7
print("max значение признака после масштабирования:\n {}".format( X_train_scaled.max(axis=0)))


"In[7]:"
X_test_scaled = (X_test - min_on_training) / range_on_training
print("min значение признака после масштабирования:\n{}".format(X_test_scaled.min(axis=0)))
print("max значение признака после масштабирования:\n{}".format(X_test_scaled.max(axis=0)))


"In[8]:"
from sklearn.datasets import make_blobs
X, _ = make_blobs(n_samples=50, centers=5, random_state=4, cluster_std=2)
X_train, X_test = train_test_split(X, random_state=5, test_size=.1)
fig, axes = plt.subplots(1, 3, figsize=(13, 4))
axes[0].scatter(X_train[:, 0], X_train[:, 1],
                c=mglearn.cm2(0), label="Training set", s=60)
axes[0].scatter(X_test[:, 0], X_test[:, 1], marker='^',
                c=mglearn.cm2(1), label="Test set", s=60)
axes[0].legend(loc='upper left')
axes[0].set_title("Original data")

scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

axes[1].scatter(X_train_scaled[:, 0], X_train_scaled[:, 1],
                c=mglearn.cm2(0), label="Training set", s=60)

axes[1].scatter(X_test_scaled[:, 0], X_test_scaled[:, 1], marker='^',
                c=mglearn.cm2(1), label="Test set", s=60)
axes[1].set_title("Scaled data")
Code example #8
X, _ = make_blobs(n_samples=50, centers=5, random_state=4, cluster_std=2)
make_blobs?

X
_
X.shape

# test_size: a float in (0, 1) is the proportion of the dataset put in the test
# set; an int is the absolute number of test samples
X_train, X_test = train_test_split(X, random_state=5, test_size=.1) 
X_test.shape
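# Quick check of the comment above (sketch, not in the original): a float is a
# proportion of the dataset, an int an absolute number of test samples.
train_test_split(X, test_size=.1)[1].shape  # 10% of 50 samples -> (5, 2)
train_test_split(X, test_size=7)[1].shape   # exactly 7 samples -> (7, 2)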

# plot the training and test data
fig, axes = plt.subplots(1, 3, figsize=(13, 4))

# subplot 1
axes[0].scatter(X_train[:, 0], X_train[:, 1], c=mglearn.cm2(0), label='training set', s=60)
axes[0].scatter(X_test[:, 0], X_test[:, 1], marker='^', c=mglearn.cm2(1), label='test set', s=60)
axes[0].legend(loc='upper left')
axes[0].set_title('original data')

# subplot 2
# scale the data(based on training) using MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
axes[1].scatter(X_train_scaled[:, 0], X_train_scaled[:, 1], c=mglearn.cm2(0), label='training set', s=60)
axes[1].scatter(X_test_scaled[:, 0], X_test_scaled[:, 1], marker='^', c=mglearn.cm2(1), label='test set', s=60)
axes[1].set_title('scaled data')

# subplot 3
Code example #9
stu_x_sc44['num'] = d1.iloc[:,0]
stu_x_sc44.drop('num',axis=1)
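# note: DataFrame.drop returns a new frame; without assignment (or inplace=True)
# stu_x_sc44 itself still contains the 'num' column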


# visualize with points instead of lines

df3
df4 = np.array(df3)
df4[:, 0]

# visualization
# 1) create the figure and subplots (1 x 2)
fig, ax = plt.subplots(1, 2)

import mglearn
ax[0].scatter(df4[:, 2], df4[:, 0], c=mglearn.cm2(0))  # absences
ax[1].scatter(df4[:, 2], df4[:, 1], c=mglearn.cm2(1))  # grades

# visualization
# 1) create the figure and a single subplot (1 x 1)
fig, ax = plt.subplots(1, 1)

import mglearn
# a single subplot returns one Axes object, not an array, so use ax, not ax[0]
ax.scatter(df4[:, 2], df4[:, 0], c=mglearn.cm2(0))  # absences
ax.scatter(df4[:, 2], df4[:, 1], c=mglearn.cm2(1))  # grades

# visualization
# 1) create the figure and subplots (1 x 2)
fig, ax = plt.subplots(1, 2)

import mglearn
Code example #10
# plt.subplots(nrows, ncols, figsize=(width, height))
fig, axis = plt.subplots(5, 1, figsize=(8, 16))

# also track how the scores change along the way
training_accuracy = []
test_accuracy = []
# np.reshape(newshape)
# Gives a new shape to an array without changing its data.
# reshape(n, m) => n rows, m columns; -1 infers that dimension from the element count
line = np.linspace(-3, 3, 1000).reshape(-1, 1)  # 1000 rows, 1 column
for n, ax in zip([1, 3, 9, 15, 30], axis):  # ax is the subplot to draw into
    reg = KNeighborsRegressor(n_neighbors=n).fit(X_train, y_train)
    training_accuracy.append(reg.score(X_train, y_train))
    test_accuracy.append(reg.score(X_test, y_test))
    ax.plot(line, reg.predict(line))  # evaluate the model on points from -3 to 3
    ax.plot(X_train, y_train, '^', c=mg.cm2(0), markersize=2)
    ax.plot(X_test,  y_test,  'v', c=mg.cm2(1), markersize=2)

    ax.set_title(
        "n={}, train_score={:.2f}, test_score={:.2f}".format(n, reg.score(X_train, y_train), reg.score(X_test, y_test)))
    ax.set_xlabel("Feature")
    ax.set_ylabel("Target")

axis[0].legend(["Model predictions", "Training data/target",
                "Test data/target"], loc="best")

# Increasing n makes the model predictions smoother and lowers the accuracy on
# the training data. At the same time, the test score also drops
# (with n_samples=40).
# The data has a linear trend, so intuitively the score should improve... why?
# Apparently the small number of samples is the cause.
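# A sketch (not in the original) to test the small-sample hypothesis above:
# regenerate the wave data with more samples and compare test scores for large
# n_neighbors. The n_samples values are illustrative.
import mglearn
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor

for n_samples in [40, 400, 4000]:
    Xs, ys = mglearn.datasets.make_wave(n_samples=n_samples)
    Xs_train, Xs_test, ys_train, ys_test = train_test_split(Xs, ys, random_state=0)
    for k in [1, 9, 30]:
        r = KNeighborsRegressor(n_neighbors=k).fit(Xs_train, ys_train)
        print("n_samples={:>4} n_neighbors={:>2} test R^2={:.2f}".format(
            n_samples, k, r.score(Xs_test, ys_test)))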
Code example #11
# test_x_sc1 = m_sc2.transform(test_x)
# 
# # - wrong scaling => different bases (train -> fit on train, test -> fit on test)
# train_x_sc2 = m_sc2.transform(train_x)
# test_x_sc2 = m_sc3.transform(test_x)
# =============================================================================

# 4) visualization
# 4-1) create the figure and subplots (1 x 3)
fig, ax = plt.subplots(1, 3)

# 4-2) scatter plot of the original distribution (x1, x2)
import mglearn

ax[0].scatter(train_x[:, 0], train_x[:, 1], c=mglearn.cm2(0), label='train')
ax[0].scatter(test_x[:, 0], test_x[:, 1], c=mglearn.cm2(1), label='test')
ax[0].legend()
ax[0].set_title('Original scatter plot')
ax[0].set_xlabel('sepal')
ax[0].set_ylabel('sepal')

# 4-3) scatter plot after correct scaling (x1, x2)
ax[1].scatter(train_x_sc1[:, 0], train_x_sc1[:, 1], c=mglearn.cm2(0))
ax[1].scatter(test_x_sc1[:, 0], test_x_sc1[:, 1], c=mglearn.cm2(1))

# 4-4) scatter plot after incorrect scaling (x1, x2)
ax[2].scatter(train_x_sc2[:, 0], train_x_sc2[:, 1], c=mglearn.cm2(0))
ax[2].scatter(test_x_sc2[:, 0], test_x_sc2[:, 1], c=mglearn.cm2(1))

# => when the train and test sets are kept separate, scaling each one with its
#    own fit applies a different basis to each set
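# Sketch (not in the original) of why separate fits break comparability: the
# same raw test point maps to different scaled values under the two bases.
from sklearn.preprocessing import MinMaxScaler

sc_right = MinMaxScaler().fit(train_x)  # basis: the training set
sc_wrong = MinMaxScaler().fit(test_x)   # basis: the test set (don't do this)
print(sc_right.transform(test_x[:1]))
print(sc_wrong.transform(test_x[:1]))   # generally differs from the line above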
Code example #12
import numpy as np
import matplotlib.pyplot as plt
import mglearn
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_blobs
from sklearn.preprocessing import MinMaxScaler


# make synthetic data
X, _ = make_blobs(n_samples=50, centers=5, random_state=4, cluster_std=2)
# split it into training and test sets
X_train, X_test = train_test_split(X, random_state=5, test_size=.1)

# make c argument 2D
cm20 = np.array(mglearn.cm2(0)).reshape(1, -1)
cm21 = np.array(mglearn.cm2(1)).reshape(1, -1)

# plot the training and test sets
fig, axes = plt.subplots(1, 3, figsize=(13, 4))
axes[0].scatter(X_train[:, 0], X_train[:, 1],
                c=cm20, label="Training set", s=60)
axes[0].scatter(X_test[:, 0], X_test[:, 1], marker='^',
                c=cm21, label="Test set", s=60)
axes[0].legend(loc='upper left')
axes[0].set_title("Original Data")

# scale the data using MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
Code example #13
from sklearn.datasets import make_moons
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# import the mglearn module (from the parent directory)
import sys
sys.path.append("../")
import mglearn

# generate synthetic two_moons data (with less noise this time) 
X, y = make_moons(n_samples=200, noise=0.05, random_state=0)

# cluster the data into two clusters 
kmeans = KMeans(n_clusters=2)
kmeans.fit(X)
y_pred = kmeans.predict(X)

# plot the cluster assignments and cluster centers 
plt.scatter(X[:, 0], X[:, 1], c=y_pred, cmap=mglearn.cm2, s=60)
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], marker='^', c=[mglearn.cm2(0), mglearn.cm2(1)], s=100, linewidth=2)
plt.xlabel("Feature 0")
plt.ylabel("Feature 1")
plt.show()
Code example #14
# ----------------------------------------------------------------------------------------------
#                                       Analyzing KNeighborsRegressor:
# ----------------------------------------------------------------------------------------------

fig, axes = plt.subplots(1, 3, figsize=(15, 4))
# create 1,000 data points, evenly spaced between -3 and 3
line = np.linspace(-3, 3, 1000).reshape(-1, 1)

for n_neighbors, ax in zip([1, 3, 9], axes):
    # make predictions using 1, 3, or 9 neighbors
    reg = KNeighborsRegressor(n_neighbors=n_neighbors)
    reg.fit(X_train, y_train)
    ax.plot(line, reg.predict(
        line))  # produces the line of predicted values for points in "line"
    ax.plot(X_train, y_train, '^', c=mglearn.cm2(0),
            markersize=8)  # training set points
    ax.plot(X_test, y_test, 'v', c=mglearn.cm2(1),
            markersize=8)  # test set points
    ax.set_title(
        "{} neighbor(s)\n train score: {:.2f} test score: {:.2f}".format(
            n_neighbors, reg.score(X_train, y_train),
            reg.score(X_test, y_test)))
    ax.set_xlabel("Feature")
    ax.set_ylabel("Target")
axes[0].legend(
    ["Model predictions", "Training data/target", "Test data/target"],
    loc="best")

plt.show()
Code example #15
print("per-feature minimum after scaling:\n{}".format(X_test_scaled.min(axis=0)))
print("per-feature maximum after scaling:\n{}".format(X_test_scaled.max(axis=0)))

########################################################################################################################
########################################################################################################################

import mglearn
from sklearn.datasets import make_blobs
import pylab as plt
# make synthetic data
X, _ = make_blobs(n_samples=50, centers=5, random_state=4, cluster_std=2)
# split it into training and test sets
X_train, X_test = train_test_split(X, random_state=5, test_size=.1)
# plot the training and test sets
fig, axes = plt.subplots(1, 3, figsize=(13, 4))
axes[0].scatter(X_train[:, 0], X_train[:, 1], c=mglearn.cm2(0), label="Training set", s=60)
axes[0].scatter(X_test[:, 0], X_test[:, 1], marker='^', c=mglearn.cm2(1), label="Test set", s=60)
axes[0].legend(loc='upper left')
axes[0].set_title("Original Data")
# scale the data using MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
# visualize the properly scaled data
axes[1].scatter(X_train_scaled[:, 0], X_train_scaled[:, 1], c=mglearn.cm2(0), label="Training set", s=60)
axes[1].scatter(X_test_scaled[:, 0], X_test_scaled[:, 1], marker='^', c=mglearn.cm2(1), label="Test set", s=60)
axes[1].set_title("Scaled Data")

# rescale the test set separately
# so test set min is 0 and test set max is 1
# DO NOT DO THIS! For illustration purposes only.
test_scaler = MinMaxScaler()
test_scaler.fit(X_test)
X_test_scaled_badly = test_scaler.transform(X_test)
Code example #16
# scale the data, fitting the scaler on the training set only
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# the wrong approach: fit a separate scaler on the test set alone
test_scaler = MinMaxScaler()
X_test_scaled_badly = test_scaler.fit_transform(X_test)

# there is a subtle difference between scaling before splitting and splitting before scaling
print(X_test_scaled[:, 0])
print(X_testx[:, 0])  # X_testx presumably comes from an earlier scale-then-split cell (not shown)
# plotting
fig = plt.figure(figsize=(13, 4))
ax0 = fig.add_subplot(1, 4, 1)
ax0.scatter(X_train[:, 0], X_train[:, 1], c=mglearn.cm2(0), label='Training set', s=60)
ax0.scatter(X_test[:, 0], X_test[:, 1], marker='^', c=mglearn.cm2(1), label='Test set', s=60)
ax0.legend(loc='upper left')
ax0.set_title('Original Data')
ax0.set_xlabel('Feature 0')
ax0.set_ylabel('Feature 1')

ax1 = fig.add_subplot(1, 4, 2)
ax1.scatter(X_train_scaled[:, 0], X_train_scaled[:, 1], c=mglearn.cm2(0), label='Training set', s=60)
ax1.scatter(X_test_scaled[:, 0], X_test_scaled[:, 1], marker='^', c=mglearn.cm2(1), label='Test set', s=60)
ax1.legend(loc='upper left')
ax1.set_title('Scaled Data')
ax1.set_xlabel('Feature 0')
ax1.set_ylabel('Feature 1')

ax2 = fig.add_subplot(1, 4, 3)

Code example #17
def main():
    X, _ = make_blobs(n_samples=50, centers=5, random_state=4, cluster_std=2)
    X_train, X_test = train_test_split(X, random_state=5, test_size=.1)

    # plot the training and test sets
    fig, axes = plt.subplots(1, 3, figsize=(13, 4))
    axes[0].scatter(X_train[:, 0],
                    X_train[:, 1],
                    c=mglearn.cm2(0),
                    label="Training set",
                    s=60)
    axes[0].scatter(X_test[:, 0],
                    X_test[:, 1],
                    marker='^',
                    c=mglearn.cm2(1),
                    label="Test set",
                    s=60)
    axes[0].legend(loc='upper left')
    axes[0].set_title("Original Data")

    # scale the data using MinMaxScaler
    scaler = MinMaxScaler()
    scaler.fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # visualize the properly scaled data
    axes[1].scatter(X_train_scaled[:, 0],
                    X_train_scaled[:, 1],
                    c=mglearn.cm2(0),
                    label="Training set",
                    s=60)
    axes[1].scatter(X_test_scaled[:, 0],
                    X_test_scaled[:, 1],
                    marker='^',
                    c=mglearn.cm2(1),
                    label="Test set",
                    s=60)
    axes[1].set_title("Scaled Data")

    # rescale the test set separately
    # so test set min is 0 and test set max is 1
    # DO NOT DO THIS! For illustration purposes only.
    test_scaler = MinMaxScaler()
    test_scaler.fit(X_test)
    X_test_scaled_badly = test_scaler.transform(X_test)

    # visualize wrongly scaled data
    axes[2].scatter(X_train_scaled[:, 0],
                    X_train_scaled[:, 1],
                    c=mglearn.cm2(0),
                    label="training set",
                    s=60)
    axes[2].scatter(X_test_scaled_badly[:, 0],
                    X_test_scaled_badly[:, 1],
                    marker='^',
                    c=mglearn.cm2(1),
                    label="test set",
                    s=60)
    axes[2].set_title("Improperly Scaled Data")

    for ax in axes:
        ax.set_xlabel("Feature 0")
        ax.set_ylabel("Feature 1")
    fig.tight_layout()
    plt.show()
Code example #18
# cluster the data points into three clusters
kmeans = KMeans(n_clusters=3)
kmeans.fit(X)
y_pred = kmeans.predict(X)

# plot the cluster assignments and cluster centers
mglearn.discrete_scatter(X[:, 0], X[:, 1], kmeans.labels_, markers='o')
mglearn.discrete_scatter(
    kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], [0, 1, 2],
    markers='^', markeredgewidth=2)
plt.xlabel("Feature 0")
plt.ylabel("Feature 1")

# create the synthetic two_moons dataset (with less noise this time)
from sklearn.datasets import make_moons
X, y = make_moons(n_samples=200, noise=0.05, random_state=0)

# cluster the data into two clusters
kmeans = KMeans(n_clusters=2)
kmeans.fit(X)
y_pred = kmeans.predict(X)

# plot the cluster assignments and cluster centers
plt.scatter(X[:, 0], X[:, 1], c=y_pred, cmap=mglearn.cm2, s=60, edgecolor='k')
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
            marker='^', c=[mglearn.cm2(0), mglearn.cm2(1)], s=100, linewidth=2,
            edgecolor='k')
plt.xlabel("Feature 0")
plt.ylabel("Feature 1")

Code example #19
mglearn.discrete_scatter(X[:, 0], X[:, 1], kmeans.labels_, markers="o")
mglearn.discrete_scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
                         [0, 1, 2], markers="^", markeredgewidth=2)
plt.xlabel("feature 0")
plt.ylabel("feature 1")

# two_moons data
from sklearn.datasets import make_moons
X, y = make_moons(n_samples=200, noise=0.05, random_state=0)

kmeans = KMeans(n_clusters=2).fit(X)
y_pred = kmeans.predict(X)

plt.scatter(X[:, 0], X[:, 1], c=y_pred, cmap=mglearn.cm2, s=60, edgecolors="k")
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], marker="^",
            c=[mglearn.cm2(0), mglearn.cm2(1)], s=100, linewidth=2, edgecolors="k")
plt.xlabel("feature 0")
plt.ylabel("feature 1")

## vector quantization (k-means cluster centers viewed as components of a decomposition)
# face data
from sklearn.datasets import fetch_lfw_people
people = fetch_lfw_people(min_faces_per_person=20, resize=0.7)
image_shape = people.images[0].shape

mask = np.zeros(people.target.shape, dtype=bool)  # np.bool was removed in recent NumPy
for target in np.unique(people.target):  # target frequencies differ, so cap at 50 images per person
    mask[np.where(people.target == target)[0][:50]] = 1

X_people = people.data[mask]
y_people = people.target[mask]
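# The quantization step itself (sketch, not in the original; cf. code example #5):
# each face is replaced by its nearest of k cluster centers, here k=100.
from sklearn.cluster import KMeans

kmeans = KMeans(n_clusters=100, random_state=0).fit(X_people)
X_quantized = kmeans.cluster_centers_[kmeans.predict(X_people)]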
Code example #20
# Scaling Training and Test Data the Same Way----------------------------------
"""
It is important to apply exactly the same transformation to the training set and the
test set for the supervised model to work on the test set. The following example
illustrates what would happen if we were to use the minimum and range
of the test set instead.
"""
from sklearn.datasets import make_blobs
# make synthetic data
X, _ = make_blobs(n_samples=50, centers=5, random_state=4, cluster_std=2)
# split it into training and test sets
X_train, X_test = train_test_split(X, random_state=5, test_size=.1)
# plot the training and test sets
fig, axes = plt.subplots(1, 3, figsize=(13, 4))
axes[0].scatter(X_train[:, 0], X_train[:, 1],
                c=mglearn.cm2(0), label="Training set", s=60)
axes[0].scatter(X_test[:, 0], X_test[:, 1], marker='^',
                c=mglearn.cm2(1), label="Test set", s=60)
axes[0].legend(loc='upper left')
axes[0].set_title("Original Data")
# scale the data using MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
# visualize the properly scaled data
axes[1].scatter(X_train_scaled[:, 0], X_train_scaled[:, 1],
                c=mglearn.cm2(0), label="Training set", s=60)
axes[1].scatter(X_test_scaled[:, 0], X_test_scaled[:, 1], marker='^',
                c=mglearn.cm2(1), label="Test set", s=60)
axes[1].set_title("Scaled Data")
Code example #21
plt.xlabel("Feature 0")
plt.ylabel("Feature 1")
plt.show()
#In[54]
from sklearn.datasets import make_moons
X, y = make_moons(n_samples=200, noise=0.05, random_state=0)

kmeans = KMeans(n_clusters=2)
kmeans.fit(X)
y_pred = kmeans.predict(X)

plt.scatter(X[:, 0], X[:, 1], c=y_pred, cmap=mglearn.cm2, s=60, edgecolor='k')
plt.scatter(kmeans.cluster_centers_[:, 0],
            kmeans.cluster_centers_[:, 1],
            marker='^',
            c=[mglearn.cm2(0), mglearn.cm2(1)],
            s=100,
            linewidth=2,
            edgecolor='k')
plt.xlabel("Feature 0")
plt.ylabel("Feature 1")
#In[55]

#In[56]

#In[59]
mglearn.plots.plot_agglomerative_algorithm()
plt.show()

#In[60]
from sklearn.cluster import AgglomerativeClustering
Code example #22
"""
The R^2 score, also known as the coefficient of determination, is a measure
of goodness of a prediction for a regression model, and yields a score between 0
and 1. A value of 1 corresponds to a perfect prediction, and a value of 0 corresponds
to a constant model that just predicts the mean of the training set responses, y_train.
"""
print('Test set R^2 : {:.2f}'.format(reg.score(X_test,y_test)))
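# The definition above can be verified by hand (sketch, not in the original);
# scikit-learn centers the total sum of squares on the mean of the evaluated targets.
import numpy as np

y_pred = reg.predict(X_test)
ss_res = ((y_test - y_pred) ** 2).sum()         # residual sum of squares
ss_tot = ((y_test - y_test.mean()) ** 2).sum()  # total sum of squares
print('R^2 by hand: {:.2f}'.format(1 - ss_res / ss_tot))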

# Analyzing KNeighborsRegressor------------------------------------------------
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
# create 1,000 data points, evenly spaced between -3 and 3
line = np.linspace(-3, 3, 1000).reshape(-1, 1)
for n_neighbors, ax in zip([1, 3, 9], axes):
    # make predictions using 1, 3, or 9 neighbors
    reg = KNeighborsRegressor(n_neighbors=n_neighbors)
    reg.fit(X_train, y_train)
    ax.plot(line, reg.predict(line))
    ax.plot(X_train, y_train, '^', c=mglearn.cm2(0), markersize=8)
    ax.plot(X_test, y_test, 'v', c=mglearn.cm2(1), markersize=8)
    ax.set_title("{} neighbor(s)\n train score: {:.2f} test score: {:.2f}".format(
    n_neighbors, reg.score(X_train, y_train),
    reg.score(X_test, y_test)))
    ax.set_xlabel("Feature")
    ax.set_ylabel("Target")
axes[0].legend(["Model predictions", "Training data/target",
                    "Test data/target"], loc="best")

#------------------------------------------------------------------------------Linear Models
# Linear models for regression-------------------------------------------------
mglearn.plots.plot_linear_regression_wave()

# Linear regression (aka ordinary least squares)-------------------------------
from sklearn.linear_model import LinearRegression
Code example #23
# %%
reg = KNeighborsRegressor(n_neighbors=3)
reg.fit(X_train, y_train)

# %%
print('Test set predictions: \n{}'.format(reg.predict(X_test)))

# %%
print('Test set R^2: {:.2f}'.format(reg.score(X_test, y_test)))

# %%
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
line = np.linspace(-3, 3, 1000).reshape(-1, 1)
for n_neighbors, ax in zip([1, 3, 9], axes):
    reg = KNeighborsRegressor(n_neighbors)
    reg.fit(X_train, y_train)
    ax.plot(line, reg.predict(line))
    ax.plot(X_train, y_train, '^', c=mglearn.cm2(0), markersize=8)
    ax.plot(X_test, y_test, 'v', c=mglearn.cm2(1), markersize=8)

    ax.set_title('{} neighbor(s)\n train score: {:.2f} test score: {:.2f}'.format(
        n_neighbors, reg.score(X_train, y_train), reg.score(X_test, y_test)))
    ax.set_xlabel('Feature')
    ax.set_ylabel('Target')

axes[0].legend(['Model predictions', 'Training data/target',
                'Test data/target'], loc='best')

# %%
"""
@author: Yuan-Ray Chang
"""

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import mglearn

from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

X, _ = make_blobs(n_samples=50, centers=5, random_state=4, cluster_std=2)
X_train, X_test = train_test_split(X, random_state=5, test_size=0.1)
fig, axes = plt.subplots(1, 3, figsize=(13, 4))
axes[0].scatter(X_train[:, 0], X_train[:, 1], c=mglearn.cm2(0),
                label="Training set", s=60)
axes[0].scatter(X_test[:, 0], X_test[:, 1], marker='^',
                c=mglearn.cm2(1), label="Test set", s=60)
axes[0].legend(loc='upper left')
axes[0].set_title("Original data")

scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

axes[1].scatter(X_train_scaled[:, 0], X_train_scaled[:, 1], c=mglearn.cm2(0),
                label="Training set", s=60)
axes[1].scatter(X_test_scaled[:, 0], X_test_scaled[:, 1], marker='^',
                c=mglearn.cm2(1), label="Test set", s=60)