Exemple #1
0
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from sklearn import neighbors, datasets
from ex16 import load_data

# Load input data: every column except the last is used as a feature and the
# last column is used as the class label.
input_file = 'data_nn_classifier.txt'
data = load_data(input_file)
# Cast labels with the builtin int: the np.int alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
X, y = data[:, :-1], data[:, -1].astype(int)

# Plot input data, using one marker shape per class label
plt.figure()
plt.title('Input datapoints')
markers = '^sov<>hp'
# Each label indexes directly into the marker string, so labels must be
# smaller than len(markers).
mapper = np.array([markers[i] for i in y])
# One scatter call per point because each point may need its own marker.
for point, marker in zip(X, mapper):
    plt.scatter(point[0],
                point[1],
                marker=marker,
                s=50,
                edgecolors='black',
                facecolors='none')
# Number of nearest neighbors to consider
num_neighbors = 10
# step size of the grid
h = 0.01
# Create a K-Neighbours Classifier model and train it
# (weights='distance': closer neighbors get more influence on the vote)
classifier = neighbors.KNeighborsClassifier(num_neighbors, weights='distance')
classifier.fit(X, y)
# Create the mesh to plot the boundaries
Exemple #2
0
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.cluster import KMeans
import ex16

# Load the points to cluster; only the first two columns are plotted below.
data = ex16.load_data('data_multivar.txt')
num_clusters = 4

# Show the raw input as hollow black circles
plt.figure()
plt.scatter(data[:, 0],
            data[:, 1],
            marker='o',
            facecolors='none',
            edgecolors='k',
            s=30)
# Pad the axis limits by one unit on every side. The array methods
# min()/max() are used instead of the Python builtins so the reduction is
# vectorised rather than iterating the column element by element.
x_min, x_max = data[:, 0].min() - 1, data[:, 0].max() + 1
y_min, y_max = data[:, 1].min() - 1, data[:, 1].max() + 1
plt.title('Input data')
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
# Empty tuples remove the tick marks from both axes
plt.xticks(())
plt.yticks(())
plt.show()

# Fit KMeans with k-means++ seeding and 10 initialisations (n_init=10)
kmeans = KMeans(init='k-means++', n_clusters=num_clusters, n_init=10)
kmeans.fit(data)

# Step size of the mesh
step_size = 0.01
# Plot the boundaries
Exemple #3
0
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.cluster import KMeans
import ex16

# Load data
data = ex16.load_data('data_perf.txt')

# Silhouette score obtained for each candidate number of clusters
scores = []
range_values = np.arange(2, 10)
for i in range_values:
    # Train the model
    kmeans = KMeans(init='k-means++', n_clusters=i, n_init=10)
    kmeans.fit(data)
    # sample_size=len(data) scores against every sample, not a subsample
    score = metrics.silhouette_score(data,
                                     kmeans.labels_,
                                     metric='euclidean',
                                     sample_size=len(data))
    # Single-argument print calls with %-formatting produce the same output
    # on Python 2 and Python 3; the original print statements are a
    # SyntaxError on Python 3.
    print("\nNumber of clusters = %s" % i)
    print("Silhouette score = %s" % score)
    scores.append(score)
# Plot scores
plt.figure()
plt.bar(range_values, scores, width=0.6, color='k', align='center')
plt.title('Silhouette score vs number of clusters')

# Plot data
plt.figure()
plt.scatter(data[:, 0],
            data[:, 1],