# Load labelled datapoints, plot them with one marker shape per class, and
# train a distance-weighted k-nearest-neighbours classifier on them.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from sklearn import neighbors, datasets

from ex16 import load_data

# Load input data
input_file = 'data_nn_classifier.txt'
data = load_data(input_file)

# Every column except the last is used as a feature; the last column is the
# class label. Builtin ``int`` is used instead of ``np.int``: the latter was
# only an alias for ``int`` and has been removed from NumPy (1.24+).
X, y = data[:, :-1], data[:, -1].astype(int)

# Plot input data
plt.figure()
plt.title('Input datapoints')
markers = '^sov<>hp'
# Each label is used directly as an index into ``markers``.
mapper = np.array([markers[i] for i in y])
# One scatter call per point, since every point carries its own marker.
# Only the first two feature columns are plotted.
for point, marker in zip(X, mapper):
    plt.scatter(point[0], point[1], marker=marker, s=50,
                edgecolors='black', facecolors='none')

# Number of nearest neighbors to consider
num_neighbors = 10

# step size of the grid
h = 0.01

# Create a K-Neighbours Classifier model and train it
classifier = neighbors.KNeighborsClassifier(num_neighbors, weights='distance')
classifier.fit(X, y)

# Create the mesh to plot the boundaries
# Scatter-plot the first two columns of the input data, then fit a k-means
# model with a fixed number of clusters to the full dataset.
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.cluster import KMeans

import ex16

data = ex16.load_data('data_multivar.txt')
num_clusters = 4

plt.figure()
plt.scatter(data[:, 0], data[:, 1], marker='o',
            facecolors='none', edgecolors='k', s=30)

# Axis limits are the data range padded by one unit on each side.
# np.min/np.max reduce the column in native code, rather than iterating the
# array element by element as the builtin min()/max() would.
x_min, x_max = np.min(data[:, 0]) - 1, np.max(data[:, 0]) + 1
y_min, y_max = np.min(data[:, 1]) - 1, np.max(data[:, 1]) + 1

plt.title('Input data')
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
# Empty tuples remove the tick marks from both axes.
plt.xticks(())
plt.yticks(())
plt.show()

kmeans = KMeans(init='k-means++', n_clusters=num_clusters, n_init=10)
kmeans.fit(data)

# Step size of the mesh
step_size = 0.01

# Plot the boundaries
import numpy as np import matplotlib.pyplot as plt from sklearn import metrics from sklearn.cluster import KMeans import ex16 # Load data data = ex16.load_data('data_perf.txt') scores = [] range_values = np.arange(2, 10) for i in range_values: # Train the model kmeans = KMeans(init='k-means++', n_clusters=i, n_init=10) kmeans.fit(data) score = metrics.silhouette_score(data, kmeans.labels_, metric='euclidean', sample_size=len(data)) print "\nNumber of clusters =", i print "Silhouette score =", score scores.append(score) # Plot scores plt.figure() plt.bar(range_values, scores, width=0.6, color='k', align='center') plt.title('Silhouette score vs number of clusters') # Plot data plt.figure() plt.scatter(data[:, 0], data[:, 1],