def read_data(only_2_features=True):
    """Return the feature matrix and labels produced by ``datasets.load_iris()``.

    Args:
        only_2_features: When true (the default), keep only the first two
            feature columns so the samples can be drawn on a 2-D plane.

    Returns:
        Tuple ``(X, y)`` built from ``iris.data`` and ``iris.target``.
    """
    iris = datasets.load_iris()
    X, y = iris.data, iris.target
    if only_2_features:
        X = X[:, :2]
    return X, y


if __name__ == '__main__':
    X, y = read_data()
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2)

    # One subplot per kernel configuration: (axes, title, SVC keyword args).
    # The first panel is titled "SVC linear", so the linear kernel has to be
    # requested explicitly; a bare ``svm.SVC()`` would use the library's
    # default kernel instead (RBF in scikit-learn -- assuming ``svm`` is
    # ``sklearn.svm``) and the title would not match the plot.
    panels = [
        (ax1, "SVC linear", {'kernel': 'linear'}),
        (ax2, "SVC polynomial, deg: 2", {'kernel': 'poly', 'degree': 2}),
        (ax3, "SVC polynomial, deg: 3", {'kernel': 'poly', 'degree': 3}),
        (ax4, "SVC polynomial, deg: 6", {'kernel': 'poly', 'degree': 6}),
    ]
    for ax, title, svc_params in panels:
        model = svm.SVC(**svc_params)
        model.fit(X, y)
        ax.set_title(title)
        # The bound method is handed over directly; it is called by
        # plot_areas exactly like the former ``lambda x: model.predict(x)``.
        plot_areas(model.predict, 0.1, X, ax)
        plot_2d_classes(X, y, 'ryb', ax)
cnn_percents = [] for i in range(10): X_training, y_training, X_test, y_test = split_dataset(X, y, 0.7) if cnn: len_before = X_training.shape[0] X_training, y_training = cnn_transform(X_training, y_training, k, metric) len_after = X_training.shape[0] cnn_percents.append(float(len_after) / float(len_before) * 100.0) predictions = [] for i in range(X_test.shape[0]): predictions.append(kNN(X_training, X_training, y_training, X_test[i, :], k, metric)) if SHOW_PREDICTIONS_AND_REAL_VALUES: print('Prediction, actual:') for i in range(X_test.shape[0]): print(predictions[i], y_test[i]) correct = 0 for i in range(len(predictions)): if y_test[i] == predictions[i]: correct += 1 accuracies.append(float(correct) / float(len(predictions)) * 100.0) print("Accuracy:", str(np.mean(accuracies)) + '%', 'StdDev:', np.std(accuracies)) if cnn: print("CNN:", str(np.mean(cnn_percents)) + '%', 'StdDev:', np.std(cnn_percents)) else: if cnn: X, y = cnn_transform(X, y, k, metric) plot_areas(lambda x: kNN(X, X, y, x, k, metric), 0.1, X) plot_2d_classes(X, y, 'ryb') cnn_str = '_cnn' if cnn else '' plt.savefig('plots/k' + str(k) + '_' + metric.__name__ + cnn_str + '.png')
def kmeanspp_init(X, k):
    """Pick ``k`` initial cluster centres from ``X`` using k-means++ seeding.

    The first centre is a uniformly random row of ``X``.  Every further
    centre is drawn with probability proportional to the squared Euclidean
    distance between a row and its nearest already-chosen centre.

    Args:
        X: Sequence or array of points, one point per row.
        k: Number of centres requested.  At least one centre is always
            returned, because the first one is drawn unconditionally.

    Returns:
        ``numpy.ndarray`` holding the chosen rows of ``X``.

    Raises:
        ValueError: If ``X`` contains no points.
    """
    n_points = len(X)
    if n_points == 0:
        raise ValueError("X must contain at least one point")

    # Float copy used only for distance arithmetic; the returned centres
    # are still taken from X itself.
    points = np.asarray(X, dtype=float).reshape(n_points, -1)

    # randint's upper bound is exclusive, so this covers 0 .. n_points - 1,
    # the same range as the deprecated random_integers(0, n_points - 1).
    chosen = [np.random.randint(0, n_points)]

    # Squared distance from every point to its nearest chosen centre,
    # refreshed incrementally with the newest centre only.
    min_sq_dist = np.full(n_points, np.inf)
    for _ in range(1, k):
        newest = points[chosen[-1]]
        min_sq_dist = np.minimum(
            min_sq_dist, np.sum((points - newest) ** 2, axis=1)
        )
        total = min_sq_dist.sum()
        # When every point coincides with a chosen centre the weights are
        # all zero; dividing would yield NaN probabilities, so draw
        # uniformly instead.
        probs = min_sq_dist / total if total > 0 else None
        chosen.append(np.random.choice(n_points, p=probs))

    return np.array([X[index] for index in chosen])


if __name__ == '__main__':
    X, y = generate_dataset(100)
    colors = ['r', 'g', 'b', 'w', 'c', 'm', 'y', 'k', '0.75']
    plot_2d_classes(X, y, colors)
    plt.gca().set_title("Dataset")
    plt.savefig('partA/dataset.png')

    fig = plt.figure()
    ax = plt.gca()
    iterations = 50
    k = 9
    reps = 5
    for method in [random_init, forgy_init, random_partition_init, kmeanspp_init]:
        print(method)
        # Collect (iteration, quality) pairs over all repetitions.
        qualities = []
        for _ in range(reps):
            for i, quality in k_means(method, X, k, iterations):
                qualities.append((i, quality))
        # NOTE(review): the script continues beyond this view; these two
        # accumulators are presumed to be reset once per method -- confirm.
        avgs = []
        stds = []
new_means[y[i]][j] += X[i][j] points_num[y[i]] += 1 print(points_num) for i in range(k): for j in range(2): if points_num[i] > 0: new_means[i][j] /= float(points_num[i]) sums = [0 for i in range(k)] for i in range(X.shape[0]): sums[y[i]] += metric(new_means[y[i]], X[i, :]) sum = np.sum(sums) if sum - best_sum > -EPSILON: break best_sum = sum means = new_means print(means) return means, y if __name__ == '__main__': X = read_data() k = 3 means, y = kMeans(X, k) plot_2d_classes(X, np.array(y), 'ryb') colors = 'ryb' for i in range(k): [x, y] = means[i] plt.plot(x, y, '^', c=colors[i]) plt.show()
return np.array([np.average(clusters[i], axis=0) for i in range(k)]) def kmeanspp_init(X, k): means = [] rand_index = np.random.random_integers(0, len(X) - 1) means.append(X[rand_index]) for x in range(1, k): min_distances = [np.min([euclidean(x, mean) for mean in means]) ** 2 for x in X] probs = [min_distances[i] / sum(min_distances) for i in range(len(min_distances))] new_mean_index = np.random.choice([x for x in range(len(X))], p=probs) means.append(X[new_mean_index]) return np.array(means) if __name__ == '__main__': X, y = generate_dataset(100) colors = ['r', 'g', 'b', 'w', 'c', 'm', 'y', 'k', '0.75'] fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2) axs = [ax1, ax2, ax3, ax4] iterations = 50 k = 9 ax_i = 0 for method in [random_init, forgy_init, random_partition_init, kmeanspp_init]: print(method.__name__[:-5]) y_m = k_means(method, X, k, iterations) plot_2d_classes(X, y_m, colors, axs[ax_i]) axs[ax_i].set_title(method.__name__[:-5]) ax_i += 1 plt.savefig('partA/clusters.png')