Exemple #1
0
def read_data(only_2_features=True):
    """Load the iris dataset and return its samples and targets.

    Args:
        only_2_features: When truthy, keep only the first two feature
            columns of the data matrix; otherwise return every column.

    Returns:
        A ``(X, y)`` tuple holding the (optionally truncated) data matrix
        and the target array of the iris dataset.
    """
    dataset = datasets.load_iris()
    features = dataset.data
    labels = dataset.target
    return (features[:, :2] if only_2_features else features), labels


if __name__ == '__main__':
    # Fit four support-vector classifiers on the data returned by
    # read_data() and draw each one in its own panel of a 2x2 figure.
    X, y = read_data()
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2)
    # The kernel of the first panel is spelled out on purpose: svm.SVC()
    # defaults to the RBF kernel, which would contradict the panel title.
    panels = [
        (ax1, "SVC linear", {'kernel': 'linear'}),
        (ax2, "SVC polynomial, deg: 2", {'kernel': 'poly', 'degree': 2}),
        (ax3, "SVC polynomial, deg: 3", {'kernel': 'poly', 'degree': 3}),
        (ax4, "SVC polynomial, deg: 6", {'kernel': 'poly', 'degree': 6}),
    ]
    for ax, title, svc_params in panels:
        classifier = svm.SVC(**svc_params)
        classifier.fit(X, y)
        ax.set_title(title)
        # plot_areas receives the prediction callable; the scatter of the
        # training samples is drawn on top of it on the same axes.
        plot_areas(classifier.predict, 0.1, X, ax)
        plot_2d_classes(X, y, 'ryb', ax)
Exemple #2
0
        cnn_percents = []
        for i in range(10):
            X_training, y_training, X_test, y_test = split_dataset(X, y, 0.7)
            if cnn:
                len_before = X_training.shape[0]
                X_training, y_training = cnn_transform(X_training, y_training, k, metric)
                len_after = X_training.shape[0]
                cnn_percents.append(float(len_after) / float(len_before) * 100.0)
            predictions = []
            for i in range(X_test.shape[0]):
                predictions.append(kNN(X_training, X_training, y_training, X_test[i, :], k, metric))
            if SHOW_PREDICTIONS_AND_REAL_VALUES:
                print('Prediction, actual:')
                for i in range(X_test.shape[0]):
                    print(predictions[i], y_test[i])
            correct = 0
            for i in range(len(predictions)):
                if y_test[i] == predictions[i]:
                    correct += 1
            accuracies.append(float(correct) / float(len(predictions)) * 100.0)
        print("Accuracy:", str(np.mean(accuracies)) + '%', 'StdDev:', np.std(accuracies))
        if cnn:
            print("CNN:", str(np.mean(cnn_percents)) + '%', 'StdDev:', np.std(cnn_percents))
    else:
        if cnn:
            X, y = cnn_transform(X, y, k, metric)
        plot_areas(lambda x: kNN(X, X, y, x, k, metric), 0.1, X)
        plot_2d_classes(X, y, 'ryb')
        cnn_str = '_cnn' if cnn else ''
        plt.savefig('plots/k' + str(k) + '_' + metric.__name__ + cnn_str + '.png')
Exemple #3
0
def kmeanspp_init(X, k):
    """Choose initial cluster centres from ``X`` with k-means++ seeding.

    The first centre is a uniformly random row of ``X``. Every further
    centre is a row drawn with probability proportional to the squared
    distance (as measured by ``euclidean``) to its nearest centre so far.

    Args:
        X: Indexable collection of points; ``len(X)`` is the point count.
        k: Number of centres requested. One centre is always produced,
            so values below 1 still yield a single centre.

    Returns:
        ``np.ndarray`` stacking the selected rows of ``X``.

    Raises:
        ValueError: If ``X`` is empty (raised by ``np.random.randint``).
    """
    n_points = len(X)
    # randint's upper bound is exclusive, so this covers the same index
    # range as the deprecated np.random.random_integers(0, n_points - 1).
    means = [X[np.random.randint(0, n_points)]]
    for _ in range(1, k):
        sq_distances = np.array(
            [min(euclidean(point, mean) for mean in means) ** 2
             for point in X],
            dtype=float,
        )
        # Normalise once instead of re-summing the list for every element.
        total = sq_distances.sum()
        if total == 0:
            # Every point coincides with a chosen centre; dividing would
            # give NaN probabilities, so fall back to a uniform draw.
            probs = None
        else:
            probs = sq_distances / total
        means.append(X[np.random.choice(n_points, p=probs)])
    return np.array(means)


if __name__ == '__main__':
    # Script entry point: plot and save the generated dataset, then gather
    # k-means quality measurements for each initialisation function.
    X, y = generate_dataset(100)
    # Nine colour specs, the same count as k below.
    colors = ['r', 'g', 'b', 'w', 'c', 'm', 'y', 'k', '0.75']
    plot_2d_classes(X, y, colors)
    plt.gca().set_title("Dataset")
    plt.savefig('partA/dataset.png')
    # Open a new figure so later drawing does not land on the dataset plot.
    fig = plt.figure()
    ax = plt.gca()
    iterations = 50
    k = 9
    reps = 5
    for method in [random_init, forgy_init, random_partition_init, kmeanspp_init]:
        print(method)
        # Pool the (i, quality) pairs yielded by all `reps` runs of this
        # method; `i` is presumably the iteration index - TODO confirm
        # against k_means.
        qualities = []
        for _ in range(reps):
            for i, quality in k_means(method, X, k, iterations):
                qualities.append((i, quality))
        # Start empty here; presumably filled by code that follows this
        # excerpt (not visible).
        avgs = []
        stds = []
Exemple #4
0
                new_means[y[i]][j] += X[i][j]
            points_num[y[i]] += 1
        print(points_num)
        for i in range(k):
            for j in range(2):
                if points_num[i] > 0:
                    new_means[i][j] /= float(points_num[i])
        sums = [0 for i in range(k)]
        for i in range(X.shape[0]):
            sums[y[i]] += metric(new_means[y[i]], X[i, :])
        sum = np.sum(sums)
        if sum - best_sum > -EPSILON:
            break
        best_sum = sum
        means = new_means
        print(means)

    return means, y


if __name__ == '__main__':
    # Cluster the data into k groups, plot the labelled points, and mark
    # each cluster mean with a triangle.
    X = read_data()
    k = 3
    # Single source of truth for the colours used by both plotting calls.
    colors = 'ryb'
    means, y = kMeans(X, k)
    plot_2d_classes(X, np.array(y), colors)
    for cluster in range(k):
        # Dedicated names so the cluster labels held in `y` are not
        # overwritten by a mean coordinate.
        mean_x, mean_y = means[cluster]
        plt.plot(mean_x, mean_y, '^', c=colors[cluster])
    plt.show()
Exemple #5
0
    return np.array([np.average(clusters[i], axis=0) for i in range(k)])


def kmeanspp_init(X, k):
    """Choose initial cluster centres from ``X`` with k-means++ seeding.

    The first centre is a uniformly random row of ``X``. Every further
    centre is a row drawn with probability proportional to the squared
    distance (as measured by ``euclidean``) to its nearest centre so far.

    Args:
        X: Indexable collection of points; ``len(X)`` is the point count.
        k: Number of centres requested. One centre is always produced,
            so values below 1 still yield a single centre.

    Returns:
        ``np.ndarray`` stacking the selected rows of ``X``.

    Raises:
        ValueError: If ``X`` is empty (raised by ``np.random.randint``).
    """
    n_points = len(X)
    # randint's upper bound is exclusive, so this covers the same index
    # range as the deprecated np.random.random_integers(0, n_points - 1).
    means = [X[np.random.randint(0, n_points)]]
    for _ in range(1, k):
        sq_distances = np.array(
            [min(euclidean(point, mean) for mean in means) ** 2
             for point in X],
            dtype=float,
        )
        # Normalise once instead of re-summing the list for every element.
        total = sq_distances.sum()
        if total == 0:
            # Every point coincides with a chosen centre; dividing would
            # give NaN probabilities, so fall back to a uniform draw.
            probs = None
        else:
            probs = sq_distances / total
        means.append(X[np.random.choice(n_points, p=probs)])
    return np.array(means)


if __name__ == '__main__':
    # Cluster one generated dataset with each initialisation function and
    # save the four labelled scatter plots as a single 2x2 figure.
    X, y = generate_dataset(100)
    colors = ['r', 'g', 'b', 'w', 'c', 'm', 'y', 'k', '0.75']
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2)
    iterations = 50
    k = 9
    strategies = [random_init, forgy_init, random_partition_init, kmeanspp_init]
    # Pair every strategy with its own panel, in row-major order.
    for method, panel in zip(strategies, [ax1, ax2, ax3, ax4]):
        # Drop the last five characters of the function name (the "_init"
        # suffix of the names listed above) to get a short label.
        label = method.__name__[:-5]
        print(label)
        y_m = k_means(method, X, k, iterations)
        plot_2d_classes(X, y_m, colors, panel)
        panel.set_title(label)
    plt.savefig('partA/clusters.png')