Example #1
0
def main():
    """
    DO NOT TOUCH THIS FUNCTION. IT IS USED FOR COMPUTER EVALUATION OF YOUR CODE
    """
    results = my_info() + "\t\t"
    # Fixed: Python 2 print statement was a syntax error under Python 3.
    print(results + "\t\t")
    X, Y = mnist.read_mnist_training_data(500)
    # k-means seeded with the first ten images (typo 'centriods' corrected).
    centroids = X[:10]
    cm, c = kmeans(X, centroids)
    mnist.visualize(cm)
    # for mean, cluster in zip(cm, c):
    # mnist.visualize(np.insert(cluster, 0, mean, axis=0))

    # k-means seeded with the first occurrence of each digit label 0-9.
    centroids_unique = np.array([X[np.where(Y == i)[0][0]] for i in range(10)])
    cm, c = kmeans(X, centroids_unique)
    mnist.visualize(cm)
    # for mean, cluster in zip(cm, c):
    # mnist.visualize(np.insert(cluster, 0, mean, axis=0))

    # k-medoids on the full pairwise Euclidean distance matrix, seeded with
    # the first ten points.
    distances = distance.cdist(X, X, "euclidean")
    medoids_idx, clusters = kmedoids(distances, list(range(10)))
    medoids = np.array([X[int(i)] for i in medoids_idx])
    c = np.array([X[clusters == i] for i in range(10)])
    mnist.visualize(medoids)
    # for mean, cluster in zip(cm, c):
    # mnist.visualize(np.insert(cluster, 0, mean, axis=0))

    # k-medoids seeded with the first occurrence of each digit label 0-9
    # (typo 'mediod' corrected in the local name).
    medoid_idx = [np.where(Y == i)[0][0] for i in range(10)]
    medoids_idx, clusters = kmedoids(distances, medoid_idx)
    medoids = np.array([X[int(i)] for i in medoids_idx])
    c = np.array([X[clusters == i] for i in range(10)])
    mnist.visualize(medoids)
Example #2
0
from sklearn.metrics import confusion_matrix


"""
============================================
DO NOT FORGET TO INCLUDE YOUR STUDENT ID
============================================
"""
# Fixed: removed unresolved git merge-conflict markers (<<<<<<< / ======= /
# >>>>>>>) that made this module unparseable. Kept the template's
# `student_ID` name, which the rest of the code base reads (my_info).
student_ID = ''


# Load 5000 MNIST digits at import time and split them 50/50 into
# train / test halves.
X, y = mnist.read_mnist_training_data(N=5000)

X_train = X[0:2500]
X_test = X[2500:5000]
y_train = y[0:2500]
y_test = y[2500:5000]

def bmatrix(a):
    """Returns a LaTeX bmatrix

    :a: numpy array
    :returns: LaTeX bmatrix as a string
    """
    # LaTeX bmatrix notation cannot represent arrays of rank > 2.
    if len(a.shape) > 2:
        raise ValueError('bmatrix can at most display two dimensions')
    # Strip numpy's bracket characters from the repr and split it row-by-row.
    # NOTE(review): the function appears truncated here -- no return statement
    # is visible; presumably the LaTeX string is assembled from `lines` below.
    # Confirm against the original source.
    lines = str(a).replace('[', '').replace(']', '').splitlines()
Example #3
0
import numpy as np
import mnist_load_show as mnist
from sklearn.metrics import confusion_matrix
"""
============================================
DO NOT FORGET TO INCLUDE YOUR STUDENT ID
============================================
"""
# Student identifier returned by my_info(); left empty in this template.
student_ID = ''


# Load the full MNIST training set (images X, labels y) at import time.
X, y = mnist.read_mnist_training_data()


def my_info():
    """Return the author's student ID string.

    The automatic grader calls this function to attribute the submitted
    results; it simply hands back the module-level ``student_ID`` value.
    """
    return student_ID


def one_vs_all():
    """Multi-label classification via the one-vs-all scheme.

    :return: the confusion matrix obtained with the one-vs-all classifier
        (placeholder -- the implementation is not written yet, so an empty
        string is returned)
    """
    confusion = ''
    return confusion


def all_vs_all():
import mnist_load_show as mnist
import numpy as np

X, y = mnist.read_mnist_training_data()


def kmeans(data_matrix, initial_cluster_means):
    # Lloyd's k-means with k fixed at 10 clusters.
    # data_matrix: 2-D array of samples (one per row);
    # initial_cluster_means: the 10 starting mean vectors.
    # NOTE(review): the snippet is truncated below (the mean-update loop body
    # and the return are missing from this view).
    assignment = dict()
    cluster_means = initial_cluster_means.copy()

    while True:
        # Assign the new vector to a cluster
        for i in range(len(data_matrix)):
            best_cluster_id = -1
            best_distance = 1000000000.0  # sentinel "infinity"

            for k in range(10):
                dist = np.linalg.norm(data_matrix[i] - cluster_means[k])
                dist *= dist # squared Euclidean distance

                if best_distance > dist:
                    best_distance = dist
                    best_cluster_id = k

            assignment[i] = best_cluster_id

        # Update the cluster means
        new_cluster_means = np.zeros([10, len(initial_cluster_means[0])])
        cluster_sizes = [0 for ii in range(10)]

        for i in range(len(data_matrix)):
Example #5
0
        # Information-gain computation for one candidate decision-stump split
        # (fragment -- the enclosing function's def line is outside this view).
        # cl_counts / cr_counts: per-class counts on the left / right side of
        # the split; non-numpy values are replaced with [0].
        if type(cr_counts).__module__ != 'numpy':
            cr_counts = [0]
        if type(cl_counts).__module__ != 'numpy':
            cl_counts = [0]
        # Empty left side: fall back to a uniform distribution over 10 classes
        # so the entropy term stays well-defined (avoids log(0)).
        if sum(cl_counts) == 0:
            tot_l = 0
            probs_left = np.ones(10) * 0.1
            E_left = -1 * sum(probs_left * np.log(probs_left))
        else:
            tot_l = sum(cl_counts)
            probs_left = (cl_counts / float(tot_l))
            E_left = -1 * sum(probs_left * np.log(probs_left))
        # Same handling for the right side of the split.
        if sum(cr_counts) == 0:
            tot_r = 0
            probs_right = np.ones(10) * 0.1
            E_right = -1 * sum(probs_right * np.log(probs_right))
        else:
            tot_r = sum(cr_counts)
            probs_right = (cr_counts / float(tot_r))
            E_right = -1 * sum(probs_right * np.log(probs_right))
        # Information gain = total entropy minus size-weighted child entropies.
        IG = E_tot - (tot_l / (1. * n)) * E_left - (tot_r / (1. * n)) * E_right
        res.append([IG, ri, i, probs_left, probs_right])
        # NOTE(review): this return sits at the same indent as the append
        # above, so it appears to exit after the first candidate -- possibly
        # a bug; confirm against the full function.
        return res


# Driver fragment: load 10000 MNIST samples and pick the best stump.
# NOTE(review): `res` and `r1` are not defined in this view -- presumably
# module-level globals populated by gt_stump_v2; confirm in the full source.
x, y = mnist.read_mnist_training_data(10000)
gt_stump_v2(x, y)
# Rank candidate stumps by information gain (first element of each entry).
tempvec = [(res[i][0], i) for i in range(len(res))]
tempvec.sort()
s1 = tempvec[-1]  # highest-gain candidate
stump = r1[s1[1]]
import numpy as np
import mnist_load_show as mnist
'''
use cdist in order to find the distance
'''
from scipy.spatial.distance import cdist
"""
============================================
DO NOT FORGET TO INCLUDE YOUR STUDENT ID
============================================
"""
# Student identifier returned to the grader.
student_ID = '013593012'

# Number of MNIST images to load for this exercise.
TOTAL_IMAGES = 5000
### LOAD ########THE DATA #########################
X, y = mnist.read_mnist_training_data(TOTAL_IMAGES)
###################################################


def split(X, y):
    """Split the data set into equal train/test halves.

    Only the start of the loop body is visible here (the snippet is
    truncated); the visible part copies the first half of X into XTrain.

    :param X: sequence of feature vectors
    :param y: sequence of labels (same length as X)
    """
    XTrain = []
    XTest = []

    yTrain = []
    yTest = []

    length = len(X)
    # Fixed: `/` yields a float under Python 3 and range() rejects floats;
    # use floor division so `half` stays an int.
    half = length // 2

    for i in range(0, half):
        XTrain.append(X[i])
import numpy as np
import mnist_load_show
from sklearn.metrics import confusion_matrix
"""
============================================
DO NOT FORGET TO INCLUDE YOUR STUDENT ID
============================================
"""
# Student identifier returned to the grader.
student_ID = '014632888'

# First TRAINING_SIZE images train; the next TEST_SIZE images test.
TRAINING_SIZE = 30000
TEST_SIZE = 30000
NUM_EPOCHS = 10

X, Y = mnist_load_show.read_mnist_training_data()

training_set = X[:TRAINING_SIZE]
training_labels = Y[:TRAINING_SIZE]
test_set = X[TRAINING_SIZE : TRAINING_SIZE + TEST_SIZE]
test_labels = Y[TRAINING_SIZE: TRAINING_SIZE + TEST_SIZE]


# adding the bias column
training_set = np.insert(training_set, 0, 1, axis=1)
test_set = np.insert(test_set, 0, 1, axis=1)
DIMENSIONS = len(training_set[0])  # input dimension including the bias term

def predict_one_vs_all_label(x, W):
    """Predict the label of sample ``x`` under one-vs-all weight matrix ``W``.

    Each row of ``W`` scores one class; the index of the highest-scoring
    class is returned.
    """
    scores = W.dot(x.transpose())
    return np.argmax(scores)
        # k-medoids update step (fragment -- the enclosing def is outside
        # this view): for each cluster, pick as the new medoid the member
        # minimizing the summed dissimilarity to the cluster's other members.
        for i in range(len(mediod_indices)):
            curr_cluster_indices = np.argwhere(cluster_indices == i).flatten()
            # Restrict the dissimilarity matrix to this cluster's rows/cols.
            reduced_dissimilarity = dissimilarity[
                curr_cluster_indices].transpose()[curr_cluster_indices]
            new_mediod_reduced_i = np.argmin(
                np.sum(reduced_dissimilarity, axis=1))
            new_mediod_i = curr_cluster_indices[new_mediod_reduced_i]
            new_mediod_indices[i] = new_mediod_i

        # Iterate until the medoid set stops changing.
        changed = (new_mediod_indices != mediod_indices).any()
        mediod_indices = np.copy(new_mediod_indices)

    return new_mediod_indices, cluster_indices


# Run k-means twice on SAMPLE_SIZE MNIST digits with two seeding strategies
# and visualize the resulting cluster means and clusters.
X, Y = mnist_load_show.read_mnist_training_data(SAMPLE_SIZE)

# Seeding 1: simply the first ten images.
first_ten = X[:10]
# select first instance of each label
first_label_instance = np.array([X[np.where(Y == i)[0][0]] for i in range(10)])

cluster_means, clusters = k_means(X, first_ten)
mnist_load_show.visualize(cluster_means)
for mean, cluster in zip(cluster_means, clusters):
    # Show each cluster with its mean prepended as the first image.
    mnist_load_show.visualize(np.insert(cluster, 0, mean, axis=0))

# Seeding 2: one seed per digit class.
cluster_means, clusters = k_means(X, first_label_instance)
mnist_load_show.visualize(cluster_means)
for mean, cluster in zip(cluster_means, clusters):
    mnist_load_show.visualize(np.insert(cluster, 0, mean, axis=0))
        # k-medoids update step (fragment -- the enclosing def is outside
        # this view).
        # NOTE(review): np.zeros defaults to float64, so the returned medoid
        # indices are floats; callers elsewhere cast with int() before
        # indexing with them.
        new_mediod_indices = np.zeros(mediod_indices.shape)

        for i in range(len(mediod_indices)):
            curr_cluster_indices = np.argwhere(cluster_indices == i).flatten()
            # Dissimilarity sub-matrix restricted to this cluster's members.
            reduced_dissimilarity = dissimilarity[curr_cluster_indices].transpose()[curr_cluster_indices]
            new_mediod_reduced_i = np.argmin(np.sum(reduced_dissimilarity, axis=1))
            new_mediod_i = curr_cluster_indices[new_mediod_reduced_i]
            new_mediod_indices[i] = new_mediod_i

        # Stop when the medoid set is stable.
        changed = (new_mediod_indices != mediod_indices).any()
        mediod_indices = np.copy(new_mediod_indices)

    return new_mediod_indices, cluster_indices


# Run k-means twice on SAMPLE_SIZE MNIST digits with two seeding strategies
# and visualize the resulting cluster means and clusters.
X, Y = mnist_load_show.read_mnist_training_data(SAMPLE_SIZE)

# Seeding 1: simply the first ten images.
first_ten = X[:10]
# select first instance of each label
first_label_instance = np.array([ X[np.where(Y == i)[0][0]] for i in range(10) ])

cluster_means, clusters = k_means(X, first_ten)
mnist_load_show.visualize(cluster_means)
for mean, cluster in zip(cluster_means, clusters):
    # Each cluster is shown with its mean prepended as the first image.
    mnist_load_show.visualize(np.insert(cluster, 0, mean, axis=0))

# Seeding 2: one seed per digit class.
cluster_means, clusters = k_means(X, first_label_instance)
mnist_load_show.visualize(cluster_means)
for mean, cluster in zip(cluster_means, clusters):
    mnist_load_show.visualize(np.insert(cluster, 0, mean, axis=0))
def main():
    """Run a sequence of neural-network experiments on MNIST.

    Loops over test configurations loaded by ``load_parameters(test_number)``
    (a module-level helper, not visible here) and, per configuration:
    loads and preprocesses MNIST data, builds three networks (a feedforward
    classifier, an autoencoder, and an autoencoder+classifier), trains those
    not skipped by the ``skip_*`` flags, and tests them. Progress markers are
    written to ``output_file``.

    NOTE(review): all configuration names (train_size, test_size,
    feedforward_classifier_hidden_layers, skip_* flags, training_runs,
    output_file, ...) appear to be module-level globals set by
    load_parameters -- confirm in the full source.
    """
    test_number = 1

    while load_parameters(test_number):
        # Truncate the output file and mark the start of this test run.
        output = open(output_file, 'w')
        output.write("Test started.\n\n")
        output.close()

        # Read data here
        full_mnist_data, full_mnist_labels = mnist.read_mnist_training_data(train_size + test_size)
        training_data = [full_mnist_data[:train_size], full_mnist_labels[:train_size]]
        test_data = [full_mnist_data[train_size:], full_mnist_labels[train_size:]]

        # Vectorize labels
        training_data[1] = label_vectors_from_indicies(training_data[1], 10)
        test_data[1] = label_vectors_from_indicies(test_data[1], 10)

        # Normalize data
        # (subtract the global mean, then scale by the 255 pixel range)
        training_data[0] = (training_data[0].astype(float) - training_data[0].mean()) / 255.0
        test_data[0] = (test_data[0].astype(float) - test_data[0].mean()) / 255.0

        # Modified data set for autoencoder
        # (the autoencoder's target is its own input)
        autoencoder_training_data = [training_data[0], training_data[0]]
        autoencoder_test_data = [test_data[0], test_data[0]]

        # Initialize network(s) here
        input_size = (28 * 28)  # Pixels in the image
        output_size = 10  # Possible classifications
        layer_sizes = np.asarray([input_size] + feedforward_classifier_hidden_layers + [output_size])
        initialization_params = [layer_sizes]
        feedforward_classifier_state = None
        feedforward_classifier_connections = None
        [feedforward_classifier_state, feedforward_classifier_connections] = init_feedforward_classifier(
            initialization_params)

        # Change network shape for auto-encoder
        autoencoder_layer_sizes = np.asarray([input_size] + autoencoder_hidden_layers + [input_size])
        autoencoder_initialization_params = [autoencoder_layer_sizes]
        autoencoder_state = None
        autoencoder_connections = None
        [autoencoder_state, autoencoder_connections] = init_autoencoder(autoencoder_initialization_params)

        # Two sets of parameters for autoencoder classifier
        # (the classifier stage takes the autoencoder's last hidden layer
        # as its input)
        autoencoder_classifier_classifier_layer_sizes = np.asarray([autoencoder_classifier_autoencoder_hidden_layers[-1]] +
                                                                   autoencoder_classifier_classifier_hidden_layers +
                                                                   [output_size])
        autoencoder_classifier_autoencoder_layer_sizes = np.asarray([input_size] +
                                                                    autoencoder_classifier_autoencoder_hidden_layers +
                                                                    [input_size])
        autoencoder_classifier_init_params = [[autoencoder_classifier_autoencoder_layer_sizes],
                                              [autoencoder_classifier_classifier_layer_sizes]]
        autoencoder_classifier_state = None
        autoencoder_classifier_connections = None
        [autoencoder_classifier_state, autoencoder_classifier_connections] = init_autoencoder_classifier(
            autoencoder_classifier_init_params)

        # Train network(s) here
        training_params = [training_runs]
        if not skip_feedforward_classifier:
            feedforward_classifier_connections = train_feedforward_classifier(feedforward_classifier_state,
                                                                              feedforward_classifier_connections,
                                                                              training_data, training_params)
        if not skip_autoencoder:
            autoencoder_connections = train_autoencoder(autoencoder_state, autoencoder_connections,
                                                        autoencoder_training_data, training_params)
        if not skip_autoencoder_classifier:
            [autoencoder_classifier_state, autoencoder_classifier_connections] = train_autoencoder_classifier(
                autoencoder_classifier_state, autoencoder_classifier_connections, training_data, training_params)

        # Test network(s) here
        test_params = None
        if not skip_feedforward_classifier:
            test_feedforward_classifier(feedforward_classifier_state, feedforward_classifier_connections, test_data,
                                        test_params)
        if not skip_autoencoder:
            test_autoencoder(autoencoder_state, autoencoder_connections, autoencoder_test_data, test_params)
        if not skip_autoencoder_classifier:
            test_autoencoder_classifier(autoencoder_classifier_state, autoencoder_classifier_connections, test_data,
                                        test_params)
        # Mark the end of this test run and advance to the next configuration.
        output = open(output_file, 'a')
        output.write("Test finished.\n")
        output.close()
        test_number += 1
Example #11
0
import numpy as np
import mnist_load_show
from sklearn.metrics import confusion_matrix
"""
============================================
DO NOT FORGET TO INCLUDE YOUR STUDENT ID
============================================
"""
# Student identifier returned to the grader.
student_ID = '014632888'

# First TRAINING_SIZE images train; the next TEST_SIZE images test.
TRAINING_SIZE = 30000
TEST_SIZE = 30000
NUM_EPOCHS = 10

X, Y = mnist_load_show.read_mnist_training_data()

training_set = X[:TRAINING_SIZE]
training_labels = Y[:TRAINING_SIZE]
test_set = X[TRAINING_SIZE:TRAINING_SIZE + TEST_SIZE]
test_labels = Y[TRAINING_SIZE:TRAINING_SIZE + TEST_SIZE]

# adding the bias column
training_set = np.insert(training_set, 0, 1, axis=1)
test_set = np.insert(test_set, 0, 1, axis=1)
DIMENSIONS = len(training_set[0])  # input dimension including the bias term


def predict_one_vs_all_label(x, W):
    """Return the index of the highest-scoring class for input vector ``x``.

    ``W`` holds one weight row per class; the per-class scores are the dot
    products of those rows with ``x``.
    """
    column = x.transpose()
    class_scores = np.dot(W, column)
    return np.argmax(class_scores)
'''
use cdist in order to find the distance
'''
from scipy.spatial.distance import cdist

"""
============================================
DO NOT FORGET TO INCLUDE YOUR STUDENT ID
============================================
"""
# Student identifier returned to the grader.
student_ID = '013593012'

# Number of MNIST images to load for this exercise.
TOTAL_IMAGES = 5000
### LOAD ########THE DATA #########################
X, y = mnist.read_mnist_training_data(TOTAL_IMAGES)
###################################################


def split(X, y):
    """Split the data set into equal train/test halves.

    Only the start of the loop body is visible here (the snippet is
    truncated); the visible part copies the first half of X into XTrain.

    :param X: sequence of feature vectors
    :param y: sequence of labels (same length as X)
    """
    XTrain = []
    XTest  = []

    yTrain = []
    yTest  = []

    length = len(X)
    # Fixed: `/` produces a float under Python 3 and range() rejects floats;
    # use floor division to keep `half` an int.
    half   = length // 2

    for i in range(0, half):
        XTrain.append(X[i])