def main():
    """ DO NOT TOUCH THIS FUNCTION. IT IS USED FOR COMPUTER EVALUATION OF YOUR CODE """
    # NOTE(review): Python 2 `print` statement below — this file targets Python 2.
    results = my_info() + "\t\t"
    print results + "\t\t"
    # Load 500 MNIST images: X are the image vectors, Y the digit labels.
    X, Y = mnist.read_mnist_training_data(500)
    # k-means seeded with the first ten images ("centriods" misspelling kept
    # as-is — DO NOT TOUCH per the docstring above).
    centriods = X[:10]
    cm, c = kmeans(X, centriods)
    mnist.visualize(cm)
    # for mean, cluster in zip(cm, c):
    #     mnist.visualize(np.insert(cluster, 0, mean, axis=0))
    # k-means seeded with the first occurrence of each digit 0-9.
    centriods_unique = np.array([X[np.where(Y == i)[0][0]] for i in range(10)])
    cm, c = kmeans(X, centriods_unique)
    mnist.visualize(cm)
    # for mean, cluster in zip(cm, c):
    #     mnist.visualize(np.insert(cluster, 0, mean, axis=0))
    # k-medoids on the full pairwise Euclidean distance matrix, seeded with
    # the first ten sample indices.
    distances = distance.cdist(X, X, "euclidean")
    medoids_idx, clusters = kmedoids(distances, list(range(10)))
    medoids = np.array([X[int(i)] for i in medoids_idx])
    c = np.array([X[clusters == i] for i in range(10)])
    mnist.visualize(medoids)
    # for mean, cluster in zip(cm, c):
    #     mnist.visualize(np.insert(cluster, 0, mean, axis=0))
    # k-medoids seeded with the index of the first instance of each digit.
    mediod_idx = [np.where(Y == i)[0][0] for i in range(10)]
    medoids_idx, clusters = kmedoids(distances, mediod_idx)
    medoids = np.array([X[int(i)] for i in medoids_idx])
    c = np.array([X[clusters == i] for i in range(10)])
    mnist.visualize(medoids)
from sklearn.metrics import confusion_matrix

"""
============================================
DO NOT FORGET TO INCLUDE YOUR STUDENT ID
============================================
"""

# BUG FIX(review): an unresolved git merge conflict
# (<<<<<<< HEAD ... ======= ... >>>>>>>) was left in the source here, which
# is a syntax error.  Resolved in favour of the `student_ID` variable, the
# name used by the rest of this codebase.
student_ID = ''

# Load 5000 MNIST images and split them 50/50 into train and test halves.
X, y = mnist.read_mnist_training_data(N=5000)
X_train = X[0:2500]
X_test = X[2500:5000]
y_train = y[0:2500]
y_test = y[2500:5000]


def bmatrix(a):
    """Returns a LaTeX bmatrix

    :a: numpy array
    :returns: LaTeX bmatrix as a string
    """
    if len(a.shape) > 2:
        raise ValueError('bmatrix can at most display two dimensions')
    # str(a) renders the array; strip numpy's brackets before re-wrapping
    # the rows as LaTeX.
    lines = str(a).replace('[', '').replace(']', '').splitlines()
    # NOTE(review): the function body is truncated at this point in the
    # source under review — the LaTeX string is never assembled or
    # returned.  TODO: restore the remainder from the full file.
import numpy as np
import mnist_load_show as mnist
from sklearn.metrics import confusion_matrix

"""
============================================
DO NOT FORGET TO INCLUDE YOUR STUDENT ID
============================================
"""

student_ID = ''

# Full MNIST training data: images X and integer labels y.
X, y = mnist.read_mnist_training_data()


def my_info():
    """
    :return: DO NOT FORGET to include your student ID as a string, this
        function is used to evaluate your code and results
    """
    return student_ID


def one_vs_all():
    """
    Implement the multi label classifier using the one_vs_all paradigm and
    return the confusion matrix

    :return: the confusion matrix regarding the result obtained using the
        classifier
    """
    # Placeholder: the classifier has not been implemented yet.
    one_vs_all_conf_matrix = ''
    return one_vs_all_conf_matrix


def all_vs_all():
    """
    Implement the multi label classifier using the all_vs_all paradigm and
    return the confusion matrix.

    NOTE(review): the source under review was truncated at this definition
    with no body at all (a syntax error).  A placeholder stub mirroring
    one_vs_all() is supplied so the module parses; TODO restore the real
    implementation from the full file.

    :return: the confusion matrix obtained using the classifier
    """
    all_vs_all_conf_matrix = ''
    return all_vs_all_conf_matrix
import mnist_load_show as mnist
import numpy as np

# Full MNIST training set: images X and integer labels y.
X, y = mnist.read_mnist_training_data()


def kmeans(data_matrix, initial_cluster_means):
    # Lloyd's k-means with k hard-coded to 10 clusters.
    # data_matrix: one sample vector per row.
    # initial_cluster_means: the 10 starting mean vectors (copied, so the
    # caller's array is not mutated).
    assignment = dict()  # sample index -> cluster id
    cluster_means = initial_cluster_means.copy()
    while True:
        # Assign the new vector to a cluster
        for i in range(len(data_matrix)):
            best_cluster_id = -1
            best_distance = 1000000000.0  # sentinel "infinity"
            for k in range(10):
                dist = np.linalg.norm(data_matrix[i] - cluster_means[k])
                dist *= dist  # squared Euclidean distance
                if best_distance > dist:
                    best_distance = dist
                    best_cluster_id = k
            assignment[i] = best_cluster_id
        # Update the cluster means
        new_cluster_means = np.zeros([10, len(initial_cluster_means[0])])
        cluster_sizes = [0 for ii in range(10)]
        # NOTE(review): the source under review is truncated here, midway
        # through the mean-update step — the accumulation loop body, the
        # convergence test and the return statement are not visible.
        for i in range(len(data_matrix)):
# NOTE(review): fragment — this chunk is cut from the middle of a decision
# stump / information-gain routine; the enclosing function header and loop
# are not visible, so the original indentation levels are reconstructed.

# Guard against non-numpy (scalar/None) counts: fall back to a one-element
# zero list so the sums below are well defined.
if type(cr_counts).__module__ != 'numpy':
    cr_counts = [0]
if type(cl_counts).__module__ != 'numpy':
    cl_counts = [0]
# Entropy of the left side of the split.  An empty side is given a uniform
# 10-class distribution — presumably to penalise degenerate splits; TODO
# confirm the intent against the full file.
if sum(cl_counts) == 0:
    tot_l = 0
    probs_left = np.ones(10) * 0.1
    E_left = -1 * sum(probs_left * np.log(probs_left))
else:
    tot_l = sum(cl_counts)
    probs_left = (cl_counts / float(tot_l))
    E_left = -1 * sum(probs_left * np.log(probs_left))
# Entropy of the right side (same convention as the left).
if sum(cr_counts) == 0:
    tot_r = 0
    probs_right = np.ones(10) * 0.1
    E_right = -1 * sum(probs_right * np.log(probs_right))
else:
    tot_r = sum(cr_counts)
    probs_right = (cr_counts / float(tot_r))
    E_right = -1 * sum(probs_right * np.log(probs_right))
# Information gain of this candidate split; E_tot, n, ri and i come from the
# (unseen) enclosing scope.
IG = E_tot - (tot_l / (1. * n)) * E_left - (tot_r / (1. * n)) * E_right
res.append([IG, ri, i, probs_left, probs_right])
return res

# --- module-level driver (fragment) ---
x, y = mnist.read_mnist_training_data(10000)
gt_stump_v2(x, y)
# Pick the candidate with the highest information gain; `res` and `r1` are
# presumably produced by gt_stump_v2 at module scope — TODO confirm.
tempvec = [(res[i][0], i) for i in range(len(res))]
tempvec.sort()
s1 = tempvec[-1]
stump = r1[s1[1]]
import numpy as np
import mnist_load_show as mnist
'''
use cdist in order to find the distance
'''
from scipy.spatial.distance import cdist

"""
============================================
DO NOT FORGET TO INCLUDE YOUR STUDENT ID
============================================
"""
student_ID = '013593012'

TOTAL_IMAGES = 5000

### LOAD ########THE DATA #########################
X, y = mnist.read_mnist_training_data(TOTAL_IMAGES)
###################################################


def split(X, y):
    """Split the samples X and labels y 50/50 into train and test lists."""
    XTrain = []
    XTest = []
    yTrain = []
    yTest = []
    length = len(X)
    # BUG FIX(review): `length / 2` is float division on Python 3, which
    # makes range() below raise TypeError.  Floor division keeps the
    # original Python 2 behaviour on both interpreters.
    half = length // 2
    # First half of the data goes into the training set.
    for i in range(0, half):
        XTrain.append(X[i])
    # NOTE(review): the function is truncated at this point in the source
    # under review — the test half and the return statement are missing.
import numpy as np
import mnist_load_show
from sklearn.metrics import confusion_matrix

"""
============================================
DO NOT FORGET TO INCLUDE YOUR STUDENT ID
============================================
"""
student_ID = '014632888'

# Sizes of the two halves of the data and the number of perceptron passes.
TRAINING_SIZE = 30000
TEST_SIZE = 30000
NUM_EPOCHS = 10

# Load all of MNIST, then carve out consecutive train / test windows.
X, Y = mnist_load_show.read_mnist_training_data()
training_set = X[0:TRAINING_SIZE]
training_labels = Y[0:TRAINING_SIZE]
test_set = X[TRAINING_SIZE:TRAINING_SIZE + TEST_SIZE]
test_labels = Y[TRAINING_SIZE:TRAINING_SIZE + TEST_SIZE]

# Prepend a constant-1 bias feature to every sample.
training_set = np.insert(training_set, 0, 1, axis=1)
test_set = np.insert(test_set, 0, 1, axis=1)

# Features per sample, bias column included.
DIMENSIONS = training_set.shape[1]


def predict_one_vs_all_label(x, W):
    """Return the index of the row of W scoring highest on sample x."""
    scores = W.dot(x.transpose())
    return np.argmax(scores)
# NOTE(review): fragment — this chunk begins inside the medoid-update step
# of a k-medoids routine; the function header, convergence loop and the
# initialisation of new_mediod_indices are not visible, so nesting levels
# are reconstructed.
for i in range(len(mediod_indices)):
    # Indices of the samples currently assigned to cluster i.
    curr_cluster_indices = np.argwhere(cluster_indices == i).flatten()
    # Pairwise dissimilarities restricted to this cluster's members.
    reduced_dissimilarity = dissimilarity[
        curr_cluster_indices].transpose()[curr_cluster_indices]
    # New medoid: the member minimising total distance to the others.
    new_mediod_reduced_i = np.argmin(
        np.sum(reduced_dissimilarity, axis=1))
    new_mediod_i = curr_cluster_indices[new_mediod_reduced_i]
    new_mediod_indices[i] = new_mediod_i
# Convergence flag for the (unseen) enclosing loop; the return presumably
# sits after that loop in the full file — TODO confirm.
changed = (new_mediod_indices != mediod_indices).any()
mediod_indices = np.copy(new_mediod_indices)
return new_mediod_indices, cluster_indices

# --- module-level driver: run k-means with two different seedings ---
X, Y = mnist_load_show.read_mnist_training_data(SAMPLE_SIZE)
first_ten = X[:10]
# select first instance of each label
first_label_instance = np.array([X[np.where(Y == i)[0][0]] for i in range(10)])
cluster_means, clusters = k_means(X, first_ten)
mnist_load_show.visualize(cluster_means)
for mean, cluster in zip(cluster_means, clusters):
    # Show each cluster with its mean prepended as the first image.
    mnist_load_show.visualize(np.insert(cluster, 0, mean, axis=0))
cluster_means, clusters = k_means(X, first_label_instance)
mnist_load_show.visualize(cluster_means)
for mean, cluster in zip(cluster_means, clusters):
    mnist_load_show.visualize(np.insert(cluster, 0, mean, axis=0))
# NOTE(review): fragment — near-duplicate of the previous chunk; it starts
# inside a k-medoids routine whose header and outer loop are not visible,
# so the nesting is reconstructed.
new_mediod_indices = np.zeros(mediod_indices.shape)
for i in range(len(mediod_indices)):
    # Members of cluster i.
    curr_cluster_indices = np.argwhere(cluster_indices == i).flatten()
    # Cluster-local slice of the full dissimilarity matrix.
    reduced_dissimilarity = dissimilarity[curr_cluster_indices].transpose()[curr_cluster_indices]
    # The member with the smallest summed distance becomes the new medoid.
    new_mediod_reduced_i = np.argmin(np.sum(reduced_dissimilarity, axis=1))
    new_mediod_i = curr_cluster_indices[new_mediod_reduced_i]
    new_mediod_indices[i] = new_mediod_i
# `changed` presumably drives the unseen enclosing convergence loop; the
# return belongs to the enclosing (unseen) function — TODO confirm.
changed = (new_mediod_indices != mediod_indices).any()
mediod_indices = np.copy(new_mediod_indices)
return new_mediod_indices, cluster_indices

# --- module-level driver: k-means with two seedings, then visualisation ---
X, Y = mnist_load_show.read_mnist_training_data(SAMPLE_SIZE)
first_ten = X[:10]
# select first instance of each label
first_label_instance = np.array([
    X[np.where(Y == i)[0][0]] for i in range(10)
])
cluster_means, clusters = k_means(X, first_ten)
mnist_load_show.visualize(cluster_means)
for mean, cluster in zip(cluster_means, clusters):
    # Each cluster shown with its mean prepended.
    mnist_load_show.visualize(np.insert(cluster, 0, mean, axis=0))
cluster_means, clusters = k_means(X, first_label_instance)
mnist_load_show.visualize(cluster_means)
for mean, cluster in zip(cluster_means, clusters):
    mnist_load_show.visualize(np.insert(cluster, 0, mean, axis=0))
def main():
    """Run every numbered test configuration defined by load_parameters().

    Relies on module-level globals set elsewhere in the file
    (output_file, train_size, test_size, training_runs, the
    *_hidden_layers lists and the skip_* flags) and on the init_*/train_*/
    test_* network helpers — none of which are visible in this chunk.
    """
    test_number = 1
    # load_parameters() presumably returns falsy once test_number exceeds
    # the number of configured tests — TODO confirm.
    while load_parameters(test_number):
        # Truncate the per-test log file.
        output = open(output_file, 'w')
        output.write("Test started.\n\n")
        output.close()

        # Read data here
        full_mnist_data, full_mnist_labels = mnist.read_mnist_training_data(train_size + test_size)
        training_data = [full_mnist_data[:train_size], full_mnist_labels[:train_size]]
        test_data = [full_mnist_data[train_size:], full_mnist_labels[train_size:]]

        # Vectorize labels (integer class -> one-of-10 label vector)
        training_data[1] = label_vectors_from_indicies(training_data[1], 10)
        test_data[1] = label_vectors_from_indicies(test_data[1], 10)

        # Normalize data
        # NOTE(review): subtracts the mean then divides by 255, so values
        # are roughly centred but not confined to [0, 1] — confirm intended.
        training_data[0] = (training_data[0].astype(float) - training_data[0].mean()) / 255.0
        test_data[0] = (test_data[0].astype(float) - test_data[0].mean()) / 255.0

        # Modified data set for autoencoder: input doubles as target.
        autoencoder_training_data = [training_data[0], training_data[0]]
        autoencoder_test_data = [test_data[0], test_data[0]]

        # Initialize network(s) here
        input_size = (28 * 28)  # Pixels in the image
        output_size = 10  # Possible classifications
        layer_sizes = np.asarray([input_size] + feedforward_classifier_hidden_layers + [output_size])
        initialization_params = [layer_sizes]
        feedforward_classifier_state = None
        feedforward_classifier_connections = None
        [feedforward_classifier_state, feedforward_classifier_connections] = init_feedforward_classifier(
            initialization_params)

        # Change network shape for auto-encoder (input reproduced at output)
        autoencoder_layer_sizes = np.asarray([input_size] + autoencoder_hidden_layers + [input_size])
        autoencoder_initialization_params = [autoencoder_layer_sizes]
        autoencoder_state = None
        autoencoder_connections = None
        [autoencoder_state, autoencoder_connections] = init_autoencoder(autoencoder_initialization_params)

        # Two sets of parameters for autoencoder classifier: the classifier
        # consumes the autoencoder's last hidden layer as its input.
        autoencoder_classifier_classifier_layer_sizes = np.asarray(
            [autoencoder_classifier_autoencoder_hidden_layers[-1]] +
            autoencoder_classifier_classifier_hidden_layers + [output_size])
        autoencoder_classifier_autoencoder_layer_sizes = np.asarray(
            [input_size] + autoencoder_classifier_autoencoder_hidden_layers + [input_size])
        autoencoder_classifier_init_params = [[autoencoder_classifier_autoencoder_layer_sizes],
                                              [autoencoder_classifier_classifier_layer_sizes]]
        autoencoder_classifier_state = None
        autoencoder_classifier_connections = None
        [autoencoder_classifier_state, autoencoder_classifier_connections] = init_autoencoder_classifier(
            autoencoder_classifier_init_params)

        # Train network(s) here — each stage can be skipped via its flag.
        training_params = [training_runs]
        if not skip_feedforward_classifier:
            feedforward_classifier_connections = train_feedforward_classifier(feedforward_classifier_state,
                                                                              feedforward_classifier_connections,
                                                                              training_data, training_params)
        if not skip_autoencoder:
            autoencoder_connections = train_autoencoder(autoencoder_state, autoencoder_connections,
                                                        autoencoder_training_data, training_params)
        if not skip_autoencoder_classifier:
            [autoencoder_classifier_state, autoencoder_classifier_connections] = train_autoencoder_classifier(
                autoencoder_classifier_state, autoencoder_classifier_connections, training_data, training_params)

        # Test network(s) here
        test_params = None
        if not skip_feedforward_classifier:
            test_feedforward_classifier(feedforward_classifier_state, feedforward_classifier_connections,
                                        test_data, test_params)
        if not skip_autoencoder:
            test_autoencoder(autoencoder_state, autoencoder_connections, autoencoder_test_data, test_params)
        if not skip_autoencoder_classifier:
            test_autoencoder_classifier(autoencoder_classifier_state, autoencoder_classifier_connections,
                                        test_data, test_params)

        # Append the footer to the same log file and move on.
        output = open(output_file, 'a')
        output.write("Test finished.\n")
        output.close()
        test_number += 1
import numpy as np
import mnist_load_show
from sklearn.metrics import confusion_matrix

"""
============================================
DO NOT FORGET TO INCLUDE YOUR STUDENT ID
============================================
"""
student_ID = '014632888'

# Experiment configuration: train/test window sizes and epoch count.
TRAINING_SIZE = 30000
TEST_SIZE = 30000
NUM_EPOCHS = 10

# Fetch the MNIST training images X with their labels Y.
X, Y = mnist_load_show.read_mnist_training_data()

# The first window is the training split, the following window the test split.
training_set = X[:TRAINING_SIZE]
training_labels = Y[:TRAINING_SIZE]
test_set = X[TRAINING_SIZE:TRAINING_SIZE + TEST_SIZE]
test_labels = Y[TRAINING_SIZE:TRAINING_SIZE + TEST_SIZE]

# adding the bias column
training_set = np.insert(training_set, 0, 1, axis=1)
test_set = np.insert(test_set, 0, 1, axis=1)

# Length of one (bias-augmented) sample vector.
DIMENSIONS = len(training_set[0])


def predict_one_vs_all_label(x, W):
    """Pick the one-vs-all class whose weight row scores x the highest."""
    return np.argmax(np.dot(W, x.transpose()))
'''
use cdist in order to find the distance
'''
from scipy.spatial.distance import cdist

"""
============================================
DO NOT FORGET TO INCLUDE YOUR STUDENT ID
============================================
"""
student_ID = '013593012'

TOTAL_IMAGES = 5000

### LOAD ########THE DATA #########################
# NOTE(review): `mnist` is not imported in this chunk — presumably
# `import mnist_load_show as mnist` appears earlier in the full file.
X, y = mnist.read_mnist_training_data(TOTAL_IMAGES)
###################################################


def split(X, y):
    """Split the samples X and labels y 50/50 into train and test lists."""
    XTrain = []
    XTest = []
    yTrain = []
    yTest = []
    length = len(X)
    # BUG FIX(review): `length / 2` yields a float on Python 3 and makes
    # range() raise TypeError; floor division preserves the Python 2
    # behaviour on both interpreters.
    half = length // 2
    # First half of the samples go into the training set.
    for i in range(0, half):
        XTrain.append(X[i])
    # NOTE(review): the function is truncated here in the source under
    # review — the test half and the return statement are not visible.