def Order(train_data, train_labels):
    '''
    Regroup the training set so that samples appear in label order 0..9.

    For each digit 0-9 the rows selected by data.get_digits_by_label are
    stacked in turn and a matching run of labels is rebuilt alongside them.

    Returns a tuple (ordered_data, ordered_labels); ordered_labels is a 1-D
    float array holding one label per returned row.
    '''
    data_blocks = []
    label_blocks = []
    for digit in range(10):
        digit_rows = data.get_digits_by_label(train_data, train_labels, digit)
        data_blocks.append(digit_rows)
        # Size the label run from the rows actually returned rather than
        # assuming every class holds exactly train_data.shape[0] // 10
        # samples; that assumption misaligns labels for unbalanced classes.
        label_blocks.append(np.zeros(digit_rows.shape[0]) + digit)
    return np.vstack(data_blocks), np.concatenate(label_blocks)
def plot_means(train_data, train_labels):
    '''
    Show the mean image of every digit class side by side in one figure.

    For each label 0-9 the samples selected by data.get_digits_by_label are
    averaged over axis 0 and the 64-entry mean is laid out as an 8x8 image.
    The ten images are concatenated horizontally and drawn in grayscale.
    '''
    means = []
    for i in range(0, 10):
        i_digits = data.get_digits_by_label(train_data, train_labels, i)
        # Reshape to 8x8 so each tile is a recognisable digit image; a
        # (64, 1) column per class would render as a 64x10 strip instead.
        means.append(np.mean(i_digits, axis=0).reshape(8, 8))
    # Plot all means on same axis
    all_concat = np.concatenate(means, 1)
    plt.imshow(all_concat, cmap='gray')
    plt.show()
def compute_sigma_mles(train_data, train_labels):
    '''
    Compute the covariance estimate for each digit class

    Returns a three dimensional numpy array of shape (10, 64, 64)
    consisting of a covariance matrix for each digit class. Each matrix is
    the mean outer product of the centred samples plus 0.01 * I.
    '''
    covariances = np.zeros((10, 64, 64))
    # add 0.01I to ensure stability (same term for every class)
    for_stability = np.identity(64) * 0.01
    for digit in range(10):
        digit_data = data.get_digits_by_label(train_data, train_labels, digit)
        # get total number of data for each digit
        num_of_data = digit_data.shape[0]
        # centre every sample on the class mean
        centered = digit_data - digit_data.mean(0)
        # centered.T . centered equals the sum of the per-sample outer
        # products, computed in one matrix product instead of a Python loop
        scatter = np.dot(centered.T, centered)
        covariances[digit, :, :] = (scatter / num_of_data) + for_stability
    return covariances
def compute_mean_mles(train_data, train_labels):
    '''
    Estimate the mean feature vector of each digit class.

    Returns a numpy array of shape (10, 64). Row i is the average over
    axis 0 of the samples that data.get_digits_by_label selects for label i.
    '''
    class_means = np.zeros((10, 64))
    # One 64-entry row per class, filled in label order.
    for label in range(10):
        samples = data.get_digits_by_label(train_data, train_labels, label)
        class_means[label, :] = np.mean(samples, axis=0)
    return class_means
def compute_sigma_mles(train_data, train_labels):
    '''
    Compute the covariance estimate for each digit class

    Returns a three dimensional numpy array of shape (10, 64, 64)
    consisting of a covariance matrix for each digit class, with 0.01 * I
    added to each matrix.
    '''
    covariances = np.zeros((10, 64, 64))
    iden_matrix = 0.01 * np.identity(64)
    for i in range(0, 10):
        i_digits = data.get_digits_by_label(train_data, train_labels, i)
        # Construct the 64 by 64 matrix entry by entry from pairs of
        # feature columns. cov_vector is defined outside this function;
        # the original author notes it was checked against np.cov.
        for ii in range(0, 64):
            for jj in range(0, 64):
                covariances[i][ii][jj] = cov_vector(i_digits[:, ii],
                                                    i_digits[:, jj])
        # Add in place: np.add without an `out` argument returns a new
        # array, so calling it bare would leave covariances[i] unchanged.
        covariances[i] += iden_matrix
    return covariances
def compute_parameters(train_data, train_labels):
    '''
    Compute the eta MAP estimate/MLE with augmented data

    Returns a numpy array of shape (10, 64) where the ith row corresponds
    to the ith digit class. Entry (i, j) is
    (nonzero count of feature j + alpha - 1) / (N + alpha + beta - 2),
    with alpha = beta = 2 and N the number of rows for class i.
    '''
    alpha = 2
    beta = 2
    eta = np.zeros((10, 64))
    for digit in range(eta.shape[0]):
        digit_rows = data.get_digits_by_label(train_data, train_labels, digit)
        n_rows = digit_rows.shape[0]
        # Number of nonzero entries of each feature across this digit's rows.
        on_counts = np.count_nonzero(digit_rows, axis=0)
        eta[digit] = (on_counts + alpha - 1) / (n_rows + alpha + beta - 2)
    return eta
def compute_mean_mles(train_data, train_labels):
    '''
    Compute the mean estimate for each digit class

    Returns a numpy array of size (10,64)
    The ith row will correspond to the mean estimate for digit class i
    '''
    means = np.zeros((10, 64))
    for k in range(10):
        # samples selected for class k
        pics_k = data.get_digits_by_label(train_data, train_labels, k)
        # np.mean averages over the sample axis in one call, replacing the
        # manual row-by-row accumulation and division
        means[k] = np.mean(pics_k, axis=0)
    return means
def compute_sigma_mles(train_data, train_labels):
    '''
    Compute the covariance estimate for each digit class

    Returns a three dimensional numpy array of shape (10, 64, 64)
    consisting of a covariance matrix for each digit class. Each matrix is
    0.01 * I plus the mean outer product of the samples centred on the
    class mean obtained from compute_mean_mles.
    '''
    covariances = np.zeros((10, 64, 64))
    means = compute_mean_mles(train_data, train_labels)
    stabilizer = 0.01 * np.identity(64)
    for k in range(10):
        pics_k = np.asarray(
            data.get_digits_by_label(train_data, train_labels, k))
        tot_num = len(pics_k)
        # rows of `centered` are x - mean_k; centered.T . centered is the
        # sum of the per-sample outer products in a single matrix product
        centered = pics_k - means[k]
        covariances[k] = stabilizer + np.dot(centered.T, centered) / tot_num
    return covariances
def compute_sigma_mles(train_data, train_labels):
    '''
    Compute the covariance estimate for each digit class

    Returns a three dimensional numpy array of shape (10, 64, 64)
    consisting of a covariance matrix for each digit class, with 0.01 * I
    added to each matrix for stability.
    '''
    covariances = np.zeros((10, 64, 64))
    # Bind the class means locally so the lookups below never depend on a
    # name that may not exist in the enclosing scope.
    mean_vals = compute_mean_mles(train_data, train_labels)
    epsilon = 0.01 * np.identity(64)  # stability term
    for i in range(0, 10):
        i_digits = data.get_digits_by_label(train_data, train_labels, i)
        for j in range(0, 64):
            col_j = i_digits[:, j]
            for k in range(0, 64):
                # cov is defined outside this function; it receives the two
                # feature columns and their class means.
                covariances[i, j, k] = cov(col_j, i_digits[:, k],
                                           mean_vals[i, j], mean_vals[i, k])
        covariances[i, :, :] = covariances[i, :, :] + epsilon
    return covariances
def compute_parameters(train_data, train_labels):
    '''
    Compute the eta MAP estimate/MLE with augmented data

    Returns a numpy array of shape (10, 64) where the ith row corresponds
    to the ith digit class. The input is first passed through
    binarize_data; entry (k, j) is (column sum + 1) / (row count + 2).
    '''
    binary_data = binarize_data(train_data)
    eta = np.zeros((10, 64))
    for label in range(10):
        members = data.get_digits_by_label(binary_data, train_labels, label)
        n_members = np.shape(members)[0]
        # Walk the feature columns; the +1 / +2 terms are equivalent to
        # adding two data points, one all 0s and one all 1s.
        for feat, column in enumerate(np.transpose(members)):
            eta[label, feat] = (np.sum(column) + 1) / (n_members + 2)
    return eta
def compute_sigma_mles(train_data, train_labels):
    '''
    Compute the covariance estimate for each digit class

    Returns a three dimensional numpy array of shape (10, 64, 64)
    consisting of a covariance matrix for each digit class. Each matrix is
    the mean product of centred feature pairs, with 0.01 added on the
    diagonal.
    '''
    covariances = np.zeros((10, 64, 64))
    means = compute_mean_mles(train_data, train_labels)
    d = 64
    diagonal_boost = 0.01 * np.identity(d)
    for i in range(10):
        i_digits = data.get_digits_by_label(train_data, train_labels, i)
        # Centre once per class instead of once per (j, k) pair; entry
        # (j, k) of centered.T . centered / N is the mean of the products
        # of the centred columns j and k.
        centered = i_digits - means[i]
        covariances[i] = (np.dot(centered.T, centered) / i_digits.shape[0]
                          + diagonal_boost)
    return covariances
def compute_parameters(train_data, train_labels):
    '''
    Compute the eta MAP estimate/MLE with augmented data

    Returns a numpy array of shape (10, 64) where the ith row corresponds
    to the ith digit class. Entry (i, k) is
    (count of samples with feature k equal to 1 + 2 - 1) / (N + 2 + 2 - 2),
    i.e. the Beta(2, 2) form, with N the number of samples of class i.
    '''
    eta = np.zeros((10, 64))
    for i in range(0, 10):
        i_digits = np.asarray(
            data.get_digits_by_label(train_data, train_labels, i))
        # Use the real class size rather than a fixed 700 so the estimate
        # stays correct (and indexing stays in range) for any class size.
        n_samples = i_digits.shape[0]
        # per-feature count of entries that are exactly 1
        nc = np.sum(i_digits == 1, axis=0)
        eta[i] = 1.0 * (nc + 2 - 1) / (n_samples + 2 + 2 - 2)
    return eta
def plot_means(train_data, train_labels):
    '''
    Draw the mean of each digit class as one horizontal strip of images.

    Each class mean (average over axis 0 of the samples selected by
    data.get_digits_by_label) is reshaped to 8 columns, the ten results
    are joined along axis 1 and shown in grayscale.
    '''
    mean_images = [
        np.reshape(
            np.mean(
                data.get_digits_by_label(train_data, train_labels, digit),
                axis=0),
            (-1, 8))
        for digit in range(10)
    ]
    # All class means share a single axis.
    strip = np.concatenate(mean_images, axis=1)
    plt.imshow(strip, cmap='gray')
    plt.show()
def plot_means(train_data, train_labels):
    '''
    Plot the per-class mean images next to each other in grayscale.

    For every label 0-9 the selected samples are summed over axis 0,
    reshaped to 8x8 and divided by the number of samples.
    '''
    tiles = []
    for label in range(10):
        samples = data.get_digits_by_label(train_data, train_labels, label)
        column_totals = np.sum(np.array(samples), axis=0)
        tiles.append(column_totals.reshape(8, 8) / len(samples))
    # Join the ten 8x8 tiles left to right on one axis.
    montage = np.concatenate(tiles, axis=1)
    plt.imshow(montage, cmap='gray')
    plt.show()
def plot_means(train_data, train_labels):
    '''
    Show a titled grayscale figure with the mean image of each digit class.

    Each class mean is the axis-0 average of the samples selected by
    data.get_digits_by_label, reshaped to 8x8; the ten images are
    concatenated along axis 1.
    '''
    tiles = []
    for digit in range(10):
        members = data.get_digits_by_label(train_data, train_labels, digit)
        average = np.mean(members, axis=0)
        tiles.append(average.reshape((8, 8)))
    # One wide image holding all ten means.
    montage = np.concatenate(tiles, axis=1)
    plt.title("Means of Pixel Values for each Digit Class")
    plt.imshow(montage, cmap='gray')
    plt.show()
def plot_means(train_data, train_labels):
    '''
    Plot the mean image of every digit class side by side in grayscale.

    Each class mean is averaged over the samples selected by
    data.get_digits_by_label and reshaped to 8x8.
    '''
    means = []
    for i in range(0, 10):
        i_digits = data.get_digits_by_label(train_data, train_labels, i)
        # Average over however many samples the class actually has; a
        # fixed divisor of 700 is only right when every class has exactly
        # 700 samples.
        class_mean = np.mean(i_digits, axis=0)
        means.append(class_mean.reshape((8, 8)))
    # Plot all means on same axis
    all_concat = np.concatenate(means, 1)
    plt.imshow(all_concat, cmap='gray')
    plt.show()
def plot_means(train_data, train_labels):
    '''
    Draw each digit class mean in its own subplot of a 2 x 5 grid.

    The class means are collected in a (10, 64) array, then every row is
    reshaped to 8x8 and shown in grayscale on a 20 x 5 figure.
    '''
    class_means = np.zeros((10, 64))
    for label in range(10):
        members = data.get_digits_by_label(train_data, train_labels, label)
        # Mean of class `label` over the sample axis.
        class_means[label, :] = np.mean(members, axis=0)

    plt.figure(figsize=(20, 5))
    # Subplot slots are 1-based.
    for slot, mean_row in enumerate(class_means, start=1):
        plt.subplot(2, 5, slot)
        plt.imshow(mean_row.reshape((8, 8)), cmap='gray')
    plt.tight_layout()
    plt.show()
def compute_mean_mles(train_data, train_labels):
    '''
    Estimate the mean vector of each digit class.

    Returns a numpy array of size (10, 64); row k is the axis-0 sum of the
    samples selected for class k divided by their count.
    '''
    class_means = np.zeros((10, 64))
    for label in range(10):
        members = data.get_digits_by_label(train_data, train_labels, label)
        n_members = members.shape[0]
        column_totals = np.sum(members, axis=0)
        class_means[label] = column_totals / n_members
    return class_means
def plot_means(train_data, train_labels):
    '''
    Plot the mean image of every digit class side by side in grayscale.

    Each class mean is the axis-0 average of the samples selected by
    data.get_digits_by_label, reshaped to 8x8.
    '''
    means = []
    for i in range(0, 10):
        i_digits = data.get_digits_by_label(train_data, train_labels, i)
        # np.mean reduces over the sample axis inside NumPy; the builtin
        # sum() walks the rows one by one in Python and yields a bare 0
        # (then a ZeroDivisionError) when the class is empty.
        i_mean = np.mean(i_digits, axis=0).reshape(8, 8)
        means.append(i_mean)
    # Plot all means on same axis
    all_concat = np.concatenate(means, 1)
    plt.imshow(all_concat, cmap='gray')
    plt.show()
def compute_mean_mles(train_data, train_labels):
    '''
    Estimate the mean vector of each digit class.

    Returns a numpy array of size (10, 64) whose ith row is the axis-0
    mean of the samples data.get_digits_by_label selects for class i.
    '''
    per_class = np.zeros((10, 64))
    label = 0
    while label < 10:
        members = data.get_digits_by_label(train_data, train_labels, label)
        per_class[label] = np.mean(members, axis=0)
        label += 1
    return per_class
def plot_means(train_data, train_labels):
    '''
    Show the ten class-mean images in one grayscale strip.

    Every class mean is taken over axis 0 of the samples selected by
    data.get_digits_by_label and reshaped to (8, 8); the images are joined
    along axis 1.
    '''
    def mean_image(label):
        # Axis-0 mean of one class, as an 8x8 array.
        members = data.get_digits_by_label(train_data, train_labels, label)
        return np.reshape(np.mean(members, 0), (8, 8))

    tiles = [mean_image(label) for label in range(10)]
    # All means share one axis.
    strip = np.concatenate(tiles, axis=1)
    plt.imshow(strip, cmap='gray')
    plt.show()
def compute_mean_mles(train_data, train_labels):
    '''
    Estimate the mean vector of each digit class.

    Returns a numpy array of size (10, 64); row i holds the mean, over
    axis 0, of the samples selected for digit class i.
    '''
    result = np.zeros((10, 64))
    for row_index in range(10):
        result[row_index, :] = np.mean(
            data.get_digits_by_label(train_data, train_labels, row_index),
            0)
    return result
def compute_sigma_mles(train_data, train_labels):
    '''
    Estimate one covariance matrix per digit class.

    Returns a numpy array of shape (10, 64, 64). Matrix k is
    (X - mean_k).T @ (X - mean_k) / N plus 0.01 * I, where X holds the
    samples of class k and mean_k comes from compute_mean_mles.
    '''
    class_means = compute_mean_mles(train_data, train_labels)
    result = np.zeros((10, 64, 64))
    for label in range(10):
        members = data.get_digits_by_label(train_data, train_labels, label)
        # Centre the class samples once and reuse the array on both sides
        # of the product.
        centered = members - class_means[label]
        scatter = centered.T @ centered
        result[label] = scatter / members.shape[0] + 0.01 * np.identity(64)
    return result
def compute_parameters(train_data, train_labels):
    '''
    Compute the eta MAP estimate/MLE with augmented data

    Returns a numpy array of shape (10, 64) where the ith row corresponds
    to the ith digit class: (per-feature sum + 1) / (sample count + 2).
    '''
    eta = np.zeros((10, 64))
    n_features = eta.shape[1]
    for label in np.arange(10):
        members = data.get_digits_by_label(train_data, train_labels, label)
        n_members = members.shape[0]
        # Per-feature totals as a single row of n_features entries.
        feature_totals = np.sum(members, axis=0).reshape((1, n_features))
        eta[label] = (feature_totals + 1) / (n_members + 2)
    return eta
def compute_parameters(train_data, train_labels):
    '''
    Compute the eta MAP estimate/MLE with augmented data

    Returns a numpy array of shape (10, 64) where the ith row corresponds
    to the ith digit class: (per-feature sum + 1) / (sample count + 2).
    '''
    eta = np.zeros((10, 64))
    for label in range(10):
        members = data.get_digits_by_label(train_data, train_labels, label)
        numerator = np.sum(members, axis=0) + 1
        denominator = members.shape[0] + 2
        eta[label] = numerator / denominator
    return eta
def compute_parameters(train_data, train_labels):
    '''
    Compute the eta MAP estimate/MLE with augmented data

    Returns a numpy array of shape (10, 64) where the ith row corresponds
    to the ith digit class:
    (per-feature sum + a - 1) / (sample count + a + b - 2).

    NOTE(review): `a` and `b` are not assigned inside this function, so
    they must come from an enclosing scope -- presumably module-level
    prior hyperparameters; confirm they are defined.
    '''
    eta = np.zeros((10, 64))
    for label in range(10):
        members = data.get_digits_by_label(train_data, train_labels, label)
        n_members = members.shape[0]
        feature_totals = np.sum(members, axis=0)
        eta[label] = (feature_totals + a - 1) / (n_members + a + b - 2)
    return eta
def plot_means(train_data, train_labels):
    '''
    Show the ten class-mean images in one grayscale strip.

    For every label the selected samples are summed over axis 0, divided
    by the number of samples and reshaped to (8, 8).
    '''
    tiles = []
    for label in range(10):
        members = data.get_digits_by_label(train_data, train_labels, label)
        # Mean of the class: column totals over the sample count.
        mean_vector = np.sum(members, axis=0) / len(members)
        tiles.append(mean_vector.reshape((8, 8)))
    # All means on the same axis.
    strip = np.concatenate(tiles, axis=1)
    plt.imshow(strip, cmap='gray')
    plt.show()
def compute_mean_mles(train_data, train_labels):
    '''
    Estimate the mean vector of each digit class.

    Returns a numpy array of size (10, 64); the ith row is the axis-0 mean
    of the samples selected for digit class i.
    '''
    # Storage for the ten class means.
    class_means = np.zeros((10, 64))
    for label in range(10):
        members = data.get_digits_by_label(train_data, train_labels, label)
        class_mean = np.mean(members, 0)
        class_means[label] = class_mean
    return class_means
def plot_means(train_data, train_labels):
    '''
    Plot the mean image of every digit class side by side in grayscale.

    Each class mean is averaged over the samples selected by
    data.get_digits_by_label and reshaped to (8, 8). The figure is saved
    to "Mean of Handwritten Digits.pdf" before being shown.
    '''
    means = []
    for i in range(0, 10):
        i_digits = data.get_digits_by_label(train_data, train_labels, i)
        # Average over the real number of samples in the class; dividing
        # by a fixed 700 is wrong whenever a class has another size.
        arr = np.mean(i_digits, axis=0)
        arr = np.reshape(arr, (8, 8))
        means.append(arr)
    # Plot all means on same axis
    all_concat = np.concatenate(means, 1)
    plt.imshow(all_concat, cmap='gray')
    plt.savefig("Mean of Handwritten Digits.pdf")
    plt.show()
def compute_mean_mles(train_data, train_labels):
    '''
    Estimate the mean vector of each digit class.

    Returns a numpy array built from ten per-class means (axis-0 averages
    of the samples selected for labels 0..9), one per row.
    '''
    return np.array([
        np.mean(
            data.get_digits_by_label(train_data, train_labels, label),
            axis=0)
        for label in range(10)
    ])
def compute_parameters(train_data, train_labels):
    '''
    Compute the eta MAP estimate/MLE with augmented data

    Returns a numpy array of shape (10, 64) where the ith row corresponds
    to the ith digit class. Entry (i, j) is
    (sum of feature j + 2 - 1) / (sample count + 2 + 2 - 2).
    '''
    eta = np.zeros((10, 64))
    n_features = train_data.shape[1]
    for label in range(10):
        members = data.get_digits_by_label(train_data, train_labels, label)
        # The denominator is the same for every feature of this class.
        denominator = members.shape[0] + 2 + 2 - 2
        for feat in range(n_features):
            feature_total = np.sum(members[:, feat])
            eta[label][feat] = (feature_total + 2 - 1) / denominator
    return eta
def compute_mean_mles(train_data, train_labels):
    '''
    Compute the mean estimate for each digit class

    Returns a numpy array of size (10,64)
    The ith row will correspond to the mean estimate for digit class i
    '''
    means = np.zeros((10, 64))
    for i in range(0, 10):
        i_digits = data.get_digits_by_label(train_data, train_labels, i)
        # np.mean reduces over the sample axis inside NumPy; the builtin
        # sum() walks the rows one by one in Python and raises
        # ZeroDivisionError for an empty class.
        means[i] = np.mean(i_digits, axis=0)
    return means
def compute_parameters(train_data, train_labels):
    '''
    Compute the eta MAP estimate/MLE with augmented data

    Returns a numpy array of shape (10, 64) where the ith row corresponds
    to the ith digit class. Entry (i, j) is
    (sum of feature j + a - 1) / (N + a + b - 2) with a = b = 2.
    '''
    a = 2
    b = 2
    # Start from zeros: seeding eta with 0.01 and accumulating would shift
    # every estimate by 0.01 and can push values above 1, which is not a
    # valid Bernoulli parameter. The a/b terms already keep eta off 0 and 1.
    eta = np.zeros((10, 64))
    for i in range(0, 10):
        i_digits = data.get_digits_by_label(train_data, train_labels, i)
        N = len(i_digits)
        Nc = np.sum(i_digits, axis=0)
        eta[i] = (Nc + a - 1) / (N + a + b - 2)
    return eta
def compute_sigma_mles(train_data, train_labels):
    '''
    Compute the covariance estimate for each digit class

    Returns a three dimensional numpy array of shape (10, 64, 64)
    consisting of a covariance matrix for each digit class. Each matrix is
    the mean outer product of the samples centred on the class mean from
    compute_mean_mles, plus 0.01 * I.
    '''
    means = compute_mean_mles(train_data, train_labels)
    covariances = np.zeros((10, 64, 64))
    for i in range(0, 10):
        i_digits = data.get_digits_by_label(train_data, train_labels, i)
        N = i_digits.shape[0]
        # Centre first and divide by N. Dividing the raw second moment by
        # N - 1 while subtracting an unscaled mean product gives neither
        # the MLE nor the unbiased estimate, and fails for N == 1.
        centered = i_digits - means[i]
        covariances[i] = (np.dot(centered.T, centered) / N
                          + 0.01 * np.identity(i_digits.shape[1]))
    return covariances