Example no. 1
def Order(train_data, train_labels):
    # Stack the training data class by class (0 through 9) and rebuild the
    # label vector so it stays aligned with the reordered data.
    train_data_new = data.get_digits_by_label(train_data, train_labels, 0)
    train_labels_new = np.zeros((train_data_new.shape[0], 1))
    for i in range(1, 10):
        i_digits = data.get_digits_by_label(train_data, train_labels, i)
        train_data_new = np.vstack((train_data_new, i_digits))
        i_labels = np.full((i_digits.shape[0], 1), i)
        train_labels_new = np.vstack((train_labels_new, i_labels))
    train_data = train_data_new
    train_labels = np.squeeze(train_labels_new)
    return train_data, train_labels
Example no. 2
def plot_means(train_data, train_labels):
    means = []
    for i in range(0, 10):
        i_digits = data.get_digits_by_label(train_data, train_labels, i)
        # Compute mean of class i as a flattened 64 x 1 column
        means.append(np.mean(i_digits, axis=0).reshape(64, 1))
    means = np.array(means)
    # Plot all means on the same axis (a 64 x 10 image, one column per class)
    all_concat = np.concatenate(means, 1)
    plt.imshow(all_concat, cmap='gray')
    plt.show()
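As a side note, reshaping each class mean to (64, 1) gives a 64 x 10 image of flattened columns, whereas the later plot_means variants reshape each mean to 8 x 8 and place the ten tiles side by side. A minimal, self-contained sketch of that side-by-side layout, using made-up arrays in place of the real digit means, could look like this:

# Sketch only: synthetic stand-ins for the ten class means, tiled side by side as 8 x 8 images.
import numpy as np
import matplotlib.pyplot as plt

rng = np.random.default_rng(0)
fake_means = [rng.random(64).reshape(8, 8) for _ in range(10)]  # placeholders for the class means

all_concat = np.concatenate(fake_means, axis=1)  # an 8 x 80 strip, one tile per class
plt.imshow(all_concat, cmap='gray')
plt.show()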
Example no. 3
def compute_sigma_mles(train_data, train_labels):
    '''
    Compute the covariance estimate for each digit class

    Should return a three dimensional numpy array of shape (10, 64, 64)
    consisting of a covariance matrix for each digit class 
    '''
    covariances = np.zeros((10, 64, 64))
    # Compute covariances
    for digit in range(10):
        digit_data = data.get_digits_by_label(train_data, train_labels, digit)

        # compute means for each digit
        digit_means = digit_data.mean(0).reshape((1, 64))

        # get total number of data for each digit
        num_of_data = digit_data.shape[0]

        cov_for_digit = np.zeros((64, 64))
        # loop over each train data obtained for each digit
        for index in range(num_of_data):
            x = digit_data[index, ].reshape((1, 64))
            diff = x - digit_means
            cov_for_digit = cov_for_digit + np.dot(diff.T, diff)

        # add 0.01I to ensure stability
        for_stability = np.identity(64) * 0.01
        covariances[digit, :, :] = (cov_for_digit /
                                    num_of_data) + for_stability

    # print(covariances)
    return covariances
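One way to sanity-check the loop above is to compare it against NumPy's built-in biased covariance on synthetic data. The sketch below is not part of the original solution; the array X is made up purely for illustration.

# Illustrative check: the outer-product loop computes the biased MLE covariance,
# which matches np.cov(..., bias=True) up to the 0.01 * I stabilizer.
import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(700, 64))  # stand-in for the data of one digit class

mu = X.mean(axis=0, keepdims=True)  # (1, 64) class mean
diff = X - mu
sigma_loop_style = diff.T @ diff / X.shape[0] + 0.01 * np.identity(64)
sigma_numpy = np.cov(X, rowvar=False, bias=True) + 0.01 * np.identity(64)

assert np.allclose(sigma_loop_style, sigma_numpy)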
Example no. 4
def compute_mean_mles(train_data, train_labels):
    '''
    Compute the mean estimate for each digit class

    Should return a numpy array of size (10,64)
    The ith row will correspond to the mean estimate for digit class i
    '''
    means = np.zeros((10, 64))
    # Compute means

    # ~ means is a 10 x 64 matrix: one 64-entry mean vector per digit class

    for i in range(0,10):
        iDigits = data.get_digits_by_label(train_data, train_labels, i)
        meanDigits = np.mean(iDigits,axis=0)
        means[i,:] = meanDigits 

    # ~ 10 x 64 matrix -> each row corresponds to a digit
    # ~ since each image is 8 x 8, we have it flattened into a 1 x 64 
    # ~ so each row is the mean values of all digits corresponding to that class

    return means
Example no. 5
def compute_sigma_mles(train_data, train_labels):
    '''
    Compute the covariance estimate for each digit class

    Should return a three dimensional numpy array of shape (10, 64, 64)
    consisting of a covariance matrix for each digit class 
    '''
    covariances = np.zeros((10, 64, 64))
    # Compute covariances
    for i in range(0, 10):
        i_digits = data.get_digits_by_label(train_data, train_labels, i)  # (700, 64)

        # construct the 64 by 64 covariance matrix entry by entry
        for ii in range(0, 64):
            for jj in range(0, 64):
                # this is verified against np.cov
                covariances[i][ii][jj] = cov_vector(i_digits[:, ii], i_digits[:, jj])

        # add 0.01 * I once per class to ensure numerical stability
        covariances[i] += 0.01 * np.identity(64)

    return covariances
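The snippet calls a cov_vector helper that is not shown here. Assuming, as the comment suggests, that it returns the biased covariance of two equal-length feature columns (the same quantity np.cov reports with bias=True), a minimal sketch of such a helper might be:

# Hypothetical helper, not from the original file: biased (MLE) covariance of two 1-D arrays.
import numpy as np

def cov_vector(x, y):
    return np.mean((x - x.mean()) * (y - y.mean()))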
Example no. 6
def compute_parameters(train_data, train_labels):
    '''
    Compute the eta MAP estimate/MLE with augmented data

    You should return a numpy array of shape (10, 64)
    where the ith row corresponds to the ith digit class.
    '''
    eta = np.zeros((10, 64))
    a = 2  # alpha value
    b = 2  # beta value

    # loop through each digit and each feature to find theta_map
    for i in range(0, eta.shape[0]):
        # get the data related to the current digit
        current_data = data.get_digits_by_label(train_data, train_labels, i)
        # create an empty array of 64 features
        current_features = np.zeros((current_data.shape[1]))
        # for each feature calculate theta_map
        for feature in range(0, current_data.shape[1]):
            # count the number of ones for this feature across all data points
            # belonging to the current digit
            Nc = np.count_nonzero(current_data[:, feature])
            N = current_data.shape[0]  # total number of current data points
            theta_map = (Nc + a - 1) / (N + a + b - 2)
            current_features[feature] = theta_map  # save theta_map for this feature
        eta[i] = current_features  # save the array of features for this digit

    return eta
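With a = b = 2, the MAP formula used above reduces to add-one smoothing, (Nc + 1) / (N + 2). The tiny check below is illustrative only, using a made-up binary feature column:

# Illustrative check of the Beta(2, 2) MAP estimate on synthetic data.
import numpy as np

column = np.array([1, 0, 1, 1, 0])  # made-up binary values of one feature for one class
Nc = np.count_nonzero(column)       # number of ones
N = column.shape[0]                 # number of data points

a, b = 2, 2
theta_map = (Nc + a - 1) / (N + a + b - 2)
assert np.isclose(theta_map, (Nc + 1) / (N + 2))  # both equal 4/7 here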
Example no. 7
def compute_mean_mles(train_data, train_labels):
    '''
    Compute the mean estimate for each digit class

    Should return a numpy array of size (10,64)
    The ith row will correspond to the mean estimate for digit class i
    '''
    means = np.zeros((10, 64))

    # Compute means
    x = train_data
    t = train_labels

    # 1. categorize all pictures into 10 classes
    # 2. find mean estimate for each class (1,64)
    # 3. combine all mean estimate into a matrix => means

    for k in range(10):
        pics_k = data.get_digits_by_label(x, t, k)
        tot_num = len(pics_k)
        tot_pixels = np.zeros((1, 64))
        for i in range(len(pics_k)):
            tot_pixels += pics_k[i]
        means[k] = tot_pixels / tot_num

    return means
Example no. 8
def compute_sigma_mles(train_data, train_labels):
    '''
    Compute the covariance estimate for each digit class

    Should return a three dimensional numpy array of shape (10, 64, 64)
    consisting of a covariance matrix for each digit class
    '''
    covariances = np.zeros((10, 64, 64))
    # Compute covariances
    x = train_data
    t = train_labels
    means = compute_mean_mles(train_data, train_labels)  # (10,64)

    # 1. categorize all pictures into 10 classes
    # 2. find sigma estimate for each class (1,64,64)
    # 3. combine all sigma estimate into a matrix => covariances

    for k in range(10):
        mean_k = means[k]  # (1, 64)
        tot_pixels_cov = np.zeros((64, 64))
        pics_k = data.get_digits_by_label(x, t, k)
        tot_num = len(pics_k)
        for i in range(len(pics_k)):
            xk_minus_meank = pics_k[i].reshape((64, 1)) - mean_k.reshape(
                (64, 1))
            tot_pixels_cov += np.matmul(xk_minus_meank, xk_minus_meank.T)
        covariances[k] = 0.01 * np.identity(64) + tot_pixels_cov / tot_num

    return covariances
Example no. 9
def compute_sigma_mles(train_data, train_labels):
    '''
    Compute the covariance estimate for each digit class

    Should return a three dimensional numpy array of shape (10, 64, 64)
    consisting of a covariance matrix for each digit class 
    '''
    covariances = np.zeros((10, 64, 64))
    # Compute covariances

    # ~ covariance matrix is a 64 x 64 matrix, and we have 10 of these, one per class 
    # ~ so 10 x 64 x 64 is our return 
    # ~

    meanVals = compute_mean_mles(train_data, train_labels)

    for i in range(0,10):
        iDigits = data.get_digits_by_label(train_data, train_labels, i)
        for j in range(0,64):
            x1 = np.transpose(iDigits[:,j])
            for k in range(0,64):
                x2 = np.transpose(iDigits[:,k])
                covariances[i,j,k] = cov(x1,x2,meanVals[i,j],meanVals[i,k])

        epsilon = 0.01 * np.identity(64) # ~ stability
        covariances[i,:,:] = covariances[i,:,:] + epsilon


    return covariances
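This version relies on a cov helper that takes the two feature columns along with their precomputed class means. Its definition is not included; a plausible sketch, assuming it returns the MLE covariance, would be:

# Hypothetical helper, not from the original file: MLE covariance of two feature columns
# given their precomputed means.
import numpy as np

def cov(x1, x2, mean1, mean2):
    return np.mean((x1 - mean1) * (x2 - mean2))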
Example no. 10
def compute_parameters(train_data, train_labels):
    '''
    Compute the eta MAP estimate/MLE with augmented data

    You should return a numpy array of shape (10, 64)
    where the ith row corresponds to the ith digit class.
    '''

    trainBinary = binarize_data(train_data)

    # ~ WE HAVE all the data
    # ~ first : divide it based on class labels
    # ~ second : for elements 0 to 63 in the matrix, in class k, nkj is 
    # ~         just the sum of 1's in the column over all points

    eta = np.zeros((10, 64))

    for k in range(0,10):
        digitsClassK = data.get_digits_by_label(trainBinary, train_labels, k)
        digitsShape = np.shape(digitsClassK)
        numPoints = digitsShape[0]
        numFeats = digitsShape[1]
        for j in range(0,numFeats):
            featSum = np.sum(digitsClassK[:,j])
            eta[k,j] = (featSum + 1) / (numPoints + 2) 
            # ~ + 1 and + 2 are equivalent to adding two data points, one all 0s and one all 1s
            # ~ (from the naive Bayes Beta distribution slides), i.e. the MAP estimate under that prior


    
    return eta
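The comment above claims that the +1 and +2 terms are equivalent to augmenting each class with one all-zeros and one all-ones data point. A small self-contained check of that claim, on a made-up binary column, is:

# Illustrative check: augmenting with one extra 0 and one extra 1 gives the same mean
# as the smoothed estimate (sum + 1) / (N + 2).
import numpy as np

col = np.array([1, 1, 0, 1, 0])            # made-up binary feature for one class
augmented = np.concatenate([col, [0, 1]])  # pretend we observed one extra 0 and one extra 1

assert np.isclose(augmented.mean(), (col.sum() + 1) / (col.size + 2))  # both equal 4/7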
Example no. 11
def compute_sigma_mles(train_data, train_labels):
    '''
    Compute the covariance estimate for each digit class

    Should return a three dimensional numpy array of shape (10, 64, 64)
    consisting of a covariance matrix for each digit class 
    '''
    covariances = np.zeros((10, 64, 64))

    means = compute_mean_mles(train_data, train_labels)

    d = 64

    for i in range(10):
        i_digits = data.get_digits_by_label(train_data, train_labels, i)

        for j in range(d):
            for k in range(d):
                var_1 = i_digits[:, j] - means[i, j]
                var_2 = i_digits[:, k] - means[i, k]
                var = np.mean(var_1 * var_2, 0)

                covariances[i, j, k] = var

                if j == k:
                    covariances[i, j, k] += 0.01

    return covariances
Example no. 12
def compute_parameters(train_data, train_labels):
    '''
    Compute the eta MAP estimate/MLE with augmented data

    You should return a numpy array of shape (10, 64)
    where the ith row corresponds to the ith digit class.
    '''
    # nc is a per-class count table: nc[i][k] counts how many vectors of class i have pixel k equal to one
    eta = np.zeros((10, 64))
    nc = np.zeros((10, 64))
    n = np.zeros(10)  # number of training vectors per class
    # for each class, count the ones for every pixel, then apply the Beta(2, 2) prior
    for i in range(0, 10):
        i_digits = data.get_digits_by_label(train_data, train_labels, i)
        n[i] = i_digits.shape[0]
        for j in range(0, i_digits.shape[0]):
            for k in range(0, 64):
                if i_digits[j][k] == 1:
                    nc[i][k] += 1
    # MAP estimate with a Beta(2, 2) prior: (Nc + a - 1) / (N + a + b - 2)
    for i in range(0, 10):
        for j in range(0, 64):
            eta[i][j] = (nc[i][j] + 2 - 1) / (n[i] + 2 + 2 - 2)

    return eta
Example no. 13
def plot_means(train_data, train_labels):
    means = []
    for i in range(0, 10):
        i_digits = data.get_digits_by_label(train_data, train_labels, i)
        # Compute mean of class i
        means.append(np.reshape(np.mean(i_digits, axis=0), (-1, 8)))

    # Plot all means on same axis
    all_concat = np.concatenate(means, 1)
    plt.imshow(all_concat, cmap='gray')
    plt.show()
Example no. 14
def plot_means(train_data, train_labels):
    means = []
    for i in range(0, 10):
        i_digits = data.get_digits_by_label(train_data, train_labels, i)
        means.append(
            np.sum(np.array(i_digits), axis=0).reshape(8, 8) / len(i_digits))

    # Plot all means on same axis
    all_concat = np.concatenate(means, 1)
    plt.imshow(all_concat, cmap='gray')
    plt.show()
Example no. 15
def plot_means(train_data, train_labels):
    means = []
    for i in range(0, 10):
        i_digits = data.get_digits_by_label(train_data, train_labels, i)
        # Compute mean of class i
        means.append(np.mean(i_digits, axis=0).reshape((8,8)))
        
    # Plot all means on same axis
    all_concat = np.concatenate(means, 1)
    plt.title("Means of Pixel Values for each Digit Class")
    plt.imshow(all_concat, cmap='gray')
    plt.show()
Example no. 16
def plot_means(train_data, train_labels):
    means = []
    for i in range(0, 10):
        i_digits = data.get_digits_by_label(train_data, train_labels, i)
        # Compute mean of class i
        temp = np.sum(i_digits, axis=0) / i_digits.shape[0]
        temp = temp.reshape((8, 8))
        means.append(temp)
    # Plot all means on same axis
    all_concat = np.concatenate(means, 1)
    plt.imshow(all_concat, cmap='gray')
    plt.show()
Example no. 17
def plot_means(train_data, train_labels):
    means = np.zeros((10, 64))
    for i in range(0, 10):
        i_digits = data.get_digits_by_label(train_data, train_labels, i)
        means[i, :] = np.mean(i_digits, axis=0)
        # Compute mean of class i
    plt.figure(figsize=(20, 5))
    num = means.shape[0]
    for i in range(num):
        plt.subplot(2, 5, i + 1)
        plt.imshow(means[i, :].reshape((8, 8)), cmap='gray')
    plt.tight_layout()
    plt.show()
Example no. 18
def compute_mean_mles(train_data, train_labels):
    '''
    Compute the mean estimate for each digit class

    Should return a numpy array of size (10,64)
    The ith row will correspond to the mean estimate for digit class i
    '''
    means = np.zeros((10, 64))
    # Compute means
    for k in range(10):
        X = data.get_digits_by_label(train_data, train_labels, k)
        means[k] = np.sum(X, axis=0) / X.shape[0]
    return means
Example no. 19
def plot_means(train_data, train_labels):
    means = []
    for i in range(0, 10):
        i_digits = data.get_digits_by_label(train_data, train_labels, i)

        # Compute mean of class i
        i_mean = (sum(i_digits[:, ]) / i_digits.shape[0]).reshape(8, 8)

        means.append(i_mean)
    # Plot all means on same axis
    all_concat = np.concatenate(means, 1)
    plt.imshow(all_concat, cmap='gray')
    plt.show()
Example no. 20
def compute_mean_mles(train_data, train_labels):
    '''
    Compute the mean estimate for each digit class

    Should return a numpy array of size (10,64)
    The ith row will correspond to the mean estimate for digit class i
    '''
    means = np.zeros((10, 64))
    # Compute means
    for digit in range(10):
        digit_data = data.get_digits_by_label(train_data, train_labels, digit)
        means[digit] = np.mean(digit_data, axis=0)
    return means
Example no. 21
def plot_means(train_data, train_labels):
    means = []
    for i in range(0, 10):
        i_digits = data.get_digits_by_label(train_data, train_labels, i)
        # Compute mean of class i
        # print(i_digits.shape)
        mean_digit = np.mean(i_digits, 0)
        mean_digit = np.reshape(mean_digit, (8, 8))
        means.append(mean_digit)
    # Plot all means on same axis
    # print("mean's shape is: {}".format(np.reshape(means, (8,8,10)).shape))
    all_concat = np.concatenate(means, 1)
    plt.imshow(all_concat, cmap='gray')
    plt.show()
Example no. 22
def compute_mean_mles(train_data, train_labels):
    '''
    Compute the mean estimate for each digit class

    Should return a numpy array of size (10,64)
    The ith row will correspond to the mean estimate for digit class i
    '''
    means = np.zeros((10, 64))
    # Compute means
    for i in range(0, 10):
        i_digits = data.get_digits_by_label(train_data, train_labels, i)
        mean_digit = np.mean(i_digits, 0)
        means[i, :] = mean_digit
    return means
Example no. 23
def compute_sigma_mles(train_data, train_labels):
    '''
    Compute the covariance estimate for each digit class
    Should return a three dimensional numpy array of shape (10, 64, 64)
    consisting of a covariance matrix for each digit class
    '''
    covariances = np.zeros((10, 64, 64))
    # Compute covariances
    means = compute_mean_mles(train_data, train_labels)
    for k in range(10):
        X = data.get_digits_by_label(train_data, train_labels, k)
        covariances[k] = ((X - means[k]).T @ (X - means[k])
                          ) / X.shape[0] + 0.01 * np.identity(64)
    return covariances
Example no. 24
def compute_parameters(train_data, train_labels):
    '''
    Compute the eta MAP estimate/MLE with augmented data

    Returns a numpy array of shape (10, 64)
    where the ith row corresponds to the ith digit class.
    '''
    eta = np.zeros((10, 64))
    for i in np.arange(10):
        tmp_data = data.get_digits_by_label(train_data, train_labels, i)
        N_kj = np.sum(tmp_data, axis=0).reshape((1, eta.shape[1]))
        N_k = tmp_data.shape[0]
        eta[i] = (N_kj+1)/(N_k+2)
    return eta
Example no. 25
def compute_parameters(train_data, train_labels):
    '''
    Compute the eta MAP estimate/MLE with augmented data

    You should return a numpy array of shape (10, 64)
    where the ith row corresponds to the ith digit class.
    '''
    eta = np.zeros((10, 64))
    for i in range(10):
        i_digits = data.get_digits_by_label(train_data, train_labels, i)
        total = i_digits.shape[0]
        p = (np.sum(i_digits, axis=0) + 1) / (total + 2)
        eta[i] = p
    return eta
Example no. 26
def compute_parameters(train_data, train_labels):
    '''
    Compute the eta MAP estimate/MLE with augmented data

    You should return a numpy array of shape (10, 64)
    where the ith row corresponds to the ith digit class.
    '''
    eta = np.zeros((10, 64))
    a = 2  # Beta prior parameter alpha
    b = 2  # Beta prior parameter beta
    for digit in range(10):
        digit_data = data.get_digits_by_label(train_data, train_labels, digit)
        total = digit_data.shape[0]
        theta = (np.sum(digit_data, axis=0) + a - 1) / (total + a + b - 2)
        eta[digit] = theta
    return eta
Example no. 27
def plot_means(train_data, train_labels):
    means = []
    for i in range(0, 10):
        i_digits = data.get_digits_by_label(train_data, train_labels, i)
        # Compute mean of class i
        sum_digit = np.sum(i_digits, axis=0)
        mean_vector = sum_digit / len(i_digits)
        mean = mean_vector.reshape((8, 8))
        means.append(mean)

    # Plot all means on same axis
    all_concat = np.concatenate(means, 1)
    plt.imshow(all_concat, cmap='gray')
    plt.show()
Example no. 28
def compute_mean_mles(train_data, train_labels):
    '''
    Compute the mean estimate for each digit class

    Should return a numpy array of size (10,64)
    The ith row will correspond to the mean estimate for digit class i
    '''
    # Initialize array to store means
    means = np.zeros((10, 64))
    # Compute means
    for i in range(10):
        sample = data.get_digits_by_label(train_data, train_labels, i)
        means[i] = np.mean(sample, 0)
    return means
Example no. 29
def plot_means(train_data, train_labels):
    means = []
    for i in range(0, 10):
        i_digits = data.get_digits_by_label(train_data, train_labels, i)
        # Compute mean of class i
        arr = np.sum(i_digits, axis=0) / i_digits.shape[0]
        arr = np.reshape(arr, (8, 8))
        means.append(arr)

    # Plot all means on same axis
    all_concat = np.concatenate(means, 1)
    plt.imshow(all_concat, cmap='gray')
    plt.savefig("Mean of Handwritten Digits.pdf")
    plt.show()
Example no. 30
def compute_mean_mles(train_data, train_labels):
    '''
    Compute the mean estimate for each digit class

    Should return a numpy array of size (10,64)
    The ith row will correspond to the mean estimate for digit class i
    '''
    means = []
    for i in range(0, 10):
        i_digits = data.get_digits_by_label(train_data, train_labels, i)
        # Compute mean of class i
        means.append(np.mean(i_digits, axis=0))
        
    return np.array(means)
Example no. 31
def compute_parameters(train_data, train_labels):
    '''
    Compute the eta MAP estimate/MLE with augmented data

    You should return a numpy array of shape (10, 64)
    where the ith row corresponds to the ith digit class.
    '''
    eta = np.zeros((10, 64))
    for i in range(0, 10):
        i_digits = data.get_digits_by_label(train_data, train_labels, i)
        for j in range(0, train_data.shape[1]):
            eta[i][j] = (np.sum(i_digits[:, j]) + 2 - 1) / (i_digits.shape[0] +
                                                            2 + 2 - 2)

    return eta
Example no. 32
def compute_mean_mles(train_data, train_labels):
    '''
    Compute the mean estimate for each digit class

    Should return a numpy array of size (10,64)
    The ith row will correspond to the mean estimate for digit class i
    '''
    means = np.zeros((10, 64))
    # Compute means
    for i in range(0, 10):
        i_digits = data.get_digits_by_label(train_data, train_labels, i)
        i_mean = (sum(i_digits[:, ]) / i_digits.shape[0])
        means[i] = i_mean

    return means
Example no. 33
def compute_parameters(train_data, train_labels):
    '''
    Compute the eta MAP estimate/MLE with augmented data

    You should return a numpy array of shape (10, 64)
    where the ith row corresponds to the ith digit class.
    '''
    a = 2
    b = 2
    eta = np.zeros((10, 64))

    for i in range(0, 10):
        i_digits = data.get_digits_by_label(train_data, train_labels, i)  # (700, 64)
        N = len(i_digits)
        Nc = np.sum(i_digits, axis=0)
        # MAP estimate with a Beta(a, b) prior; no extra offset should be added
        eta[i] = (Nc + a - 1) / (N + a + b - 2)

    return eta
Example no. 34
def compute_sigma_mles(train_data, train_labels):
    '''
    Compute the covariance estimate for each digit class

    Should return a three dimensional numpy array of shape (10, 64, 64)
    consisting of a covariance matrix for each digit class 
    '''
    means = compute_mean_mles(train_data, train_labels)
    covariances = np.zeros((10, 64, 64))

    for i in range (0, 10):
        i_digits = data.get_digits_by_label(train_data, train_labels, i) # (700,64)
        N = i_digits.shape[0]
        covariances[i] = 0.01*np.identity(i_digits.shape[1])
        for j in range(0, 64):
            for k in range(0, 64):
                # [i] - digit, [j] - row (first variable), [k] - column (second variable)
                # MLE form E[x_j x_k] - mu_j * mu_k, so the dot product is divided by N
                covariances[i][j][k] += i_digits[:, j].dot(i_digits[:, k]) / N - means[i][j] * means[i][k]

    return covariances