Exemple #1
0
def ex_3_b(x_train, y_train, x_test, y_test):
    """
    Solution for exercise 3 b)

    Trains a multi-class SVM with a linear kernel, plots the confusion matrix
    obtained on the test set and then plots misclassified test images for the
    class that was wrongly predicted most often.

    :param x_train: Training samples (2-dimensional)
    :param y_train: Training labels
    :param x_test: Testing samples (2-dimensional)
    :param y_test: Testing labels
    :return: None
    """
    labels = range(1, 6)

    clf = svm.SVC(kernel='linear', decision_function_shape='ovr')
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)

    # Pin the label order so row/column k of the matrix belongs to labels[k].
    cm = confusion_matrix(y_test, y_pred, labels=list(labels))
    plot_confusion_matrix(cm, labels)

    # Column sums without the diagonal: how many times each class was
    # predicted although the true class was a different one.
    wrong_per_predicted = cm.sum(axis=0) - np.diagonal(cm)

    # Index into `labels` of the class with the largest classification error.
    i = int(np.argmax(wrong_per_predicted))

    # Numpy indices of all misclassified test images (integer dtype even
    # when nothing is misclassified, so it is always a valid index array).
    sel_err = np.flatnonzero(y_test != y_pred)

    # Plot with mnist plot
    # NOTE(review): this relies on plot_mnist showing only the samples whose
    # passed label equals `labels` -- confirm against its implementation.
    plot_mnist(x_test[sel_err],
               y_pred[sel_err],
               labels=labels[i],
               k_plots=10,
               prefix='Predicted class')
Exemple #2
0
def ex_3_b(x_train, y_train, x_test, y_test):
    """
    Solution for exercise 3 b)

    Trains a linear multi-class SVM (C = 3e-4), plots the test confusion
    matrix, determines the predicted class with the highest single confusion
    rate and plots the first 10 test images wrongly assigned to that class.

    :param x_train: Training samples (2-dimensional)
    :param y_train: Training labels
    :param x_test: Testing samples (2-dimensional)
    :param y_test: Testing labels
    :return: None
    """
    C = 3e-4
    lin_svm = svm.SVC(kernel="linear", decision_function_shape='ovr', C=C)
    lin_svm.fit(x_train, y_train)

    labels = range(1, 6)
    y_pred = lin_svm.predict(x_test)

    conf_matrix = confusion_matrix(y_test, y_pred)

    plot_confusion_matrix(conf_matrix, labels)

    # For every predicted class (column): the largest off-diagonal entry,
    # i.e. the most frequent single confusion ending up in that class.
    off_diagonal = conf_matrix.copy()
    np.fill_diagonal(off_diagonal, 0)
    worst_confusion = off_diagonal.max(axis=0)

    # Normalise by the number of samples predicted as that class. A class
    # that was never predicted gets rate 0 instead of a 0/0 NaN, which would
    # otherwise win the argmax below.
    predicted_totals = conf_matrix.sum(axis=0)
    confusion_rate = np.divide(worst_confusion,
                               predicted_totals,
                               out=np.zeros(len(predicted_totals)),
                               where=predicted_totals > 0)

    # Column k corresponds to label k + 1 (labels are 1..5).
    i = int(np.argmax(confusion_rate)) + 1

    # Indices of the first 10 test images wrongly predicted as class i.
    sel_error = np.flatnonzero((y_pred == i) & (y_pred != y_test))[:10]

    # Plot with mnist plot
    plot_mnist(x_test[sel_error],
               y_pred[sel_error],
               labels=labels[i - 1],
               k_plots=10,
               prefix='Predicted class')
def ex_3():
    """
    Entry point for exercise 3.

    Loads 'data_mnist.json', splits it into training/testing samples and
    flattened label vectors, plots the training digits and runs part a).

    :return: None
    """
    dataset = load_data('data_mnist.json')

    x_train = dataset['X']
    y_train = dataset['Y'].ravel()
    x_test = dataset['XT']
    y_test = dataset['YT'].ravel()

    plot_mnist(x_train, y_train)

    ex_3_a(x_train, y_train, x_test, y_test)
Exemple #4
0
def ex_3():
    """
    Entry point for exercise 3.

    Loads 'data_mnist.json', rescales the samples, plots the training digits
    and runs parts a) and b).

    :return: None
    """
    dataset = load_data('data_mnist.json')

    # Samples are divided by 255 (per the original note: [0,255] -> [0,1]);
    # label arrays are flattened to 1-D.
    x_train = dataset['X'] / 255
    y_train = dataset['Y'].ravel()
    x_test = dataset['XT'] / 255
    y_test = dataset['YT'].ravel()

    plot_mnist(x_train, y_train)

    ex_3_a(x_train, y_train, x_test, y_test)
    ex_3_b(x_train, y_train, x_test, y_test)
Exemple #5
0
def ex_3_b(x_train, y_train, x_test, y_test):
    """
    Solution for exercise 3 b)

    Trains a linear multi-class SVM, plots the test confusion matrix and
    plots misclassified test images for the class that collected the most
    wrong predictions.

    :param x_train: Training samples (2-dimensional)
    :param y_train: Training labels
    :param x_test: Testing samples (2-dimensional)
    :param y_test: Testing labels
    :return: None
    """
    labels = range(1, 6)

    lin = svm.SVC(decision_function_shape='ovr', kernel='linear')
    lin.fit(x_train, y_train)

    y_test_predict = lin.predict(x_test)

    cm = confusion_matrix(y_test, y_test_predict)
    plot_confusion_matrix(cm, labels)

    # Work on a copy so the confusion matrix itself keeps its diagonal.
    cm_no_diagonal = cm.copy()
    np.fill_diagonal(cm_no_diagonal, 0)

    # Column sums of the off-diagonal part: wrong predictions per predicted
    # class.
    errors_per_class = np.sum(cm_no_diagonal, axis=0)

    # Indices of all misclassified images. flatnonzero always yields an
    # integer array, so indexing also works when nothing is misclassified.
    sel_err = np.flatnonzero(y_test_predict != y_test)

    # Index into `labels` of the class with the largest classification error.
    i = int(np.argmax(errors_per_class))

    # Plot with mnist plot
    plot_mnist(x_test[sel_err], y_test_predict[sel_err], labels=labels[i], k_plots=10, prefix='Predicted class')
Exemple #6
0
def ex_3_b(x_train, y_train, x_test, y_test):
    """
    Solution for exercise 3 b)

    Trains a linear multi-class SVM (C = 10), plots the test confusion
    matrix, prints the label with the fewest correct classifications and
    plots misclassified test images for that label.

    :param x_train: Training samples (2-dimensional)
    :param y_train: Training labels
    :param x_test: Testing samples (2-dimensional)
    :param y_test: Testing labels
    :return: None
    """
    labels = range(1, 6)

    svm_lin = svm.SVC(decision_function_shape='ovr', C=10, kernel='linear')
    svm_lin.fit(x_train, y_train)
    y_pred = svm_lin.predict(x_test)
    conf_m = confusion_matrix(y_test, y_pred)

    # 0-based position of the smallest diagonal entry, i.e. the class with
    # the fewest correctly classified test samples. It is kept 0-based
    # because it is used to index `labels` below; adding 1 here would select
    # the neighbouring label and overflow for the last class.
    i = int(np.argmin(np.diagonal(conf_m)))

    plot_confusion_matrix(conf_m, labels)
    print(labels[i])

    # Numpy indices of all misclassified test images.
    sel_err = np.flatnonzero(y_pred != y_test)

    # Plot with mnist plot
    # NOTE(review): the class is chosen by raw correct counts rather than by
    # an error rate, as in the original -- confirm this is the intended
    # criterion.
    plot_mnist(x_test[sel_err],
               y_pred[sel_err],
               labels=labels[i],
               k_plots=10,
               prefix='Predicted class')
Exemple #7
0
def ex_3_b(x_train, y_train, x_test, y_test):
    """
    Solution for exercise 3 b)

    Trains a linear multi-class SVM (C = 0.0003), plots the test confusion
    matrix, counts the misclassified samples per true label and plots
    misclassified test images for the label with the highest count.

    :param x_train: Training samples (2-dimensional)
    :param y_train: Training labels
    :param x_test: Testing samples (2-dimensional)
    :param y_test: Testing labels
    :return: None
    """
    C = 0.0003

    clf = svm.SVC(C=C, kernel='linear', decision_function_shape='ovr')
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)

    labels = range(1, 6)

    plot_confusion_matrix(confusion_matrix(y_test, y_pred), labels)

    # Numpy indices of the misclassified images. Only genuinely wrong
    # predictions are included (no placeholder index 0).
    sel_error = np.flatnonzero(y_pred != y_test)

    # Number of misclassified samples per true label; label k is counted in
    # bin k - 1 because the labels start at 1.
    sums = np.bincount(np.asarray(y_test)[sel_error] - 1,
                       minlength=len(labels))

    # Index into `labels` of the label with the most misclassified samples.
    i = int(np.argmax(sums))

    # Plot with mnist plot
    # NOTE(review): `i` is derived from the true labels while the predicted
    # labels are passed to plot_mnist -- confirm this pairing is intended.
    plot_mnist(x_test[sel_error],
               y_pred[sel_error],
               labels=labels[i],
               k_plots=10,
               prefix='Predicted class')
Exemple #8
0
def ex_3_b(x_train, y_train, x_test, y_test):
    """
    Solution for exercise 3 b)

    Trains a linear multi-class SVM (C = 10), plots the test confusion
    matrix, finds the class that was wrongly predicted most often and plots
    the test images that were wrongly assigned to it.

    :param x_train: Training samples (2-dimensional)
    :param y_train: Training labels
    :param x_test: Testing samples (2-dimensional)
    :param y_test: Testing labels
    :return: None
    """
    labels = range(1, 6)
    linear = svm.SVC(kernel='linear', C=10, decision_function_shape='ovr')
    linear.fit(x_train, y_train)
    y_pred = linear.predict(x_test)
    cm = confusion_matrix(y_test, y_pred)
    plot_confusion_matrix(cm, labels)

    # Wrong predictions per predicted class: column sum minus the diagonal.
    # Computed from the matrix itself, so its size is not hard-coded.
    errors = cm.sum(axis=0) - np.diagonal(cm)

    # Label corresponding to the largest classification error (column k of
    # the matrix belongs to labels[k]).
    max_err_label = labels[int(np.argmax(errors))]

    # Numpy indices of the images wrongly predicted as max_err_label.
    sel_err = np.flatnonzero((y_pred == max_err_label) & (y_test != y_pred))

    # Plot with mnist plot
    plot_mnist(x_test[sel_err],
               y_pred[sel_err],
               labels=max_err_label,
               k_plots=10,
               prefix='Predicted class')
0
def ex_3_b(x_train, y_train, x_test, y_test):
    """
    Solution for exercise 3 b)

    Trains a linear SVM (C = 10, one-vs-one decision function shape), plots
    the test confusion matrix, finds the predicted class holding the largest
    single off-diagonal entry and plots misclassified test images for it.

    :param x_train: Training samples (2-dimensional)
    :param y_train: Training labels
    :param x_test: Testing samples (2-dimensional)
    :param y_test: Testing labels
    :return: None
    """
    labels = range(1, 6)

    svc_ovo = svm.SVC(kernel='linear', decision_function_shape='ovo',
                      C=10).fit(x_train, y_train)

    y_pred = svc_ovo.predict(x_test)
    cm = confusion_matrix(y_test, y_pred)
    plot_confusion_matrix(cm, labels)

    # Zero the diagonal on a copy so `cm` itself stays intact.
    off_diagonal = cm.copy()
    np.fill_diagonal(off_diagonal, 0)

    # Index into `labels` of the column (predicted class) that contains the
    # largest off-diagonal entry.
    i = int(np.argmax(np.max(off_diagonal, axis=0)))

    # Numpy indices of the misclassified images. flatnonzero gives a flat
    # 1-D index array; argwhere would give shape (n, 1) and add an extra
    # axis to every indexed result.
    sel_err = np.flatnonzero(np.not_equal(y_test, y_pred))

    # NOTE(review): the prefix says 'Real class' although predicted labels
    # are passed -- confirm which wording is wanted.
    plot_mnist(x_test[sel_err],
               y_pred[sel_err],
               labels=labels[i],
               k_plots=10,
               prefix='Real class')
Exemple #10
0
def ex_3_b(x_train, y_train, x_test, y_test):
    """
    Solution for exercise 3 b)

    Trains a linear multi-class SVM (C = 10), plots and prints the test
    confusion matrix, determines the class that was wrongly predicted most
    often and plots misclassified test images for it.

    :param x_train: Training samples (2-dimensional)
    :param y_train: Training labels
    :param x_test: Testing samples (2-dimensional)
    :param y_test: Testing labels
    :return: None
    """
    clf = svm.SVC(kernel="linear", decision_function_shape='ovr', C=10)
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)

    labels = range(1, 6)

    # Compute the matrix once and reuse it for the plot, the printout and
    # the error analysis.
    cm = confusion_matrix(y_test, y_pred)
    plot_confusion_matrix(cm, labels)
    print("conf: ", cm)

    # Numpy indices of all misclassified images, derived from the
    # predictions instead of being tied to one particular data set.
    sel_err = np.flatnonzero(y_test != y_pred)

    # Index into `labels` of the class with the largest classification
    # error: column sum minus diagonal = wrong predictions per predicted
    # class.
    i = int(np.argmax(cm.sum(axis=0) - np.diagonal(cm)))
    print("sel_err ", sel_err)

    # Plot with mnist plot
    plot_mnist(x_test[sel_err],
               y_pred[sel_err],
               labels=labels[i],
               k_plots=10,
               prefix='Predicted class')
Exemple #11
0
def ex_3_b(x_train, y_train, x_test, y_test):
    """
    Solution for exercise 3 b)

    Trains a linear SVM (C = 10), plots the test confusion matrix, finds the
    label that appears most often among the wrong predictions and plots
    misclassified test images for it.

    :param x_train: Training samples (2-dimensional)
    :param y_train: Training labels
    :param x_test: Testing samples (2-dimensional)
    :param y_test: Testing labels
    :return: None
    """
    labels = range(1, 6)
    svc = svm.SVC(C=10, kernel='linear')
    svc.fit(x_train, y_train)
    y_pred = svc.predict(x_test)

    # `labels` has to be passed by keyword: current scikit-learn accepts
    # only y_true and y_pred positionally.
    con_matrix = confusion_matrix(y_test, y_pred, labels=list(labels))
    plot_confusion_matrix(con_matrix, labels)

    # Indices of the misclassified samples and the labels predicted there.
    sel_error = np.flatnonzero(y_test != y_pred)
    error_list = y_pred[sel_error]
    occurences = Counter(error_list.tolist())

    # Label corresponding to the largest classification error: the most
    # FREQUENT wrongly predicted label. max() over a Counter would return
    # the largest key instead. Fall back to the first label when there are
    # no errors at all.
    i = occurences.most_common(1)[0][0] if occurences else labels[0]

    print("Label corresponding to the largest classification error : ", i)

    plot_mnist(x_test[sel_error],
               y_pred[sel_error],
               labels=i,
               k_plots=10,
               prefix='Predicted class')
Exemple #12
0
def ex_3_b(x_train, y_train, x_test, y_test):
    """
    Solution for exercise 3 b)

    Trains a linear multi-class SVM (C = 3e-4), plots the test confusion
    matrix, locates the largest off-diagonal entry and plots misclassified
    test images for the label of that entry's row.

    :param x_train: Training samples (2-dimensional)
    :param y_train: Training labels
    :param x_test: Testing samples (2-dimensional)
    :param y_test: Testing labels
    :return: None
    """
    labels = range(1, 6)

    clf = svm.SVC(kernel="linear", C=3e-4, decision_function_shape='ovr')
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)

    # `labels` has to be passed by keyword: current scikit-learn accepts
    # only y_true and y_pred positionally.
    cm = confusion_matrix(y_test, y_pred, labels=list(labels))
    plot_confusion_matrix(cm, labels)

    # Numpy indices to select images that are misclassified.
    sel_err = np.where(y_test != y_pred)

    # With the diagonal cleared (after plotting), the overall maximum is the
    # most frequent single confusion; only its row index is used.
    np.fill_diagonal(cm, 0)
    i, _ = np.unravel_index(cm.argmax(), cm.shape)

    # Plot with mnist plot
    # NOTE(review): the row index of confusion_matrix is the true class,
    # while predicted labels are passed below -- confirm the row (not the
    # column) is the intended choice.
    plot_mnist(x_test[sel_err],
               y_pred[sel_err],
               labels=labels[i],
               k_plots=10,
               prefix='predicted class')
Exemple #13
0
def ex_3_b(x_train, y_train, x_test, y_test):
    """
    Solution for exercise 3 b)

    Trains a linear SVM (C = 10), prints and plots the test confusion
    matrix, picks the class with the fewest correct classifications and
    plots the first 10 test images wrongly predicted as that class.

    :param x_train: Training samples (2-dimensional)
    :param y_train: Training labels
    :param x_test: Testing samples (2-dimensional)
    :param y_test: Testing labels
    :return: None
    """
    c = 10
    kernel_mode = 'linear'

    # init linear svm and train it
    lin_svm = svm.SVC(kernel=kernel_mode, C=c)
    lin_svm.fit(x_train, y_train)

    # predict the test labels to build the confusion matrix
    y_pred = lin_svm.predict(x_test)
    cm = confusion_matrix(y_test, y_pred)
    print(cm)
    plot_confusion_matrix(cm, lin_svm.classes_)

    # given labels
    labels = range(1, 6)

    # 0-based index of the smallest diagonal entry, i.e. the class with the
    # fewest correct classifications (first one on ties). Taken over the
    # whole diagonal, so the number of classes is not hard-coded.
    most_misclassified_number = int(np.argmin(np.diagonal(cm)))

    # Indices of the first 10 images wrongly predicted as that class.
    # flatnonzero always yields an integer array, so the indexing below also
    # works when there is no such image.
    predicted_as_target = y_pred == labels[most_misclassified_number]
    sel_err = np.flatnonzero(predicted_as_target & (y_test != y_pred))[:10]

    # should be the label number corresponding the largest classification error
    i = most_misclassified_number

    # Plot with mnist plot
    plot_mnist(x_test[sel_err],
               y_pred[sel_err],
               labels=labels[i],
               k_plots=10,
               prefix='Predicted class')