def figure_4_4_and_4_8(i, j):
    """Reproduces figures 4.4 and 4.8 in ESLii displaying canonical
    coordinates for the vowel data.

    Args:
        i: 1-based index of the canonical coordinate on the x-axis.
        j: 1-based index of the canonical coordinate on the y-axis.
           Requires 1 <= i < j <= 10.

    Raises:
        ValueError: if (i, j) fall outside the valid coordinate range.
    """
    # Validate with an explicit raise rather than `assert`, which is
    # silently stripped when Python runs with -O.  The original compound
    # condition (1 <= i, 1 <= j, i < j, i <= 9, j <= 10) is equivalent
    # to this single chained comparison.
    if not (1 <= i < j <= 10):
        raise ValueError(
            "require 1 <= i < j <= 10, got i={}, j={}".format(i, j))
    vowels = eslii.read_vowel_data()
    X = vowels[vowels.columns[1:]]
    y = vowels['y']
    # Project the data onto the LDA canonical coordinates and plot the
    # requested pair (converting from 1-based to 0-based column indices),
    # colored by class label.
    rr_x = LDA().fit_transform(X, y)
    plt.scatter(rr_x[:, i - 1], rr_x[:, j - 1], c=y)
def table_4_1():
    """Reproduces table 4.1 in ESLii showing the training and test error
    rates for classifying vowels using different classification
    techniques.

    The sklearn implementation of logistic regression uses OvA instead of
    a true multinomial which likely accounts for the worse results.
    """
    # Load the vowel data; the first column is the class label 'y' and
    # the remaining columns are the features.
    vowels_train = eslii.read_vowel_data()
    train_X = vowels_train[vowels_train.columns[1:]]
    train_y = vowels_train['y']
    vowels_test = eslii.read_vowel_data(train=False)
    test_X = vowels_test[vowels_test.columns[1:]]
    test_y = vowels_test['y']

    # score() reports accuracy, so 1 - score is the error rate reported
    # in the table.  Single-argument print(...) call syntax behaves
    # identically under Python 2 and Python 3, unlike the bare
    # `print "..."` statement which is a syntax error on Python 3.
    lda = LDA().fit(train_X, train_y)
    print("Linear discriminant analysis: {:.2f} {:.2f}".format(
        1 - lda.score(train_X, train_y), 1 - lda.score(test_X, test_y)))
    qda = QDA().fit(train_X, train_y)
    print("Quadratic discriminant analysis: {:.2f} {:.2f}".format(
        1 - qda.score(train_X, train_y), 1 - qda.score(test_X, test_y)))
    # A very large C effectively disables L2 regularization, matching the
    # unregularized logistic regression used in the book.
    lr = LogisticRegression(C=1e30).fit(train_X, train_y)
    print("Logistic regression: {:.2f} {:.2f}".format(
        1 - lr.score(train_X, train_y), 1 - lr.score(test_X, test_y)))