def branche_data_test(lr=0.05, batch_size=64, epochs=100):
    """Train a logistic-regression text classifier on two industry codes and plot its cost.

    The training and test texts returned by ``get_branche_data`` are turned
    into dense bag-of-words matrices with a ``CountVectorizer`` fitted on the
    training texts only. A ``LogisticRegressionClassifier`` is fitted on the
    training matrix, scores are reported through ``print_score``, and
    ``classifier.history`` is plotted against 1..len(history) (axis labelled
    "epoch"). The figure is exported, shown, and finally saved as "foo.pdf".

    NOTE(review): a later definition named ``branche_data_test`` exists in
    this file and will replace this one at import time -- confirm which
    version is meant to survive.

    Args:
        lr: Learning rate forwarded to ``classifier.fit``.
        batch_size: Mini-batch size forwarded to ``classifier.fit``.
        epochs: Number of epochs forwarded to ``classifier.fit``.
    """
    industry_keys = [(561010, 'Restauranter'), (620100, 'Computerprogrammering')]
    feat_train, feat_test, y_train, y_test, _class_names = get_branche_data(
        industry_keys)

    # The vocabulary is learned from the training texts only.
    vectorizer = CountVectorizer()
    vectorizer.fit(feat_train)
    train_matrix = vectorizer.transform(feat_train).toarray()

    model = LogisticRegressionClassifier()
    model.fit(train_matrix, y_train, lr=lr, batch_size=batch_size,
              epochs=epochs)

    print('Logistic Regression Industri Codes Classifier')
    test_matrix = vectorizer.transform(feat_test).toarray()
    print_score(model, train_matrix, test_matrix, y_train, y_test)

    # One history entry per x position, numbered from 1.
    cost_history = model.history
    epoch_axis = np.arange(1, len(cost_history) + 1)

    fig, ax = plt.subplots()
    ax.plot(epoch_axis, cost_history, 'b-x')
    ax.set_xlabel('epoch')
    ax.set_ylabel('Ein (1/n NLL)')
    ax.set_title('Cost as a function of epoch for industry codes data')
    export_fig(fig, 'logreg_text_cost_per_epoch.png')
    plt.show()
    # NOTE(review): this extra save runs after plt.show() and writes to the
    # current working directory; it looks like debugging residue -- confirm
    # whether it is still wanted.
    fig.savefig("foo.pdf")
def wine_test(epochs=200, batch_size=16, lr=0.1):
    """Train a 3-class softmax classifier on the wine data set and plot its cost.

    The data from ``load_wine`` is split into train and test parts, both are
    standardised with a ``StandardScaler`` fitted on the training part only,
    and a leading column of ones is prepended as the bias input. Scores are
    reported through ``print_score`` and ``s.history`` is plotted against
    1..len(history) (axis labelled "epoch"), exported, and shown.

    Args:
        epochs: Number of epochs forwarded to ``SoftmaxClassifier.fit``.
        batch_size: Mini-batch size forwarded to ``SoftmaxClassifier.fit``.
        lr: Learning rate forwarded to ``SoftmaxClassifier.fit``.
    """
    print(
        'wine test: params - epochs {0}, batch_size: {1}, learning rate: {2}'
        .format(epochs, batch_size, lr))
    features, target = load_wine(return_X_y=True)

    # Hold out 90% of the samples for testing (test_size=0.9), so the model
    # is trained on the remaining 10%.
    # NOTE(review): the earlier comment here said "30% test size", which did
    # not match the code -- confirm that 0.9 is the intended split.
    RANDOM_STATE = 42
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.9, random_state=RANDOM_STATE)

    # Standardise every feature to zero mean and unit standard deviation;
    # this makes the learning problem much easier (it massages the error
    # function so SGD works better). The scaler is fitted on the training
    # split only and then applied to both splits.
    sc = StandardScaler()
    sc.fit(X_train)
    s = SoftmaxClassifier(num_classes=3)

    X_train = sc.transform(X_train)
    X_train = np.c_[np.ones(X_train.shape[0]), X_train]  # prepend bias column
    X_test = sc.transform(X_test)
    X_test = np.c_[np.ones(X_test.shape[0]), X_test]  # prepend bias column

    s.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, lr=lr)
    print('Softmax Wine Classifier')
    print_score(s, X_train, X_test, y_train, y_test)

    # One history entry per x position, numbered from 1.
    hist = s.history
    fig, ax = plt.subplots()
    ax.plot(np.array(range(1, 1 + len(hist))), hist, 'b-x')
    ax.set_title('Cost as a function of epoch for wine data')
    ax.set_xlabel('epoch')
    ax.set_ylabel('Ein (1/n NLL)')
    export_fig(fig, 'softmax_wine_cost_per_epoch.png')
    plt.show()
def branche_data_test(lr=0.1, batch_size=16, epochs=50):
    """Train and score a logistic-regression text classifier on two industry codes.

    The training and test texts returned by ``get_branche_data`` are turned
    into dense bag-of-words matrices with a ``CountVectorizer`` fitted on the
    training texts only. A ``LogisticRegressionClassifier`` is fitted on the
    training matrix and scores are reported through ``print_score``.

    The hyper-parameters are keyword arguments so the run can be tuned
    without editing the function; the defaults reproduce the values that
    were previously hard-coded, so calling it with no arguments is unchanged.

    Args:
        lr: Learning rate forwarded to ``classifier.fit``.
        batch_size: Mini-batch size forwarded to ``classifier.fit``.
        epochs: Number of epochs forwarded to ``classifier.fit``.
    """
    keys = [(561010, 'Restauranter'), (620100, 'Computerprogrammering')]
    # The fifth returned value is not used in this function.
    feat_train, feat_test, y_train, y_test, _cnames = get_branche_data(keys)

    # The vocabulary is learned from the training texts only.
    c = CountVectorizer()
    c.fit(feat_train)
    bag_of_words_feat_train = c.transform(feat_train).toarray()

    classifier = LogisticRegressionClassifier()
    classifier.fit(bag_of_words_feat_train, y_train, lr=lr,
                   batch_size=batch_size, epochs=epochs)

    print('Logistic Regression Industri Codes Classifier')
    bag_of_words_feat_test = c.transform(feat_test).toarray()
    print_score(classifier, bag_of_words_feat_train, bag_of_words_feat_test,
                y_train, y_test)
def digits_test(epochs=10, batch_size=32, lr=0.05):
    """Train a 10-class softmax classifier on the digits data and plot its cost.

    Training and test data come from ``load_digits_train_data`` and
    ``load_digits_test_data``. After fitting, scores are reported through
    ``print_score`` and the classifier's ``history`` is plotted against
    1..len(history) (axis labelled "epoch"), exported, and shown.

    Args:
        epochs: Number of epochs forwarded to ``SoftmaxClassifier.fit``.
        batch_size: Mini-batch size forwarded to ``SoftmaxClassifier.fit``.
        lr: Learning rate forwarded to ``SoftmaxClassifier.fit``.
    """
    banner = 'digits test: params - epochs {0}, batch_size: {1}, learning rate: {2}'
    print(banner.format(epochs, batch_size, lr))

    model = SoftmaxClassifier(num_classes=10)
    X_train, y_train = load_digits_train_data()
    X_test, y_test = load_digits_test_data()

    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, lr=lr)
    print_score(model, X_train, X_test, y_train, y_test)

    fig, ax = plt.subplots()
    # One history entry per x position, numbered from 1.
    cost_history = model.history
    epoch_axis = np.arange(1, len(cost_history) + 1)
    ax.plot(epoch_axis, cost_history, 'b-x')
    ax.set_xlabel('epoch')
    ax.set_ylabel('Ein (1/n NLL)')
    ax.set_title('softmax cost on digits as function of epoch')
    export_fig(fig, 'softmax_cost_per_epoch.png')
    plt.show()