Beispiel #1
0
def branche_data_test(lr=0.05, batch_size=64, epochs=100):
    """Train a logistic-regression text classifier on two industry codes.

    The texts returned by ``get_branche_data`` are turned into bag-of-words
    count vectors, a ``LogisticRegressionClassifier`` is fitted on them, the
    train/test scores are printed, and the per-epoch cost history is plotted.

    Args:
        lr: Learning rate forwarded to ``classifier.fit``.
        batch_size: Mini-batch size forwarded to ``classifier.fit``.
        epochs: Number of epochs forwarded to ``classifier.fit``.
    """
    industry_keys = [(561010, 'Restauranter'),
                     (620100, 'Computerprogrammering')]
    # The fifth return value (cnames in the original) is not used here.
    txt_train, txt_test, y_train, y_test, _ = get_branche_data(industry_keys)

    # The vocabulary is learned from the training texts only.
    vectorizer = CountVectorizer()
    vectorizer.fit(txt_train)
    bow_train = vectorizer.transform(txt_train).toarray()

    model = LogisticRegressionClassifier()
    model.fit(bow_train, y_train, lr=lr, batch_size=batch_size, epochs=epochs)

    print('Logistic Regression Industri Codes Classifier')
    bow_test = vectorizer.transform(txt_test).toarray()
    print_score(model, bow_train, bow_test, y_train, y_test)

    # Plot the recorded cost history against 1-based epoch numbers.
    cost_history = model.history
    epoch_axis = np.array(range(1, len(cost_history) + 1))
    fig, ax = plt.subplots()
    ax.plot(epoch_axis, cost_history, 'b-x')
    ax.set_title('Cost as a function of epoch for industry codes data')
    ax.set_xlabel('epoch')
    ax.set_ylabel('Ein (1/n NLL)')
    export_fig(fig, 'logreg_text_cost_per_epoch.png')
    plt.show()
    # NOTE(review): extra PDF copy written after show(); looks like a
    # debugging leftover -- confirm whether it is still wanted.
    fig.savefig("foo.pdf")
def wine_test(epochs=200, batch_size=16, lr=0.1):
    """Train a 3-class softmax classifier on the wine data and plot its cost.

    The features are standardized with a scaler fitted on the training split,
    a constant bias column is prepended, the classifier is fitted, scores are
    printed and the per-epoch cost history is plotted.

    Args:
        epochs: Number of epochs forwarded to ``SoftmaxClassifier.fit``.
        batch_size: Mini-batch size forwarded to ``SoftmaxClassifier.fit``.
        lr: Learning rate forwarded to ``SoftmaxClassifier.fit``.
    """
    banner = ('wine test: params - epochs {0}, batch_size: {1}, '
              'learning rate: {2}')
    print(banner.format(epochs, batch_size, lr))

    features, target = load_wine(return_X_y=True)

    # Deterministic train/test split. With test_size=0.9 only 10% of the
    # samples are used for training.
    # NOTE(review): the previous comment said "30% test size" -- confirm
    # which value is intended.
    seed = 42
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.9, random_state=seed)

    # Standardize features to zero mean / unit variance using statistics of
    # the training split only; this makes the problem easier for SGD.
    scaler = StandardScaler()
    scaler.fit(X_train)

    X_train = scaler.transform(X_train)
    X_train = np.c_[np.ones(X_train.shape[0]), X_train]  # bias column
    X_test = scaler.transform(X_test)
    X_test = np.c_[np.ones(X_test.shape[0]), X_test]  # bias column

    model = SoftmaxClassifier(num_classes=3)
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, lr=lr)

    print('Softmax Wine Classifier')
    print_score(model, X_train, X_test, y_train, y_test)

    # Plot the recorded cost history against 1-based epoch numbers.
    cost_history = model.history
    epoch_axis = np.array(range(1, len(cost_history) + 1))
    fig, ax = plt.subplots()
    ax.plot(epoch_axis, cost_history, 'b-x')
    ax.set_title('Cost as a function of epoch for wine data')
    ax.set_xlabel('epoch')
    ax.set_ylabel('Ein (1/n NLL)')
    export_fig(fig, 'softmax_wine_cost_per_epoch.png')
    plt.show()
Beispiel #3
0
def branche_data_test():
    """Train and score a logistic-regression classifier on two industry codes.

    Uses fixed hyperparameters (lr=0.1, batch_size=16, epochs=50). The texts
    returned by ``get_branche_data`` are converted to bag-of-words count
    vectors before fitting; train/test scores are printed via ``print_score``.
    """
    industry_keys = [(561010, 'Restauranter'),
                     (620100, 'Computerprogrammering')]
    # The fifth return value (cnames in the original) is not used here.
    txt_train, txt_test, y_train, y_test, _ = get_branche_data(industry_keys)

    # The vocabulary is learned from the training texts only.
    vectorizer = CountVectorizer()
    vectorizer.fit(txt_train)
    bow_train = vectorizer.transform(txt_train).toarray()

    model = LogisticRegressionClassifier()
    model.fit(bow_train, y_train, lr=0.1, batch_size=16, epochs=50)

    print('Logistic Regression Industri Codes Classifier')
    bow_test = vectorizer.transform(txt_test).toarray()
    print_score(model, bow_train, bow_test, y_train, y_test)
def digits_test(epochs=10, batch_size=32, lr=0.05):
    """Train a 10-class softmax classifier on the digits data and plot cost.

    Args:
        epochs: Number of epochs forwarded to ``SoftmaxClassifier.fit``.
        batch_size: Mini-batch size forwarded to ``SoftmaxClassifier.fit``.
        lr: Learning rate forwarded to ``SoftmaxClassifier.fit``.
    """
    banner = ('digits test: params - epochs {0}, batch_size: {1}, '
              'learning rate: {2}')
    print(banner.format(epochs, batch_size, lr))

    model = SoftmaxClassifier(num_classes=10)
    X_train, y_train = load_digits_train_data()
    X_test, y_test = load_digits_test_data()

    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, lr=lr)
    print_score(model, X_train, X_test, y_train, y_test)

    # Plot the recorded cost history against 1-based epoch numbers.
    fig, ax = plt.subplots()
    cost_history = model.history
    epoch_axis = np.array(range(1, len(cost_history) + 1))
    ax.plot(epoch_axis, cost_history, 'b-x')
    ax.set_xlabel('epoch')
    ax.set_ylabel('Ein (1/n NLL)')
    ax.set_title('softmax cost on digits as function of epoch')
    export_fig(fig, 'softmax_cost_per_epoch.png')
    plt.show()