Code Example #1
File: TestRun.py  Project: JodesL/NHLStats
import random

import numpy as np
import PreprocessData as pp


def makeOneFold(nb_folds):
    # Returns one fold from the cross-validation training set
    # Note: has to create the whole cross-validation set (could be improved)
    data_trains, data_tests = pp.preprocessing_cross_valid(2012, 2014, nb_folds)
    rand_fold = random.randint(0, nb_folds-1)  # Pick a random fold to test

    np.random.shuffle(data_trains[rand_fold])  # shuffles training examples

    x_train = data_trains[rand_fold][:, 1:]
    y_train = data_trains[rand_fold][:, 0]

    x_test = data_tests[rand_fold][:, 1:]
    y_test = data_tests[rand_fold][:, 0]

    return x_train, y_train, x_test, y_test
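
A minimal usage sketch (not part of the original file): it assumes PreprocessData and its data files are importable, and the LogisticRegression model is chosen purely for illustration.

from sklearn.linear_model import LogisticRegression

x_train, y_train, x_test, y_test = makeOneFold(nb_folds=9)

clf = LogisticRegression()  # illustrative model choice, not from the original project
clf.fit(x_train, y_train)
print("Accuracy on the sampled fold:", clf.score(x_test, y_test))
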
Code Example #2
File: Comparisons.py  Project: JodesL/NHLStats
import PreprocessData as pp
import TestRun

from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier

import matplotlib.pyplot as plt

# This file is used to test other machine learning algorithms

if __name__ == '__main__':
    data_trains, data_tests = pp.preprocessing_cross_valid(2012, 2014, 9)
    print("Tests")
    errs = []
    for i in range(9):

        x_train = data_trains[i][:, 1:]
        y_train = data_trains[i][:, 0]

        x_test = data_tests[i][:, 1:]
        y_test = data_tests[i][:, 0]

        # logistic regression
        reg = LogisticRegression()
        reg.fit(x_train, y_train)
        print("Error:", reg.score(x_test, y_test))

        # support vector machine
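
The excerpt is cut off after the support-vector-machine comment, so the exact code is unknown; as a hedged sketch only, the comparison would presumably continue with the classifiers already imported above, for example:

        # Hedged sketch: the original code is truncated, so these parameters are assumptions
        svc = LinearSVC()
        svc.fit(x_train, y_train)
        print("LinearSVC accuracy:", svc.score(x_test, y_test))
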
Code Example #3
File: TestRun.py  Project: JodesL/NHLStats
import time
from operator import add

import numpy as np
import PreprocessData as pp


def crossValidate(net, nb_folds, iterations=1000, learning_rate=0.01, grad_decay=0.9, epsilon=0.000001, adadelta=False):
    # Splits the data into nb_folds folds, using each fold as the test set in turn
    # and the remaining folds as the training set

    ######## Need to fix: how to train on multiple years at once?
    data_trains, data_tests = pp.preprocessing_cross_valid(2012, 2014, nb_folds)
    for i in range(nb_folds):
        np.random.shuffle(data_trains[i])  # shuffles training examples

    min_errs = []
    test_errs = []
    train_errs = []

    nb_buckets = 5  # Could make this a parameter
    freq_probs_test = [0] * nb_buckets
    freq_wins_test = [0] * nb_buckets
    freq_probs_train = [0] * nb_buckets
    freq_wins_train = [0] * nb_buckets

    for i in range(nb_folds):
        print("--- Fold " + str(i+1) + " ---")
        start = time.perf_counter()  # time.clock() was removed in Python 3.8

        net.reset()
        # Make test and training sets
        x_train = data_trains[i][:, 1:]
        y_train = data_trains[i][:, 0]

        x_test = data_tests[i][:, 1:]
        y_test = data_tests[i][:, 0]

        temp = net.test(x_train, y_train, iterations, learning_rate, grad_decay, epsilon, adadelta, X_test=x_test, y_test=y_test)

        min_errs.append(temp[0])
        test_errs.append(temp[1])
        train_errs.append(temp[2])

        freqs = net.testProbBuckets(x_train, y_train, nb_buckets=nb_buckets, X_test=x_test, y_test=y_test)
        # Aggregates the prob buckets from each fold together
        freq_probs_test = list(map(add, freq_probs_test, freqs[0]))
        freq_wins_test = list(map(add, freq_wins_test, freqs[1]))
        freq_probs_train = list(map(add, freq_probs_train, freqs[2]))
        freq_wins_train = list(map(add, freq_wins_train, freqs[3]))


        print("Time:", time.clock() - start)

    print("\n----------")
    print(net, "\tNb folds:", nb_folds)
    print("Avg min:", sum(min_errs)/nb_folds, "\t\t\t", min_errs)
    print("Avg final test:", sum(test_errs)/nb_folds, "\t\t\t", test_errs)
    print("Avg final train:", sum(train_errs)/nb_folds, "\t\t\t", train_errs)

    probs_test = [freq_wins_test[i] / freq_probs_test[i] if freq_probs_test[i] != 0 else -1 for i in range(nb_buckets)]
    probs_train = [freq_wins_train[i] / freq_probs_train[i] if freq_probs_train[i] != 0 else -1 for i in range(nb_buckets)]

    print("Total freq test:")
    print(freq_probs_test)
    print(freq_wins_test)
    print(["{0:.2f}".format(x) for x in probs_test])

    print("Total freq train:")
    print(freq_probs_train)
    print(freq_wins_train)
    print(["{0:.2f}".format(x) for x in probs_train])

    # Returns average min test error
    return sum(min_errs)/nb_folds
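
A hedged usage sketch: the NeuralNetwork name and its constructor are placeholders, not names confirmed by the excerpt above; crossValidate only requires that net provide reset(), test(), and testProbBuckets().

net = NeuralNetwork()  # hypothetical network class from the NHLStats project
avg_min_err = crossValidate(net, nb_folds=9, iterations=1000, learning_rate=0.01, adadelta=True)
print("Average minimum test error across folds:", avg_min_err)
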