def get_test_examples():
    """Return the cached test features and labels, building them on first use.

    The module-level ``test_X`` / ``test_y`` act as a cache.  While ``test_X``
    is falsy, the examples are loaded from ``data/test_40k_10k.pkl`` and run
    through ``model.represent`` and ``model.label``; each stage is bracketed
    by calls to ``_log_time()`` and ``_print_time_diff()``.

    Note that a falsy ``test_X`` (for instance an empty list) triggers the
    load again on every call.

    Returns:
        The tuple ``(test_X, test_y)``.
    """
    global test_X, test_y

    # Already populated by an earlier call: nothing to load.
    if test_X:
        return test_X, test_y

    print("Loading test examples")
    _log_time()
    loaded = music.load_examples("data/test_40k_10k.pkl")
    _print_time_diff()

    print("Obtaining X and y values")
    _log_time()
    test_X = [model.represent(item) for item in loaded]
    _print_time_diff()

    _log_time()
    test_y = [model.label(item) for item in loaded]
    _print_time_diff()

    return test_X, test_y
Example #2
0
    clf.fit(X, y)

def predict(examples):
    """Return ``clf.predict`` applied to the represented form of *examples*.

    Every example is converted with ``represent`` and the resulting list is
    handed to the module-level ``clf`` in a single call.
    """
    features = [represent(item) for item in examples]
    return clf.predict(features)

import math
from sklearn.metrics import mean_squared_error

def rmse(y_true, y_pred):
    """Return the square root of ``mean_squared_error(y_true, y_pred)``."""
    return math.sqrt(mean_squared_error(y_true, y_pred))

from sklearn.cross_validation import cross_val_score

def validate(examples):
    """Cross-validate ``clf`` on *examples* and return the resulting scores.

    Features are produced by ``represent`` and targets by ``label``.  The
    return value is whatever ``cross_val_score`` yields when called with
    ``cv=2`` and ``rmse`` passed as ``score_func``.
    """
    features = [represent(item) for item in examples]
    targets = [label(item) for item in examples]
    return cross_val_score(clf, features, targets, cv=2, score_func=rmse)

if __name__ == "__main__":
    # Script entry point: cross-validate on the training pickle, report the
    # score summary, then ready the training and testing data.
    import sys

    import music

    train_examples = music.load_examples('data/train.pkl')

    # An optional first command-line argument overrides n_estimators on clf.
    cli_args = sys.argv[1:]
    if cli_args:
        clf.set_params(n_estimators=int(cli_args[0]))

    scores = validate(train_examples)
    print("RMSE: %0.6f (+/- %0.6f)" % (scores.mean(), scores.std()/2))

    # Each readying step is bracketed by _log_time() / _print_time_diff()
    # and followed by a separator.
    print("Readying training data")
    _log_time()
    music.ready_training_data(should_use_full_ds)
    _print_time_diff()
    _draw_separator()

    print("Readying testing data")
    _log_time()
    music.ready_testing_data()
    _print_time_diff()
    _draw_separator()


# Module-level script section: runs on import/execution and binds the
# module globals `examples`, `all_X` and `all_y`.

# Load the training examples; the call is bracketed by
# _log_time() / _print_time_diff().
print "Start loading examples"
_log_time()
examples = music.load_examples("data/train.pkl")
_print_time_diff()
_draw_separator()

# Build the feature list and the label list in two separate passes over
# `examples`, each pass bracketed by its own
# _log_time() / _print_time_diff() pair.
print "Obtaining all x and y values"
_log_time()
all_X = [model.represent(example) for example in examples]
_print_time_diff()
_log_time()
all_y = [model.label(example) for example in examples]
_print_time_diff()
_draw_separator()


def print_consolidated_scores(scores):
    _draw_separator(".", 5)