def get_test_examples():
    """Return the ``(test_X, test_y)`` pair, building it on first use.

    The module-level globals ``test_X`` and ``test_y`` act as a cache: when
    ``test_X`` is already truthy the stored pair is returned as-is.  Otherwise
    the examples are loaded from ``data/test_40k_10k.pkl`` and converted with
    ``model.represent`` (features) and ``model.label`` (targets).

    ``_log_time`` / ``_print_time_diff`` bracket each stage; presumably they
    record and report elapsed time -- confirm against their definitions.
    """
    global test_X, test_y

    # Fast path: the data has already been prepared by an earlier call.
    if test_X:
        return test_X, test_y

    print("Loading test examples")
    _log_time()
    loaded = music.load_examples("data/test_40k_10k.pkl")
    _print_time_diff()

    print("Obtaining X and y values")
    _log_time()
    test_X = [model.represent(item) for item in loaded]
    _print_time_diff()

    # The label pass is bracketed separately from the feature pass.
    _log_time()
    test_y = [model.label(item) for item in loaded]
    _print_time_diff()

    return test_X, test_y
# NOTE(review): the statement below looks like the tail of a definition that
# begins above this chunk -- confirm the indentation it needs in the full file.
clf.fit(X, y)


def predict(examples):
    """Return ``clf.predict`` applied to the represented ``examples``.

    Every example is first converted with ``represent``; the resulting list
    is passed to the classifier in a single call.
    """
    features = [represent(item) for item in examples]
    return clf.predict(features)


import math
from sklearn.metrics import mean_squared_error


def rmse(y_true, y_pred):
    """Return the square root of ``mean_squared_error(y_true, y_pred)``."""
    return math.sqrt(mean_squared_error(y_true, y_pred))


from sklearn.cross_validation import cross_val_score


def validate(examples):
    """Cross-validate ``clf`` on ``examples`` and return the fold scores.

    Features come from ``represent`` and targets from ``label``; scoring uses
    ``rmse`` over 2 folds.
    """
    features = [represent(item) for item in examples]
    targets = [label(item) for item in examples]
    # NOTE(review): ``sklearn.cross_validation`` and the ``score_func``
    # keyword belong to old scikit-learn releases -- confirm the pinned
    # version before upgrading the dependency.
    return cross_val_score(clf, features, targets, cv=2, score_func=rmse)


if __name__ == "__main__":
    import music
    train_examples = music.load_examples('data/train.pkl')

    import sys
    # An optional first command-line argument overrides n_estimators.
    if sys.argv[1:]:
        clf.set_params(n_estimators=int(sys.argv[1]))

    scores = validate(train_examples)
    print("RMSE: %0.6f (+/- %0.6f)" % (scores.mean(), scores.std() / 2))
# --- Stage 1: prepare the training data ------------------------------------
# Each stage is bracketed by _log_time() / _print_time_diff(); presumably
# these report how long the stage took -- confirm against their definitions.
print("Readying training data")
_log_time()
music.ready_training_data(should_use_full_ds)
_print_time_diff()
_draw_separator()

# --- Stage 2: prepare the testing data -------------------------------------
print("Readying testing data")
_log_time()
music.ready_testing_data()
_print_time_diff()
_draw_separator()

# --- Stage 3: load the training examples -----------------------------------
print("Start loading examples")
_log_time()
examples = music.load_examples("data/train.pkl")
_print_time_diff()
_draw_separator()

# --- Stage 4: build features (all_X) and labels (all_y) --------------------
print("Obtaining all x and y values")
_log_time()
all_X = [model.represent(example) for example in examples]
_print_time_diff()

# The label pass is bracketed separately from the feature pass.
_log_time()
all_y = [model.label(example) for example in examples]
_print_time_diff()
_draw_separator()


def print_consolidated_scores(scores):
    # NOTE(review): only this first statement is visible in the current
    # chunk; the remainder of the body presumably follows -- confirm.
    _draw_separator(".", 5)