def competition_run():
    """Fit the default model on the training data and save test predictions.

    Loads training and test features through a default-constructed
    ``GalaxyData``, fits ``models.default_model``, predicts on the test
    features with ``models.predict`` and passes the predictions to
    ``GalaxyData.save_solution``.
    """
    galaxy = GalaxyData()
    train_x, train_y = galaxy.get_training_data()
    # The second element returned for the test split is not needed here.
    test_x, _ = galaxy.get_test_data()

    # Third positional argument is fixed at 5 -- presumably a verbosity
    # level, matching how run_training_test calls its model; confirm
    # against models.default_model.
    fitted = models.default_model(train_x, train_y, 5)
    estimator, feature_columns = fitted

    galaxy.save_solution(models.predict(estimator, test_x, feature_columns))
def run_training_test(model, verbose=0):
    """Train ``model`` on the training data and print its test score.

    Args:
        model: callable invoked as ``model(features, solutions, verbose)``;
            it must return a ``(clf, columns)`` pair that is then handed to
            ``models.predict``.
        verbose: forwarded unchanged as the third argument to ``model``.

    The score is computed by ``evaluate.get_rmse`` from the test solutions
    and the predictions, and is printed rather than returned.
    """
    galaxy = GalaxyData(feature_extraction.raw_9, scale_features=False)
    # Test data is requested before training data, same as the load order
    # this function has always used.
    holdout_x, holdout_y = galaxy.get_test_data()
    train_x, train_y = galaxy.get_training_data()

    # Fit, then predict on the held-out features.
    estimator, feature_columns = model(train_x, train_y, verbose)
    predictions = models.predict(estimator, holdout_x, feature_columns)

    # Report the error of the predictions against the known solutions.
    print(evaluate.get_rmse(holdout_y, predictions))
import numpy as np
from evaluate import cross_validate
from galaxy_data import GalaxyData
from sklearn import (
    ensemble,
    cross_validation,
)

# Quick cross-validation check of a random forest on unscaled features.
data = GalaxyData(scale_features=False)
X_train, y_train = data.get_training_data()
X_test, y_test = data.get_test_data()

# A single tree keeps the run short; n_jobs=-1 and verbose=5 are passed
# straight through to the estimator.
forest_options = {"n_estimators": 1, "n_jobs": -1, "verbose": 5}
clf = ensemble.RandomForestRegressor(**forest_options)

# The trailing 2 is presumably the number of folds -- confirm against
# evaluate.cross_validate.
scores = cross_validate(clf, X_train, y_train, 2)

# Plain arithmetic mean over the first axis of the returned scores.
score_count = float(scores.shape[0])
mean_score = sum(scores) / score_count

print(scores)
print(mean_score)