예제 #1
0
def competition_run():
    """Produce and save predictions for the test set.

    Loads the training and test data from a default-constructed
    ``GalaxyData``, obtains a ``(clf, columns)`` pair from
    ``models.default_model``, predicts on the test features with
    ``models.predict`` and passes the result to ``GalaxyData.save_solution``.
    """
    galaxy = GalaxyData()

    train_x, train_y = galaxy.get_training_data()
    # The second element returned for the test set is not used here.
    test_x, _ = galaxy.get_test_data()

    # The third positional argument (5) is presumably a verbosity level
    # -- TODO confirm against models.default_model.
    fitted, feature_columns = models.default_model(train_x, train_y, 5)

    galaxy.save_solution(models.predict(fitted, test_x, feature_columns))
예제 #2
0
def run_training_test(model, verbose=0):
    """Run a model function end to end and print its test-set RMSE.

    The data is loaded through ``GalaxyData`` with the
    ``feature_extraction.raw_9`` extractor and ``scale_features=False``.

    Args:
        model: callable invoked as ``model(features, solutions, verbose)``;
            it must return a ``(clf, columns)`` pair that is then handed to
            ``models.predict``.
        verbose: value forwarded unchanged as the third argument to
            ``model``. Defaults to 0.
    """
    dataset = GalaxyData(feature_extraction.raw_9, scale_features=False)

    # Test data is fetched before the training data, as in the original flow.
    held_out_x, held_out_y = dataset.get_test_data()
    train_x, train_y = dataset.get_training_data()

    # Build the model from the training data, then predict on the test data.
    fitted, feature_columns = model(train_x, train_y, verbose)
    predictions = models.predict(fitted, held_out_x, feature_columns)

    # Report the score computed by evaluate.get_rmse.
    print(evaluate.get_rmse(held_out_y, predictions))
예제 #3
0
import numpy as np

from evaluate import cross_validate
from galaxy_data import GalaxyData

from sklearn import (ensemble, cross_validation)

# Load the data set with feature scaling disabled.
data = GalaxyData(scale_features=False)
X_train, y_train = data.get_training_data()
X_test, y_test = data.get_test_data()

# A single-tree forest; n_jobs=-1 asks scikit-learn to use all processors.
forest_options = dict(n_estimators=1, n_jobs=-1, verbose=5)
clf = ensemble.RandomForestRegressor(**forest_options)

# The trailing 2 is presumably the number of folds
# -- TODO confirm against evaluate.cross_validate.
scores = cross_validate(clf, X_train, y_train, 2)

# Average the scores: total first, then divide by the length of the
# first axis (converted to float so the division is never integer division).
score_total = sum(scores)
mean_score = score_total / float(scores.shape[0])

# Print the individual scores followed by their mean.
for report_value in (scores, mean_score):
    print(report_value)