Beispiel #1
0
def run(model, verbose=0):
    """Train a model on HOG-feature galaxy data and print its errors.

    Builds a ``GalaxyData`` set from ``feature_extraction.hog_features``,
    splits it into training and validation parts, fits ``model`` on the
    training part and prints the values returned by
    ``evaluate.get_errors_clf`` for both parts (labelled as RMSE).

    Args:
        model: Callable invoked as ``model(training_features,
            training_solutions, verbose)``. It must return a
            ``(clf, columns)`` tuple.
        verbose: Passed through unchanged to ``model`` as its third
            argument.
    """
    # Load the data and split into training and validation sets.
    # NOTE(review): 50 is presumably the percentage held out for
    # validation -- confirm against
    # GalaxyData.split_training_and_validation_data.
    data = GalaxyData(feature_extraction.hog_features, scale_features=False)

    (training_features, training_solutions,
     validation_features, validation_solutions) = data.split_training_and_validation_data(50)

    # Train the model.
    (clf, columns) = model(training_features, training_solutions, verbose)

    # The predictions themselves are not used below, so they are no longer
    # bound to a local. The call is kept because models.predict is defined
    # elsewhere and may have side effects -- TODO confirm whether it can be
    # dropped entirely.
    models.predict(clf, validation_features, columns)

    # Evaluate the fitted classifier on both parts of the data.
    valid_rmse = evaluate.get_errors_clf(clf, validation_features, validation_solutions)
    train_rmse = evaluate.get_errors_clf(clf, training_features, training_solutions)

    # A single pre-formatted argument keeps this valid on Python 2 and 3.
    # The two spaces after the colon reproduce the output of the former
    # statement form ``print " ...: ", value`` (label's trailing space plus
    # the separator space that print inserted).
    print(" Validation RMSE:  %s" % (valid_rmse,))
    print(" Training RMSE:  %s" % (train_rmse,))
def extract_features(extraction_method, index=None, percent_subset=100, classification=False):
    """Run a feature extraction method and return features with reduced labels.

    Args:
        extraction_method: Extraction method handed to ``GalaxyData``.
            See feature_extraction.
        index: Index of the galaxies to process. When not None it is passed
            to ``GalaxyData.set_restricted_universe``; when None no
            restriction is applied.
        percent_subset: Size (in percent) of the subset of the data to
            return. With 100 the full training data is fetched; otherwise
            the data is split with ``100 - percent_subset`` as the split
            argument and only the first (features, solutions) pair of the
            split is kept.
        classification: Forwarded to ``get_reduced_solutions`` as its
            ``classification`` keyword.

    Returns:
        A tuple ``(X, y)`` with X being the features and y the labels as
        returned by ``get_reduced_solutions``.
    """
    galaxies = GalaxyData(extraction_method, scale_features=False)
    if index is not None:
        galaxies.set_restricted_universe(index)

    if percent_subset != 100:
        # Keep only the first pair of the split; the remaining pair is
        # discarded.
        remainder_percent = 100 - percent_subset
        features, solutions, _, _ = galaxies.split_training_and_validation_data(
            remainder_percent, competition=True)
    else:
        features, solutions = galaxies.get_training_data(competition=True)

    labels = get_reduced_solutions(solutions, classification=classification)
    return (features, labels)