def run(model, verbose=0):
    """Entry point to run models.

    Loads galaxy data using ``feature_extraction.hog_features``, splits it
    into training and validation sets, trains the given model and prints the
    errors reported by ``evaluate.get_errors_clf`` for both sets.

    Args:
        model: Model function to run. It is called as
            ``model(training_features, training_solutions, verbose)`` and
            must return a ``(clf, columns)`` pair.
        verbose: Value forwarded unchanged to ``model`` as its third
            positional argument.
    """
    # Load the data and split into training and validation sets.
    # NOTE(review): 50 is presumably the validation percentage -- confirm
    # against GalaxyData.split_training_and_validation_data.
    data = GalaxyData(feature_extraction.hog_features, scale_features=False)
    (training_features, training_solutions,
     validation_features, validation_solutions) = (
        data.split_training_and_validation_data(50))

    # Train the model.
    clf, columns = model(training_features, training_solutions, verbose)

    # NOTE(review): the predictions were never read by the original code
    # either; the call is kept so the prediction path still runs exactly as
    # before, but its result is no longer bound to an unused local.
    models.predict(clf, validation_features, columns)

    # Evaluate the fitted model on both sets.
    valid_rmse = evaluate.get_errors_clf(
        clf, validation_features, validation_solutions)
    train_rmse = evaluate.get_errors_clf(
        clf, training_features, training_solutions)

    # Single pre-formatted argument: prints the same text under the Python 2
    # print statement and the Python 3 print function. The doubled space
    # reproduces the separator the original ``print "label ", value`` wrote.
    print(" Validation RMSE:  %s" % (valid_rmse,))
    print(" Training RMSE:  %s" % (train_rmse,))
def extract_features(extraction_method, index=None, percent_subset=100,
                     classification=False):
    """Extract features and reduced solutions for a selection of galaxies.

    Runs the given extraction method on only those galaxies listed in
    ``index`` and returns all of them or a percentage subset.

    Args:
        extraction_method: Extraction method to use (see
            ``feature_extraction``); handed to ``GalaxyData``.
        index: Index of galaxies for which to process data. If None, no
            restriction is applied and all galaxies are processed.
        percent_subset: Size (in percent) of the subset of the data to
            return. With 100 the full training data is fetched; otherwise
            the data is split with ``100 - percent_subset`` as the split
            argument and the first two returned items are used.
        classification: Forwarded to ``get_reduced_solutions`` as its
            ``classification`` keyword.

    Returns:
        A tuple ``(X, y)``, with X being the features and y the labels
        after they have been passed through ``get_reduced_solutions``.
    """
    galaxy_data = GalaxyData(extraction_method, scale_features=False)

    # Narrow the data set only when the caller supplied an index.
    if index is not None:
        galaxy_data.set_restricted_universe(index)

    if percent_subset != 100:
        # Keep only the first (features, solutions) pair of the split; the
        # remaining two items are discarded.
        remainder_percent = 100 - percent_subset
        features, solutions, _, _ = (
            galaxy_data.split_training_and_validation_data(
                remainder_percent, competition=True))
    else:
        features, solutions = galaxy_data.get_training_data(competition=True)

    reduced_solutions = get_reduced_solutions(
        solutions, classification=classification)
    return (features, reduced_solutions)