def predict(config, experiment_name, test_set, model_name, select_epoch=None):
    """Run prediction with a trained model and log how many results came back.

    Parameters
    ----------
    config : str
        Handed to ``C.Config.load`` to obtain the configuration object.

    experiment_name : str
        Name of the experiment. Files are saved in a folder of this name.

    test_set : str
        Accepted for interface compatibility; this function does not
        currently read it.

    model_name : str
        Name of the model to use for training.
        Must match the training configuration.

    select_epoch : str or None
        Which model params to select; forwarded unchanged to
        ``Driver.predict``. Use the epoch number for this, for instance
        "1830" would use the model file "params1830.npz".
        If None, uses "final.npz"

    Returns
    -------
    None
        The number of generated results is only reported through the logger.
    """
    print(utils.colored("Evaluating"))

    loaded_config = C.Config.load(config)
    predictor = hcnn.driver.Driver(
        loaded_config,
        model_name=model_name,
        experiment_name=experiment_name,
        load_features=True,
    )

    predictions = predictor.predict(select_epoch)
    n_files = len(predictions)
    logger.info("Generated results for {} files.".format(n_files))
def test_find_best_model(config_with_workspace, model_name, workspace):
    """Exercise train -> find_best_model -> select -> predict on one Driver.

    Parameters
    ----------
    config_with_workspace
        Configuration fixture passed straight to ``hcnn.driver.Driver``.

    model_name
        Model name fixture forwarded to the Driver.

    workspace
        Root directory under which the predictions file is expected at
        ``<workspace>/<experiment_name>/<hold_out>/model_<iter>_predictions.pkl``.
    """
    experiment_name = "testexperiment"
    hold_out = "rwc"

    driver = hcnn.driver.Driver(config_with_workspace,
                                model_name=model_name,
                                experiment_name=experiment_name,
                                load_features=True)
    driver.setup_partitions(hold_out)
    result = driver.train_model()
    assert result is True

    # Create a vastly reduced validation dataframe so it'll take less long.
    validation_size = 3
    driver.valid_set.df = driver.valid_set.df.sample(n=validation_size,
                                                     replace=True)
    assert len(driver.valid_set.df) == validation_size
    driver.test_set.df = driver.test_set.df.sample(n=validation_size,
                                                   replace=True)

    results_df = driver.find_best_model()
    # check that the results_df is ordered by iteration.
    assert all(results_df["model_iteration"] ==
               sorted(results_df["model_iteration"]))

    # Get the best param
    param_iter = driver.select_best_iteration(results_df)
    assert param_iter is not None

    # Load it again to test the reloading path.
    results_df2 = driver.find_best_model()
    # NOTE: `all(results_df == results_df2)` would iterate over the column
    # labels of the comparison frame (always truthy), so it could never fail.
    # `equals` compares the actual contents (NaNs in matching positions are
    # treated as equal).
    assert results_df.equals(results_df2)

    # Shrink the dataset so this doesn't take forever.
    driver.dataset.df = driver.dataset.df.sample(n=10, replace=True)
    predictions_df = driver.predict(param_iter)
    assert not predictions_df.empty
    predictions_df_path = os.path.join(
        workspace,
        experiment_name,
        hold_out,
        "model_{}_predictions.pkl".format(param_iter))
    assert os.path.exists(predictions_df_path)