# Example #1
0
def predict(config, experiment_name, test_set, model_name,
            select_epoch=None):
    """Run prediction with a saved model and log how many results came back.

    The configuration is loaded via ``C.Config.load``, a
    ``hcnn.driver.Driver`` is built with ``load_features=True``, and
    ``select_epoch`` is handed unchanged to ``Driver.predict``.

    Parameters
    ----------
    config : str
        Value passed to ``C.Config.load`` (presumably the path of a
        configuration file -- confirm against callers).

    experiment_name : str
        Name of the experiment. Files are saved in a folder of this name.

    test_set
        Not read anywhere in this function body.
        NOTE(review): presumably the name of the held-out dataset; confirm
        whether it should be forwarded to the driver before predicting.

    model_name : str
        Name of the model to use. Must match the training configuration.

    select_epoch : str or None
        Which model params to select. Use the epoch number for this, for
        instance "1830" would use the model file "params1830.npz".
        If None, uses "final.npz".

    Returns
    -------
    None
        The predictions are not returned; only their count is logged.
    """
    print(utils.colored("Evaluating"))
    loaded_config = C.Config.load(config)

    # Gather the driver options first so the constructor call stays short.
    driver_options = dict(model_name=model_name,
                          experiment_name=experiment_name,
                          load_features=True)
    predictor = hcnn.driver.Driver(loaded_config, **driver_options)

    predictions = predictor.predict(select_epoch)
    n_files = len(predictions)
    logger.info("Generated results for {} files.".format(n_files))
def test_find_best_model(config_with_workspace, model_name, workspace):
    """Train a model, pick its best iteration, and predict with it.

    The test walks the driver through: partition setup with the "rwc"
    hold-out, training, model selection (run twice so the second call
    exercises the reload path), and finally prediction, checking that the
    predictions file is written under
    ``<workspace>/<experiment_name>/<hold_out>/``.

    Parameters
    ----------
    config_with_workspace
        Fixture passed as the configuration argument of
        ``hcnn.driver.Driver``.
    model_name
        Fixture naming the model the driver should build.
    workspace
        Fixture used as the root directory when locating the predictions
        file on disk.
    """
    experiment_name = "testexperiment"
    hold_out = "rwc"
    driver = hcnn.driver.Driver(config_with_workspace, model_name=model_name,
                                experiment_name=experiment_name,
                                load_features=True)
    driver.setup_partitions(hold_out)
    result = driver.train_model()
    assert result is True

    # Create a vastly reduced validation dataframe so it'll take less long.
    validation_size = 3
    driver.valid_set.df = driver.valid_set.df.sample(n=validation_size,
                                                     replace=True)
    assert len(driver.valid_set.df) == validation_size
    driver.test_set.df = driver.test_set.df.sample(n=validation_size,
                                                   replace=True)
    assert len(driver.test_set.df) == validation_size

    results_df = driver.find_best_model()
    # Check that results_df is ordered by iteration. Comparing plain lists
    # yields a single bool instead of relying on an element-wise
    # Series-vs-list comparison fed through all().
    iterations = list(results_df["model_iteration"])
    assert iterations == sorted(iterations)

    # Get the best param
    param_iter = driver.select_best_iteration(results_df)
    assert param_iter is not None

    # Load it again to test the reloading path.
    results_df2 = driver.find_best_model()
    # NOTE: all(df == df2) iterates over the *column labels* of the boolean
    # frame, so it passes regardless of the cell values. DataFrame.equals
    # actually compares shape, labels and contents.
    assert results_df.equals(results_df2)

    # Shrink the dataset so this doesn't take forever.
    driver.dataset.df = driver.dataset.df.sample(n=10, replace=True)
    predictions_df = driver.predict(param_iter)
    assert not predictions_df.empty
    predictions_df_path = os.path.join(
        workspace, experiment_name, hold_out,
        "model_{}_predictions.pkl".format(param_iter))
    assert os.path.exists(predictions_df_path)