Example #1
0
def collect_results(config, destination, experiment_name=None,
                    use_integration=False):
    """Collect results for one or all experiments into *destination*.

    Parameters
    ----------
    config : str
        Path to the configuration file. Ignored when ``use_integration``
        is True (``INT_CONFIG_PATH`` is used instead).
    destination : str
        Directory into which each experiment's results are collected.
    experiment_name : str or None
        If given, collect only this experiment; otherwise collect every
        experiment available for *config*.
    use_integration : bool
        When True, only experiments whose name contains
        "integrationtest" are collected; when False, those are skipped.

    Returns
    -------
    bool
        True iff every collected experiment reported success
        (vacuously True if no experiment matched the filter).
    """
    print(utils.colored("Collecting Results"))
    if use_integration:
        config = INT_CONFIG_PATH

    if experiment_name is None:
        experiments = hcnn.driver.Driver.available_experiments(config)
    else:
        experiments = [experiment_name]

    results = []
    # Fresh loop variable: the original shadowed the `experiment_name`
    # parameter, which made the function harder to follow.
    for name in experiments:
        # Integration-test experiments are collected only in integration
        # mode, and regular experiments only otherwise; this single
        # XOR-style check replaces the original two-branch skip logic.
        if bool(use_integration) != ("integrationtest" in name):
            continue

        print("Collecting experiment", utils.colored(name, 'cyan'))

        driver = hcnn.driver.Driver(config, experiment_name=name,
                                    load_features=False,
                                    skip_load_dataset=True)

        results.append(driver.collect_results(destination))

    return all(results)
Example #2
0
def test_collect_results(config_with_workspace, pre_existing_experiment,
                         available_datasets, workspace):
    """End-to-end check of ``Driver.collect_results`` output layout.

    Verifies that collecting a pre-existing experiment produces, under
    ``<workspace>/results/<experiment>``: one directory per dataset with
    training-loss, validation-loss and prediction pickles containing the
    expected columns, plus an overall ``experiment_results.json`` with
    per-dataset summary statistics.
    """
    driver = hcnn.driver.Driver(config_with_workspace,
                                experiment_name=pre_existing_experiment,
                                load_features=False,
                                skip_load_dataset=True)

    destination_dir = os.path.join(workspace, "results")
    result = driver.collect_results(destination_dir)

    assert result is True

    new_experiment_dir = os.path.join(destination_dir, pre_existing_experiment)
    assert os.path.isdir(new_experiment_dir)

    for dataset in available_datasets:
        dataset_results = os.path.join(new_experiment_dir, dataset)
        assert os.path.isdir(dataset_results)

        training_loss_fp = os.path.join(dataset_results, "training_loss.pkl")
        assert os.path.isfile(training_loss_fp)
        training_loss_df = pd.read_pickle(training_loss_fp)
        # BUG FIX: the original asserted on a non-empty list comprehension,
        # which is always truthy — the column checks could never fail.
        # Use all() so a missing column actually fails the test.
        assert all(x in training_loss_df.columns
                   for x in ['iteration', 'loss'])

        validation_loss_fp = os.path.join(dataset_results,
                                          "validation_loss.pkl")
        assert os.path.isfile(validation_loss_fp)
        validation_loss_df = pd.read_pickle(validation_loss_fp)
        assert all(x in validation_loss_df.columns
                   for x in ['mean_acc', 'mean_loss', 'model_file',
                             'model_iteration'])

        prediction_glob = os.path.join(dataset_results, "*predictions.pkl")
        # BUG FIX: the original checked len() of the glob *pattern string*
        # (always > 0); check the list of matched files instead.
        prediction_files = glob.glob(prediction_glob)
        assert len(prediction_files) > 0
        prediction_df = pd.read_pickle(prediction_files[0])
        assert all(x in prediction_df.columns for x in ['y_pred', 'y_true'])

    # Finally, collect_results should create an overall analysis of the
    # three-fold validation, and put it in experiment_results.json.
    overall_results_fp = os.path.join(
        new_experiment_dir, "experiment_results.json")
    assert os.path.isfile(overall_results_fp)
    with open(overall_results_fp, 'r') as fh:
        result_data = json.load(fh)
    for dataset in available_datasets:
        assert dataset in result_data
        # One loop over the expected summary keys replaces eight
        # copy-pasted asserts; a failure names the missing key.
        for stat in ('mean_accuracy', 'mean_precision', 'mean_recall',
                     'mean_f1', 'class_precision', 'class_recall',
                     'class_f1', 'sample_weight'):
            assert stat in result_data[dataset]