Example 1
import shutil

import numpy as np

from autogluon.core.constants import REGRESSION
from autogluon.tabular import TabularPredictor

# `load_data` is a small helper assumed to be defined elsewhere in the test suite.


def test_sample_weight():
    dataset = {'url': 'https://autogluon.s3.amazonaws.com/datasets/toyRegression.zip',
               'name': 'toyRegression',
               'problem_type': REGRESSION,
               'label': 'y',
               'performance_val': 0.183}
    directory_prefix = './datasets/'
    train_file = 'train_data.csv'
    test_file = 'test_data.csv'
    train_data, test_data = load_data(directory_prefix=directory_prefix, train_file=train_file, test_file=test_file, name=dataset['name'], url=dataset['url'])
    print(f"Evaluating Benchmark Dataset {dataset['name']}")
    directory = directory_prefix + dataset['name'] + "/"
    savedir = directory + 'AutogluonOutput/'
    shutil.rmtree(savedir, ignore_errors=True)  # Delete AutoGluon output directory to ensure previous runs' information has been removed.
    sample_weight = 'sample_weights'
    weights = np.abs(np.random.rand(len(train_data),))
    test_weights = np.abs(np.random.rand(len(test_data),))
    train_data[sample_weight] = weights
    test_data_weighted = test_data.copy()
    test_data_weighted[sample_weight] = test_weights
    fit_args = {'time_limit': 20}
    predictor = TabularPredictor(label=dataset['label'], path=savedir, problem_type=dataset['problem_type'], sample_weight=sample_weight).fit(train_data, **fit_args)
    ldr = predictor.leaderboard(test_data)
    perf = predictor.evaluate(test_data)
    # Run again with weight_evaluation:
    # FIXME: RMSE doesn't support sample_weight, so this entire call doesn't make sense yet
    predictor = TabularPredictor(label=dataset['label'], path=savedir, problem_type=dataset['problem_type'], sample_weight=sample_weight, weight_evaluation=True).fit(train_data, **fit_args)
    # perf = predictor.evaluate(test_data_weighted)  # TODO: Doesn't work without implementing sample_weight in evaluate
    predictor.distill(time_limit=10)
    ldr = predictor.leaderboard(test_data_weighted)
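    # Sketch (not part of the original test): since `evaluate` does not yet apply sample weights
    # for RMSE (see the FIXME above), a weighted RMSE can be computed manually with scikit-learn
    # as a cross-check; `mean_squared_error` and its `sample_weight` argument are standard sklearn API.
    from sklearn.metrics import mean_squared_error
    y_pred = predictor.predict(test_data)
    weighted_rmse = np.sqrt(mean_squared_error(test_data[dataset['label']], y_pred, sample_weight=test_weights))
    print(f"Manually computed weighted RMSE: {weighted_rmse:.4f}")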
Example 2
import shutil
import warnings
from random import seed  # assumption: `seed` here refers to Python's random.seed

import numpy as np
import pandas as pd
import pytest

from autogluon.core.constants import BINARY, MULTICLASS, REGRESSION
from autogluon.tabular import TabularPredictor

# `load_data` and `get_benchmark_sets` are helpers assumed to be defined elsewhere in the test suite.


def run_tabular_benchmarks(fast_benchmark, subsample_size, perf_threshold, seed_val, fit_args, dataset_indices=None, run_distill=False, crash_in_oof=False):
    print("Running fit with args:")
    print(fit_args)
    # Each train/test dataset must be located in a single directory with the given file names.
    train_file = 'train_data.csv'
    test_file = 'test_data.csv'
    EPS = 1e-10

    # List containing dicts for each dataset to include in benchmark (try to order based on runtimes)
    datasets = get_benchmark_sets()
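    # For reference, each benchmark entry is a dict of the same shape used in Example 1, e.g.
    # {'url': 'https://autogluon.s3.amazonaws.com/datasets/toyRegression.zip', 'name': 'toyRegression',
    #  'problem_type': REGRESSION, 'label': 'y', 'performance_val': 0.183}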
    if dataset_indices is not None: # only run some datasets
        datasets = [datasets[i] for i in dataset_indices]

    # Aggregate performance summaries obtained in previous benchmark run:
    prev_perf_vals = [dataset['performance_val'] for dataset in datasets]
    previous_avg_performance = np.mean(prev_perf_vals)
    previous_median_performance = np.median(prev_perf_vals)
    previous_worst_performance = np.max(prev_perf_vals)

    # Run benchmark:
    performance_vals = [0.0] * len(datasets) # performance obtained in this run
    directory_prefix = './datasets/'
    with warnings.catch_warnings(record=True) as caught_warnings:
        for idx in range(len(datasets)):
            dataset = datasets[idx]
            train_data, test_data = load_data(directory_prefix=directory_prefix, train_file=train_file, test_file=test_file, name=dataset['name'], url=dataset['url'])
            if seed_val is not None:
                seed(seed_val)
                np.random.seed(seed_val)
            print("Evaluating Benchmark Dataset %s (%d of %d)" % (dataset['name'], idx+1, len(datasets)))
            directory = directory_prefix + dataset['name'] + "/"
            savedir = directory + 'AutogluonOutput/'
            shutil.rmtree(savedir, ignore_errors=True)  # Delete AutoGluon output directory to ensure previous runs' information has been removed.
            label = dataset['label']
            y_test = test_data[label]
            test_data = test_data.drop(labels=[label], axis=1)
            if fast_benchmark:
                if subsample_size is None:
                    raise ValueError("fast_benchmark specified without subsample_size")
                if subsample_size < len(train_data):
                    # .sample instead of .head to increase diversity and test cases where data index is not monotonically increasing.
                    train_data = train_data.sample(n=subsample_size, random_state=seed_val)  # subsample for fast_benchmark
            predictor = TabularPredictor(label=label, path=savedir).fit(train_data, **fit_args)
            results = predictor.fit_summary(verbosity=4)
            if predictor.problem_type != dataset['problem_type']:
                warnings.warn("For dataset %s: Autogluon inferred problem_type = %s, but should = %s" % (dataset['name'], predictor.problem_type, dataset['problem_type']))
            predictor = TabularPredictor.load(savedir)  # Test loading previously-trained predictor from file
            y_pred_empty = predictor.predict(test_data[0:0])
            assert len(y_pred_empty) == 0
            y_pred = predictor.predict(test_data)
            perf_dict = predictor.evaluate_predictions(y_true=y_test, y_pred=y_pred, auxiliary_metrics=True)
            if dataset['problem_type'] != REGRESSION:
                perf = 1.0 - perf_dict['accuracy']  # convert accuracy to error-rate
            else:
                perf = 1.0 - perf_dict['r2']  # convert R^2 to unexplained variance (error to minimize)
            performance_vals[idx] = perf
            print("Performance on dataset %s: %s   (previous perf=%s)" % (dataset['name'], performance_vals[idx], dataset['performance_val']))
            if (not fast_benchmark) and (performance_vals[idx] > dataset['performance_val'] * perf_threshold):
                warnings.warn("Performance on dataset %s is %s times worse than previous performance." %
                              (dataset['name'], performance_vals[idx]/(EPS+dataset['performance_val'])))
            if predictor._trainer.bagged_mode and not crash_in_oof:
                # TODO: Test index alignment with original training data (first handle duplicated rows / dropped rows edge cases)
                y_pred_oof = predictor.get_oof_pred()
                y_pred_proba_oof = predictor.get_oof_pred_proba(as_multiclass=False)
                y_pred_oof_transformed = predictor.get_oof_pred(transformed=True)
                y_pred_proba_oof_transformed = predictor.get_oof_pred_proba(as_multiclass=False, transformed=True)

                # Assert expected type output
                assert isinstance(y_pred_oof, pd.Series)
                assert isinstance(y_pred_oof_transformed, pd.Series)
                if predictor.problem_type == MULTICLASS:
                    assert isinstance(y_pred_proba_oof, pd.DataFrame)
                    assert isinstance(y_pred_proba_oof_transformed, pd.DataFrame)
                else:
                    if predictor.problem_type == BINARY:
                        assert isinstance(predictor.get_oof_pred_proba(), pd.DataFrame)
                    assert isinstance(y_pred_proba_oof, pd.Series)
                    assert isinstance(y_pred_proba_oof_transformed, pd.Series)

                assert y_pred_oof_transformed.equals(predictor.transform_labels(y_pred_oof, proba=False))

                # Test that the transform_labels method is capable of reproducing the same output when converting back and forth, and test that oof 'transform' parameter works properly.
                y_pred_proba_oof_inverse = predictor.transform_labels(y_pred_proba_oof, proba=True)
                y_pred_proba_oof_inverse_inverse = predictor.transform_labels(y_pred_proba_oof_inverse, proba=True, inverse=True)
                y_pred_oof_inverse = predictor.transform_labels(y_pred_oof)
                y_pred_oof_inverse_inverse = predictor.transform_labels(y_pred_oof_inverse, inverse=True)

                if isinstance(y_pred_proba_oof_transformed, pd.DataFrame):
                    pd.testing.assert_frame_equal(y_pred_proba_oof_transformed, y_pred_proba_oof_inverse)
                    pd.testing.assert_frame_equal(y_pred_proba_oof, y_pred_proba_oof_inverse_inverse)
                else:
                    pd.testing.assert_series_equal(y_pred_proba_oof_transformed, y_pred_proba_oof_inverse)
                    pd.testing.assert_series_equal(y_pred_proba_oof, y_pred_proba_oof_inverse_inverse)
                pd.testing.assert_series_equal(y_pred_oof_transformed, y_pred_oof_inverse)
                pd.testing.assert_series_equal(y_pred_oof, y_pred_oof_inverse_inverse)

                # Test that index of both the internal training data and the oof outputs are consistent in their index values.
                X_internal, y_internal = predictor.load_data_internal()
                y_internal_index = list(y_internal.index)
                assert list(X_internal.index) == y_internal_index
                assert list(y_pred_oof.index) == y_internal_index
                assert list(y_pred_proba_oof.index) == y_internal_index
                assert list(y_pred_oof_transformed.index) == y_internal_index
                assert list(y_pred_proba_oof_transformed.index) == y_internal_index
            else:
                # Not in bagged mode: OOF predictions are unavailable, so these calls should raise
                with pytest.raises(AssertionError):
                    predictor.get_oof_pred()
                with pytest.raises(AssertionError):
                    predictor.get_oof_pred_proba()
            if run_distill:
                predictor.distill(time_limit=60, augment_args={'size_factor':0.5})

    # Summarize:
    avg_perf = np.mean(performance_vals)
    median_perf = np.median(performance_vals)
    worst_perf = np.max(performance_vals)
    for idx in range(len(datasets)):
        print("Performance on dataset %s: %s   (previous perf=%s)" % (datasets[idx]['name'], performance_vals[idx], datasets[idx]['performance_val']))

    print("Average performance: %s" % avg_perf)
    print("Median performance: %s" % median_perf)
    print("Worst performance: %s" % worst_perf)

    if not fast_benchmark:
        if avg_perf > previous_avg_performance * perf_threshold:
            warnings.warn("Average Performance is %s times worse than previously." % (avg_perf/(EPS+previous_avg_performance)))
        if median_perf > previous_median_performance * perf_threshold:
            warnings.warn("Median Performance is %s times worse than previously." % (median_perf/(EPS+previous_median_performance)))
        if worst_perf > previous_worst_performance * perf_threshold:
            warnings.warn("Worst Performance is %s times worse than previously." % (worst_perf/(EPS+previous_worst_performance)))

    print("Ran fit with args:")
    print(fit_args)
    # List all warnings again to make sure they are seen:
    print("\n\n WARNINGS:")
    for w in caught_warnings:
        warnings.warn(w.message)
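
# Sketch (not part of the original source): a hypothetical invocation of the benchmark runner
# above; the fit_args values and dataset_indices used here are illustrative choices only.
if __name__ == '__main__':
    example_fit_args = {'time_limit': 120, 'verbosity': 2}
    run_tabular_benchmarks(fast_benchmark=True,
                           subsample_size=100,
                           perf_threshold=1.1,
                           seed_val=0,
                           fit_args=example_fit_args,
                           dataset_indices=[0],  # run only the first benchmark dataset
                           run_distill=True)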
Example 3
from autogluon.tabular import TabularDataset, TabularPredictor

# Fit model ensemble (assumes label, train_data, train_file_path, subsample_size and an
# initial time_limit are already defined, as earlier in the AutoGluon tabular tutorial):
predictor = TabularPredictor(label).fit(train_data,
                                        auto_stack=True,
                                        time_limit=time_limit)

# Distill ensemble-predictor into single model:

time_limit = 60  # set = None to fully train distilled models

# aug_data is optional; it could be additional unlabeled data you may have. Here we reuse the training data for demonstration, but in practice you should only supply new data here:
aug_data = TabularDataset(train_file_path)
aug_data = aug_data.head(subsample_size)
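
# Sketch (not in the original snippet): aug_data can be supplied to distillation through
# distill's `augmentation_data` parameter, so the students also learn from these extra rows;
# the models_name_suffix value here is an arbitrary illustrative choice.
predictor.distill(time_limit=time_limit, augmentation_data=aug_data, models_name_suffix='augdata')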

distilled_model_names = predictor.distill(
    time_limit=time_limit, augment_args={'num_augmented_samples': 100}
)  # default distillation (time_limit & augment_args are also optional, here set to suboptimal values to ensure quick runtime)

# Other distillation variants demonstrating different usage options:
predictor.distill(time_limit=time_limit,
                  teacher_preds='soft',
                  augment_method='spunge',
                  augment_args={'size_factor': 1},
                  verbosity=3,
                  models_name_suffix='spunge')

predictor.distill(time_limit=time_limit,
                  hyperparameters={
                      'GBM': {},
                      'NN': {}
                  })  # distill only into LightGBM ('GBM') and neural-network ('NN') student models
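
# Sketch (not in the original snippet; assumes a labeled test_data table is available):
# distilled students appear in the leaderboard and can be selected by name at prediction time.
ldr = predictor.leaderboard(test_data)
model_to_deploy = distilled_model_names[0]
y_pred_student = predictor.predict(test_data, model=model_to_deploy)
print(y_pred_student.head())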