def test_advanced_functionality():
    fast_benchmark = True
    dataset = {'url': 'https://autogluon.s3.amazonaws.com/datasets/AdultIncomeBinaryClassification.zip',
               'name': 'AdultIncomeBinaryClassification',
               'problem_type': BINARY}
    label = 'class'
    directory_prefix = './datasets/'
    train_file = 'train_data.csv'
    test_file = 'test_data.csv'
    train_data, test_data = load_data(directory_prefix=directory_prefix, train_file=train_file, test_file=test_file, name=dataset['name'], url=dataset['url'])
    if fast_benchmark:  # subsample for fast_benchmark
        subsample_size = 100
        train_data = train_data.head(subsample_size)
        test_data = test_data.head(subsample_size)
    print(f"Evaluating Advanced Functionality on Benchmark Dataset {dataset['name']}")
    directory = directory_prefix + 'advanced/' + dataset['name'] + "/"
    savedir = directory + 'AutogluonOutput/'
    shutil.rmtree(savedir, ignore_errors=True)  # Delete AutoGluon output directory to ensure previous runs' information has been removed.

    predictor = TabularPredictor(label=label, path=savedir).fit(train_data)
    leaderboard = predictor.leaderboard(data=test_data)
    extra_metrics = ['accuracy', 'roc_auc', 'log_loss']
    leaderboard_extra = predictor.leaderboard(data=test_data, extra_info=True, extra_metrics=extra_metrics)
    assert set(predictor.get_model_names()) == set(leaderboard['model'])
    assert set(predictor.get_model_names()) == set(leaderboard_extra['model'])
    assert set(leaderboard_extra.columns).issuperset(set(leaderboard.columns))
    assert len(leaderboard) == len(leaderboard_extra)
    assert set(leaderboard_extra.columns).issuperset(set(extra_metrics))  # Assert that extra_metrics are present in output
    num_models = len(predictor.get_model_names())

    feature_importances = predictor.feature_importance(data=test_data)
    original_features = set(train_data.columns)
    original_features.remove(label)
    assert set(feature_importances.index) == original_features
    assert set(feature_importances.columns) == {'importance', 'stddev', 'p_value', 'n', 'p99_high', 'p99_low'}

    predictor.transform_features()
    predictor.transform_features(data=test_data)
    predictor.info()

    assert predictor.get_model_names_persisted() == []  # Assert that no models were persisted during training
    assert predictor.unpersist_models() == []  # Assert that no models were unpersisted
    persisted_models = predictor.persist_models(models='all', max_memory=None)
    assert set(predictor.get_model_names_persisted()) == set(persisted_models)  # Ensure all models are persisted
    assert predictor.persist_models(models='all', max_memory=None) == []  # Ensure that no additional models are persisted on repeated calls
    unpersisted_models = predictor.unpersist_models()
    assert set(unpersisted_models) == set(persisted_models)
    assert predictor.get_model_names_persisted() == []  # Assert that all models were unpersisted

    # Persisting unknown models should raise an exception
    with pytest.raises(NetworkXError):
        predictor.persist_models(models=['UNKNOWN_MODEL_1', 'UNKNOWN_MODEL_2'])
    assert predictor.get_model_names_persisted() == []
    assert predictor.unpersist_models(models=['UNKNOWN_MODEL_1', 'UNKNOWN_MODEL_2']) == []

    predictor.persist_models(models='all', max_memory=None)
    predictor.save()  # Save predictor while models are persisted: Intended functionality is that they won't be persisted when loaded.
    predictor_loaded = TabularPredictor.load(predictor.path)  # Assert that predictor loading works
    leaderboard_loaded = predictor_loaded.leaderboard(data=test_data)
    assert len(leaderboard) == len(leaderboard_loaded)
    assert predictor_loaded.get_model_names_persisted() == []  # Assert that models were not still persisted after loading predictor

    assert predictor.get_model_full_dict() == dict()
    predictor.refit_full()
    assert len(predictor.get_model_full_dict()) == num_models
    assert len(predictor.get_model_names()) == num_models * 2
    for model in predictor.get_model_names():
        predictor.predict(data=test_data, model=model)
    predictor.refit_full()  # Confirm that refit_full models aren't further refit.
    assert len(predictor.get_model_full_dict()) == num_models
    assert len(predictor.get_model_names()) == num_models * 2

    predictor.delete_models(models_to_keep=[])  # Test that dry-run doesn't delete models
    assert len(predictor.get_model_names()) == num_models * 2
    predictor.predict(data=test_data)
    predictor.delete_models(models_to_keep=[], dry_run=False)  # Test that dry_run=False deletes models
    assert len(predictor.get_model_names()) == 0
    assert len(predictor.leaderboard()) == 0
    assert len(predictor.leaderboard(extra_info=True)) == 0
    try:
        predictor.predict(data=test_data)
    except:
        pass
    else:
        raise AssertionError('predictor.predict should raise exception after all models are deleted')
    print('Tabular Advanced Functionality Test Succeeded.')
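
# The test above relies on a `load_data` helper defined elsewhere in this test suite.
# Below is a minimal illustrative sketch of what such a helper might do, assuming the
# dataset zip extracts into a folder named after the dataset that contains the
# train/test CSV files; the real helper may differ (e.g. it may read the CSVs directly
# from S3 via TabularDataset). The underscore prefix keeps pytest from collecting it.
def _example_load_data(directory_prefix, train_file, test_file, name, url):
    import os
    import zipfile
    import urllib.request
    import pandas as pd

    directory = os.path.join(directory_prefix, name)
    train_path = os.path.join(directory, train_file)
    test_path = os.path.join(directory, test_file)
    if not (os.path.exists(train_path) and os.path.exists(test_path)):
        os.makedirs(directory_prefix, exist_ok=True)
        zip_path = os.path.join(directory_prefix, name + '.zip')
        urllib.request.urlretrieve(url, zip_path)  # download the dataset archive
        with zipfile.ZipFile(zip_path) as zf:
            zf.extractall(directory_prefix)  # assumed to create `directory` containing the CSVs
    train_data = pd.read_csv(train_path)
    test_data = pd.read_csv(test_path)
    return train_data, test_data
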
def inner_test_tabular(testname):
    # Find the named test
    test = None
    for t in tests:
        if t['name'] == testname:
            test = t
    assert test is not None, f"Could not find test {testname}"

    # Build the dataset
    (dftrain, dftest) = make_dataset(request=test, seed=0)

    # Check the synthetic dataset itself hasn't changed. We round it to 3dp, otherwise tiny floating point differences
    # between platforms can give a different hash that still yields the same prediction scores.
    # Ultimately it doesn't matter how we do this as long as the same dataset gives the same hash
    # on different python versions and architectures.
    current_hash = hashlib.sha256(dftrain.round(decimals=3).values.tobytes()).hexdigest()[0:10]
    proposedconfig = "Proposed new config:\n"
    proposedconfig += f"'dataset_hash' : '{current_hash}',"
    assert current_hash == test['dataset_hash'], f"Test '{testname}' input dataset has changed. All scores will change.\n" + proposedconfig

    # Now run the Predictor 1 or more times with various parameters, and make sure we get
    # back the expected results.
    # Params can either be omitted, be a single run, or be a list of runs.
    if 'params' not in test:
        test['params'] = {'predict': {}, 'fit': {}}
    if not isinstance(test['params'], list):
        test['params'] = [test['params']]
    for params in test['params']:
        # Run this model and set of params
        predictor = TabularPredictor(label='label', **params['predict'])
        predictor.fit(dftrain, **params['fit'])
        leaderboard = predictor.leaderboard(dftest, silent=True)
        leaderboard = leaderboard.sort_values(by='model')  # So we can pre-generate sample config in alphabetical order

        # Store proposed new config based on the current run, in case the developer wants to keep these results (just cut and paste).
        proposedconfig = "Proposed new config:\n"
        proposedconfig += "'expected_score_range' : {\n"
        for model in leaderboard['model']:
            midx_in_leaderboard = leaderboard.index.values[leaderboard['model'] == model][0]
            if np.isnan(leaderboard['score_test'][midx_in_leaderboard]):
                values = "np.nan, np.nan"
            else:
                if model in test['expected_score_range'] and not np.isnan(test['expected_score_range'][model][1]):
                    currentprecision = test['expected_score_range'][model][1]
                else:
                    currentprecision = 0.01
                values = "{}, {}".format(myfloor(leaderboard['score_test'][midx_in_leaderboard], currentprecision), currentprecision)
            proposedconfig += f" '{model}': ({values}),\n"
        proposedconfig += "},\n"

        # First validate the model list was as expected.
        assert set(leaderboard['model']) == set(test['expected_score_range'].keys()), (
            f"Test '{testname}' params {params} got unexpected model list.\n" + proposedconfig)

        # Now validate the scores for each model were as expected.
        all_assertions_met = True
        currentconfig = "Existing config:\n"
        currentconfig += "'expected_score_range' : {\n"
        for model in sorted(test['expected_score_range']):
            midx_in_leaderboard = leaderboard.index.values[leaderboard['model'] == model][0]
            assert leaderboard['model'][midx_in_leaderboard] == model
            expectedrange = test['expected_score_range'][model][1]
            expectedmin = test['expected_score_range'][model][0]
            expectedmax = expectedmin + expectedrange
            if np.isnan(expectedmin):
                values = "np.nan, np.nan"
            else:
                values = "{}, {}".format(expectedmin, expectedrange)
            if (((leaderboard['score_test'][midx_in_leaderboard] >= expectedmin) and
                 (leaderboard['score_test'][midx_in_leaderboard] <= expectedmax)) or
                    (np.isnan(leaderboard['score_test'][midx_in_leaderboard]) and np.isnan(expectedmin))):
                currentconfig += f" '{model}': ({values}),\n"
            else:
                currentconfig += f" '{model}': ({values}), # <--- not met, got {leaderboard['score_test'][midx_in_leaderboard]} \n"
                all_assertions_met = False
        currentconfig += "},\n"
        assert all_assertions_met, f"Test '{testname}', params {params} had unexpected scores:\n" + currentconfig + proposedconfig

        # Clean up this model created with specific params.
        predictor.delete_models(models_to_keep=[], dry_run=False)
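
# For reference, a sketch of the shape of a `tests` config entry that `inner_test_tabular`
# expects, inferred from the lookups above. The names and scores below are purely
# illustrative placeholders, not values from the real test configuration.
_example_test_entry = {
    'name': 'example_binary',           # looked up via the `testname` argument
    'dataset_hash': '0123456789',       # first 10 hex chars of sha256 of the rounded training data
    'params': {'predict': {}, 'fit': {}},  # optional; a single run dict or a list of run dicts
    'expected_score_range': {
        # model name -> (expected minimum score, allowed range); (np.nan, np.nan) for NaN scores
        'ExampleModel_1': (0.85, 0.01),
        'WeightedEnsemble_L2': (0.85, 0.01),
    },
}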