def test_sample_weight():
    dataset = {'url': 'https://autogluon.s3.amazonaws.com/datasets/toyRegression.zip',
               'name': 'toyRegression',
               'problem_type': REGRESSION,
               'label': 'y',
               'performance_val': 0.183}
    directory_prefix = './datasets/'
    train_file = 'train_data.csv'
    test_file = 'test_data.csv'
    train_data, test_data = load_data(directory_prefix=directory_prefix, train_file=train_file, test_file=test_file, name=dataset['name'], url=dataset['url'])
    print(f"Evaluating Benchmark Dataset {dataset['name']}")
    directory = directory_prefix + dataset['name'] + "/"
    savedir = directory + 'AutogluonOutput/'
    shutil.rmtree(savedir, ignore_errors=True)  # Delete AutoGluon output directory to ensure previous runs' information has been removed.
    sample_weight = 'sample_weights'
    weights = np.abs(np.random.rand(len(train_data),))
    test_weights = np.abs(np.random.rand(len(test_data),))
    train_data[sample_weight] = weights
    test_data_weighted = test_data.copy()
    test_data_weighted[sample_weight] = test_weights
    fit_args = {'time_limit': 20}
    predictor = TabularPredictor(label=dataset['label'], path=savedir, problem_type=dataset['problem_type'], sample_weight=sample_weight).fit(train_data, **fit_args)
    ldr = predictor.leaderboard(test_data)
    perf = predictor.evaluate(test_data)
    # Run again with weight_evaluation:
    # FIXME: RMSE doesn't support sample_weight, this entire call doesn't make sense
    predictor = TabularPredictor(label=dataset['label'], path=savedir, problem_type=dataset['problem_type'], sample_weight=sample_weight, weight_evaluation=True).fit(train_data, **fit_args)
    # perf = predictor.evaluate(test_data_weighted)  # TODO: Doesn't work without implementing sample_weight in evaluate
    predictor.distill(time_limit=10)
    ldr = predictor.leaderboard(test_data_weighted)
def test_quantile():
    quantile_levels = [0.01, 0.02, 0.05, 0.98, 0.99]
    dataset = {'url': 'https://autogluon.s3.amazonaws.com/datasets/toyRegression.zip',
               'name': 'toyRegression',
               'problem_type': QUANTILE,
               'label': 'y'}
    directory_prefix = './datasets/'
    train_file = 'train_data.csv'
    test_file = 'test_data.csv'
    train_data, test_data = load_data(directory_prefix=directory_prefix, train_file=train_file, test_file=test_file, name=dataset['name'], url=dataset['url'])
    print(f"Evaluating Benchmark Dataset {dataset['name']}")
    directory = directory_prefix + dataset['name'] + "/"
    savedir = directory + 'AutogluonOutput/'
    shutil.rmtree(savedir, ignore_errors=True)  # Delete AutoGluon output directory to ensure previous runs' information has been removed.
    fit_args = {'time_limit': 20}
    predictor = TabularPredictor(label=dataset['label'], path=savedir, problem_type=dataset['problem_type'], quantile_levels=quantile_levels).fit(train_data, **fit_args)
    ldr = predictor.leaderboard(test_data)
    perf = predictor.evaluate(test_data)
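# NOTE: hedged sketch, not from the original source. The tests above call a `load_data` helper
# whose definition is not included in this excerpt; it is assumed to download the benchmark zip
# once and return the train/test CSVs as DataFrames. Written with the standard library only.
import os
import urllib.request
import zipfile

import pandas as pd


def load_data(directory_prefix, train_file, test_file, name, url=None):
    directory = os.path.join(directory_prefix, name)
    train_path = os.path.join(directory, train_file)
    test_path = os.path.join(directory, test_file)
    if not (os.path.exists(train_path) and os.path.exists(test_path)):
        os.makedirs(directory_prefix, exist_ok=True)
        zip_path = os.path.join(directory_prefix, f"{name}.zip")
        urllib.request.urlretrieve(url, zip_path)  # fetch the benchmark archive
        with zipfile.ZipFile(zip_path) as zf:
            zf.extractall(directory_prefix)  # assumption: archive unpacks into `<name>/...`
        os.remove(zip_path)
    return pd.read_csv(train_path), pd.read_csv(test_path)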
def test_image_predictor(fit_helper):
    from autogluon.vision import ImageDataset
    train_data, _, test_data = ImageDataset.from_folders('https://autogluon.s3.amazonaws.com/datasets/shopee-iet.zip')
    feature_metadata = FeatureMetadata.from_df(train_data).add_special_types({'image': ['image_path']})
    predictor = TabularPredictor(label='label').fit(
        train_data=train_data,
        hyperparameters={'AG_IMAGE_NN': {'epochs': 2, 'model': 'resnet18_v1b'}},
        feature_metadata=feature_metadata
    )
    leaderboard = predictor.leaderboard(test_data)
    assert len(leaderboard) > 0
def train(args):
    model_output_dir = f'{args.output_dir}/data'

    is_distributed = len(args.hosts) > 1
    host_rank = args.hosts.index(args.current_host)
    dist_ip_addrs = args.hosts
    dist_ip_addrs.pop(host_rank)

    # Load training and validation data
    print(f'Train files: {os.listdir(args.train)}')
    train_data = __load_input_data(args.train)

    # Extract column info
    target = args.init_args['label']
    columns = train_data.columns.tolist()
    column_dict = {"columns": columns}
    with open('columns.pkl', 'wb') as f:
        pickle.dump(column_dict, f)

    # Train models
    args.init_args['path'] = args.model_dir
    # args.fit_args.pop('label', None)
    predictor = TabularPredictor(**args.init_args).fit(train_data, **args.fit_args)

    # Results summary
    predictor.fit_summary(verbosity=3)
    # model_summary_fname_src = os.path.join(predictor.output_directory, 'SummaryOfModels.html')
    model_summary_fname_src = os.path.join(args.model_dir, 'SummaryOfModels.html')
    model_summary_fname_tgt = os.path.join(model_output_dir, 'SummaryOfModels.html')

    if os.path.exists(model_summary_fname_src):
        shutil.copy(model_summary_fname_src, model_summary_fname_tgt)

    # Ensemble visualization
    G = predictor._trainer.model_graph
    remove = [node for node, degree in dict(G.degree()).items() if degree < 1]
    G.remove_nodes_from(remove)
    A = nx.nx_agraph.to_agraph(G)
    A.graph_attr.update(rankdir='BT')
    A.node_attr.update(fontsize=10)
    for node in A.iternodes():
        node.attr['shape'] = 'rectangle'
    A.draw(os.path.join(model_output_dir, 'ensemble-model.png'), format='png', prog='dot')

    # Optional test data
    if args.test:
        print(f'Test files: {os.listdir(args.test)}')
        test_data = __load_input_data(args.test)
        # Test data must be labeled for scoring
        if target in test_data:
            # Leaderboard on test data
            print('Running model on test data and getting Leaderboard...')
            leaderboard = predictor.leaderboard(test_data, silent=True)
            print(format_for_print(leaderboard), end='\n\n')
            leaderboard.to_csv(f'{model_output_dir}/leaderboard.csv', index=False)

            # Feature importance on test data
            # Note: Feature importance must be calculated on held-out (test) data.
            # If calculated on training data it will be biased due to overfitting.
            if args.feature_importance:
                print('Feature importance:')
                # Increase rows to print feature importance
                pd.set_option('display.max_rows', 500)
                feature_importance_df = predictor.feature_importance(test_data)
                print(feature_importance_df)
                feature_importance_df.to_csv(f'{model_output_dir}/feature_importance.csv', index=True)

            # Classification report and confusion matrix for classification model
            if predictor.problem_type in [BINARY, MULTICLASS]:
                from sklearn.metrics import classification_report, confusion_matrix

                X_test = test_data.drop(target, axis=1)
                y_test_true = test_data[target]
                y_test_pred = predictor.predict(X_test)
                y_test_pred_prob = predictor.predict_proba(X_test, as_multiclass=True)

                report_dict = classification_report(y_test_true, y_test_pred, output_dict=True, labels=predictor.class_labels)
                report_dict_df = pd.DataFrame(report_dict).T
                report_dict_df.to_csv(f'{model_output_dir}/classification_report.csv', index=True)

                cm = confusion_matrix(y_test_true, y_test_pred, labels=predictor.class_labels)
                cm_df = pd.DataFrame(cm, predictor.class_labels, predictor.class_labels)
                sns.set(font_scale=1)
                cmap = 'coolwarm'
                sns.heatmap(cm_df, annot=True, fmt='d', cmap=cmap)
                plt.title('Confusion Matrix')
                plt.ylabel('true label')
                plt.xlabel('predicted label')
                plt.savefig(f'{model_output_dir}/confusion_matrix.png')  # save before show so the figure isn't cleared by interactive backends
                plt.show()

                get_roc_auc(y_test_true, y_test_pred_prob, predictor.class_labels, predictor.class_labels_internal, model_output_dir)
        else:
            warnings.warn('Skipping eval on test data since label column is not included.')

    # Files summary
    print('Model export summary:')
    print(f"/opt/ml/model/: {os.listdir('/opt/ml/model/')}")
    models_contents = os.listdir('/opt/ml/model/models')
    print(f"/opt/ml/model/models: {models_contents}")
    print(f"/opt/ml/model directory size: {du('/opt/ml/model/')}\n")
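# NOTE: hedged sketches, not from the original source. train() above relies on `__load_input_data`
# and `du`, whose definitions are not part of this excerpt. The assumptions here: the first reads
# every CSV in a SageMaker input channel directory and concatenates them; the second reports
# human-readable directory size by shelling out to `du`.
import os
import subprocess

import pandas as pd


def __load_input_data(path: str) -> pd.DataFrame:
    input_files = [os.path.join(path, f) for f in os.listdir(path) if f.endswith('.csv')]
    if not input_files:
        raise ValueError(f'No CSV files found in {path}')
    return pd.concat([pd.read_csv(f) for f in input_files], ignore_index=True)


def du(path: str) -> str:
    # Human-readable disk usage of a directory, e.g. "1.2G".
    return subprocess.check_output(['du', '-sh', path]).split()[0].decode('utf-8')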
# distill() returns the names of the newly distilled models; capture them so model_to_deploy below is defined.
distilled_model_names = predictor.distill(
    time_limit=time_limit,
    hyperparameters={'GBM': {}, 'NN': {}},
    teacher_preds='soft',
    augment_method='munge',
    augment_args={'size_factor': 1, 'max_size': 100},
    models_name_suffix='munge',
)
predictor.distill(
    augmentation_data=aug_data,
    time_limit=time_limit,
    teacher_preds='soft',
    models_name_suffix='extra',
)  # augmentation with "extra" unlabeled data
predictor.distill(
    time_limit=time_limit,
    teacher_preds=None,
    models_name_suffix='noteacher',
)  # standard training without distillation

# Compare performance of different models on test data after distillation:
ldr = predictor.leaderboard(test_data)
model_to_deploy = distilled_model_names[0]

y_pred = predictor.predict_proba(test_data, model_to_deploy)
print(y_pred[:5])
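# NOTE: hedged sketch, not from the original source. `aug_data` above is assumed to be extra
# unlabeled data with the same feature columns as the training data; one purely illustrative way
# to build it is to drop the label column from additional held-out rows.
aug_data = test_data.drop(columns=[predictor.label])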
def autogluon(df, task, timelife):
    pd.options.mode.chained_assignment = None
    df_new = copy.copy(df)
    X, y, _ = return_X_y(df_new)

    if isinstance(y, pd.Series):
        y = y.to_frame()

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

    if isinstance(y_train, pd.Series):
        y_train = y_train.to_frame()
    target = y_train.columns[0]
    if isinstance(y_test, pd.Series):
        y_test = y_test.to_frame()

    X_train[target] = y_train
    train = X_train
    test = X_test

    if task == 'classification':
        if len(y[y.columns[0]].unique()) > 2:
            pt = 'multiclass'
            f1 = lambda y_test, y_pred: f1_score(y_test, y_pred, average='weighted')
        else:
            pt = 'binary'
            f1 = lambda y_test, y_pred: f1_score(y_test, y_pred)
    else:
        pt = 'regression'

    # TEMPORARY -> note: models are saved to a throwaway location, e.g. path='/home/riccardo/.local/share/Trash'
    predictor = TabularPredictor(label=target, problem_type=pt).fit(
        train_data=train,
        time_limit=timelife * 60,
        presets=['optimize_for_deployment'])
    results = predictor.fit_summary()
    y_pred = predictor.predict(test)
    pipelines = predictor.leaderboard(df, silent=True)  # these are the pipelines
    res = predictor.evaluate_predictions(y_true=y_test.squeeze(), y_pred=y_pred, auxiliary_metrics=True)

    shutil.rmtree('./AutogluonModels')

    if task == 'classification':
        # Previous implementation, kept for reference:
        # y_test = le.fit_transform(y_test)
        # y_pred = le.fit_transform(y_pred)
        # if len(np.unique(y_pred)) > 2:
        #     f1 = f1_score(y_test, y_pred, average='weighted')
        # else:
        #     f1 = f1_score(y_test, y_pred)
        # return (res['accuracy'], f1)
        return (res['accuracy'], f1(y_test, y_pred), pipelines)
    else:
        return (res['root_mean_squared_error'], res['r2'], pipelines)
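# NOTE: hedged sketch, not from the original source. autogluon() above depends on a `return_X_y`
# helper that is not shown here; it is assumed to split a DataFrame whose last column is the
# target into features, target values, and the target column name.
import pandas as pd


def return_X_y(df: pd.DataFrame):
    target = df.columns[-1]  # assumption: target is the last column
    X = df.drop(columns=[target])
    y = df[target]
    return X, y, target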
    train_data, **ag_fit_args)

# --------------------------------------------------------------- Inference
if args.test_dir:
    test_file = get_input_path(args.test_dir)
    test_data = TabularDataset(test_file)

    # Predictions
    y_pred_proba = predictor.predict_proba(test_data)
    if config.get("output_prediction_format", "csv") == "parquet":
        y_pred_proba.to_parquet(f"{args.output_data_dir}/predictions.parquet")
    else:
        y_pred_proba.to_csv(f"{args.output_data_dir}/predictions.csv")

    # Leaderboard
    if config.get("leaderboard", False):
        lb = predictor.leaderboard(test_data, silent=False)
        lb.to_csv(f"{args.output_data_dir}/leaderboard.csv")

    # Feature importance
    if config.get("feature_importance", False):
        feature_importance = predictor.feature_importance(test_data)
        feature_importance.to_csv(f"{args.output_data_dir}/feature_importance.csv")
else:
    if config.get("leaderboard", False):
        lb = predictor.leaderboard(silent=False)
        lb.to_csv(f"{args.output_data_dir}/leaderboard.csv")
def run(dataset, config):
    log.info(f"\n**** AutoGluon [v{__version__}] ****\n")
    save_metadata(config, version=__version__)

    metrics_mapping = dict(
        acc=metrics.accuracy,
        auc=metrics.roc_auc,
        f1=metrics.f1,
        logloss=metrics.log_loss,
        mae=metrics.mean_absolute_error,
        mse=metrics.mean_squared_error,
        r2=metrics.r2,
        rmse=metrics.root_mean_squared_error,
    )

    label = dataset.target.name
    problem_type = dataset.problem_type

    perf_metric = metrics_mapping[config.metric] if config.metric in metrics_mapping else None
    if perf_metric is None:
        # TODO: figure out if we are going to blindly pass metrics through, or if we use a strict mapping
        log.warning("Performance metric %s not supported.", config.metric)

    is_classification = config.type == 'classification'
    training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')}

    load_raw = config.framework_params.get('_load_raw', False)
    if load_raw:
        train, test = load_data_raw(dataset=dataset)
    else:
        column_names, _ = zip(*dataset.columns)
        column_types = dict(dataset.columns)
        train = pd.DataFrame(dataset.train.data, columns=column_names).astype(column_types, copy=False)
        print(f"Columns dtypes:\n{train.dtypes}")
        test = pd.DataFrame(dataset.test.data, columns=column_names).astype(column_types, copy=False)

    del dataset
    gc.collect()

    output_dir = output_subdir("models", config)
    with utils.Timer() as training:
        predictor = TabularPredictor(
            label=label,
            eval_metric=perf_metric.name,
            path=output_dir,
            problem_type=problem_type,
        ).fit(
            train_data=train,
            time_limit=config.max_runtime_seconds,
            **training_params
        )

    del train

    y_test = test[label]
    test = test.drop(columns=label)

    if is_classification:
        with utils.Timer() as predict:
            probabilities = predictor.predict_proba(test, as_multiclass=True)
        predictions = probabilities.idxmax(axis=1).to_numpy()
    else:
        with utils.Timer() as predict:
            predictions = predictor.predict(test, as_pandas=False)
        probabilities = None

    prob_labels = probabilities.columns.values.tolist() if probabilities is not None else None

    # Removed test data input to avoid long running computation, remove 7200s timeout limitation to re-enable
    leaderboard = predictor.leaderboard(silent=True)
    with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.width', 1000):
        print(leaderboard)

    save_artifacts(predictor, leaderboard, config)

    num_models_trained = len(leaderboard)
    if predictor._trainer.model_best is not None:
        num_models_ensemble = len(predictor._trainer.get_minimum_model_set(predictor._trainer.model_best))
    else:
        num_models_ensemble = 1

    return result(output_file=config.output_predictions_file,
                  predictions=predictions,
                  truth=y_test,
                  probabilities=probabilities,
                  probabilities_labels=prob_labels,
                  target_is_encoded=False,
                  models_count=num_models_trained,
                  models_ensemble_count=num_models_ensemble,
                  training_duration=training.duration,
                  predict_duration=predict.duration)
def inner_test_tabular(testname):
    # Find the named test
    test = None
    for t in tests:
        if t['name'] == testname:
            test = t
    assert test is not None, f"Could not find test {testname}"

    # Build the dataset
    (dftrain, dftest) = make_dataset(request=test, seed=0)

    # Check the synthetic dataset itself hasn't changed. We round it to 3dp otherwise tiny floating point differences
    # between platforms can give a different hash that still yields same prediction scores.
    # Ultimately it doesn't matter how we do this as long as the same dataset gives the same hash function on
    # different python versions and architectures.
    current_hash = hashlib.sha256(dftrain.round(decimals=3).values.tobytes()).hexdigest()[0:10]
    proposedconfig = "Proposed new config:\n"
    proposedconfig += f"'dataset_hash' : '{current_hash}',"
    assert current_hash == test['dataset_hash'], f"Test '{testname}' input dataset has changed. All scores will change.\n" + proposedconfig

    # Now run the Predictor 1 or more times with various parameters, and make sure we get
    # back the expected results.
    # Params can either be omitted, a single run, or a list of runs.
    if 'params' not in test:
        test['params'] = {'predict': {}, 'fit': {}}
    if not isinstance(test['params'], list):
        test['params'] = [test['params']]

    for params in test['params']:
        # Run this model and set of params
        predictor = TabularPredictor(label='label', **params['predict'])
        predictor.fit(dftrain, **params['fit'])
        leaderboard = predictor.leaderboard(dftest, silent=True)
        leaderboard = leaderboard.sort_values(by='model')  # So we can pre-generate sample config in alphabetical order

        # Store proposed new config based on the current run, in case the developer wants to keep these results (just cut and paste).
        proposedconfig = "Proposed new config:\n"
        proposedconfig += "'expected_score_range' : {\n"
        for model in leaderboard['model']:
            midx_in_leaderboard = leaderboard.index.values[leaderboard['model'] == model][0]
            if np.isnan(leaderboard['score_test'][midx_in_leaderboard]):
                values = "np.nan, np.nan"
            else:
                if model in test['expected_score_range'] and not np.isnan(test['expected_score_range'][model][1]):
                    currentprecision = test['expected_score_range'][model][1]
                else:
                    currentprecision = 0.01
                values = "{}, {}".format(myfloor(leaderboard['score_test'][midx_in_leaderboard], currentprecision), currentprecision)
            proposedconfig += f"    '{model}': ({values}),\n"
        proposedconfig += "},\n"

        # First validate the model list was as expected.
        assert set(leaderboard['model']) == set(test['expected_score_range'].keys()), (
            f"Test '{testname}' params {params} got unexpected model list.\n" + proposedconfig)

        # Now validate the scores for each model were as expected.
        all_assertions_met = True
        currentconfig = "Existing config:\n"
        currentconfig += "'expected_score_range' : {\n"
        for model in sorted(test['expected_score_range']):
            midx_in_leaderboard = leaderboard.index.values[leaderboard['model'] == model][0]
            assert leaderboard['model'][midx_in_leaderboard] == model
            expectedrange = test['expected_score_range'][model][1]
            expectedmin = test['expected_score_range'][model][0]
            expectedmax = expectedmin + expectedrange
            if np.isnan(expectedmin):
                values = "np.nan, np.nan"
            else:
                values = "{}, {}".format(expectedmin, expectedrange)
            if (((leaderboard['score_test'][midx_in_leaderboard] >= expectedmin) and
                 (leaderboard['score_test'][midx_in_leaderboard] <= expectedmax)) or
                    (np.isnan(leaderboard['score_test'][midx_in_leaderboard]) and np.isnan(expectedmin))):
                currentconfig += f"    '{model}': ({values}),\n"
            else:
                currentconfig += f"    '{model}': ({values}),  # <--- not met, got {leaderboard['score_test'][midx_in_leaderboard]}\n"
                all_assertions_met = False
        currentconfig += "},\n"
        assert all_assertions_met, f"Test '{testname}', params {params} had unexpected scores:\n" + currentconfig + proposedconfig

        # Clean up this model created with specific params.
        predictor.delete_models(models_to_keep=[], dry_run=False)
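# NOTE: hedged sketch, not from the original source. The test above uses a `myfloor` helper whose
# definition is not included; it is assumed to floor a score to the configured precision so the
# proposed (min, range) pair brackets the observed score.
import math


def myfloor(x: float, precision: float = 0.01) -> float:
    # e.g. myfloor(0.7386, 0.01) -> 0.73
    return math.floor(x / precision) * precision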
train_file = get_input_path(args.training_dir)
train_data = TabularDataset(train_file)

test_file = get_input_path(args.test_dir)
test_data = TabularDataset(test_file)

ag_predictor_args = config["ag_predictor_args"]
ag_predictor_args["path"] = args.model_dir
ag_fit_args = config["ag_fit_args"]

predictor = TabularPredictor(**ag_predictor_args).fit(train_data, **ag_fit_args)
logger.info("Best model: %s", predictor.get_model_best())

# Leaderboard
lb = predictor.leaderboard()
lb.to_csv(f'{args.output_data_dir}/leaderboard.csv', index=False)
logger.info("Saved leaderboard to output.")

# Feature importance
feature_importance = predictor.feature_importance(test_data)
feature_importance.to_csv(f'{args.output_data_dir}/feature_importance.csv')
logger.info("Saved feature importance to output.")

# Evaluation
evaluation = predictor.evaluate(test_data)
with open(f'{args.output_data_dir}/evaluation.json', 'w') as f:
    json.dump(evaluation, f)
logger.info("Saved evaluation to output.")

predictor.save_space()
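# NOTE: hedged sketch, not from the original source. `get_input_path` above is assumed to resolve
# a SageMaker channel directory to the single data file it contains; the real helper may differ.
import os


def get_input_path(path: str) -> str:
    files = [f for f in os.listdir(path) if not f.startswith('.')]
    if len(files) != 1:
        raise ValueError(f'Expected exactly one input file in {path}, found {len(files)}')
    return os.path.join(path, files[0])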
def run(dataset, config):
    log.info(f"\n**** AutoGluon [v{__version__}] ****\n")

    metrics_mapping = dict(
        acc=metrics.accuracy,
        auc=metrics.roc_auc,
        f1=metrics.f1,
        logloss=metrics.log_loss,
        mae=metrics.mean_absolute_error,
        mse=metrics.mean_squared_error,
        r2=metrics.r2,
        rmse=metrics.root_mean_squared_error,
    )

    perf_metric = metrics_mapping[config.metric] if config.metric in metrics_mapping else None
    if perf_metric is None:
        # TODO: figure out if we are going to blindly pass metrics through, or if we use a strict mapping
        log.warning("Performance metric %s not supported.", config.metric)

    is_classification = config.type == 'classification'
    training_params = {k: v for k, v in config.framework_params.items() if not k.startswith('_')}

    train, test = dataset.train.path, dataset.test.path
    label = dataset.target.name
    problem_type = dataset.problem_type

    models_dir = tempfile.mkdtemp() + os.sep  # passed to AG

    with Timer() as training:
        predictor = TabularPredictor(
            label=label,
            eval_metric=perf_metric.name,
            path=models_dir,
            problem_type=problem_type,
        ).fit(
            train_data=train,
            time_limit=config.max_runtime_seconds,
            **training_params
        )

    del train

    if is_classification:
        with Timer() as predict:
            probabilities = predictor.predict_proba(test, as_multiclass=True)
        predictions = probabilities.idxmax(axis=1).to_numpy()
    else:
        with Timer() as predict:
            predictions = predictor.predict(test, as_pandas=False)
        probabilities = None

    prob_labels = probabilities.columns.values.astype(str).tolist() if probabilities is not None else None

    _leaderboard_extra_info = config.framework_params.get('_leaderboard_extra_info', False)  # whether to get extra model info (very verbose)
    _leaderboard_test = config.framework_params.get('_leaderboard_test', False)  # whether to compute test scores in leaderboard (expensive)
    leaderboard_kwargs = dict(silent=True, extra_info=_leaderboard_extra_info)
    # Disabled leaderboard test data input by default to avoid long running computation, remove 7200s timeout limitation to re-enable
    if _leaderboard_test:
        leaderboard_kwargs['data'] = test
    leaderboard = predictor.leaderboard(**leaderboard_kwargs)
    with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.width', 1000):
        log.info(leaderboard)

    num_models_trained = len(leaderboard)
    if predictor._trainer.model_best is not None:
        num_models_ensemble = len(predictor._trainer.get_minimum_model_set(predictor._trainer.model_best))
    else:
        num_models_ensemble = 1

    save_artifacts(predictor, leaderboard, config)
    shutil.rmtree(predictor.path, ignore_errors=True)

    return result(output_file=config.output_predictions_file,
                  predictions=predictions,
                  probabilities=probabilities,
                  probabilities_labels=prob_labels,
                  target_is_encoded=False,
                  models_count=num_models_trained,
                  models_ensemble_count=num_models_ensemble,
                  training_duration=training.duration,
                  predict_duration=predict.duration)
def fit_static(X, y, sample_weight=None, eval_set=None, sample_weight_eval_set=None, **kwargs):
    from autogluon.tabular import TabularDataset, TabularPredictor
    from autogluon.tabular.models.knn.knn_rapids_model import KNNRapidsModel
    from autogluon.tabular.models.lr.lr_rapids_model import LinearRapidsModel

    num_classes = kwargs['num_classes']
    if kwargs['verbose'] is not None and kwargs['verbose'] is True:
        verbosity = 2
    else:
        verbosity = 0
    labels = kwargs['labels']
    num_gpus = kwargs['n_gpus']
    accuracy = kwargs.get('accuracy', 10)
    interpretability = kwargs.get('interpretability', 1)
    is_acceptance = kwargs.get('IS_ACCEPTANCE', False)
    is_backend_tuning = kwargs.get('IS_BACKEND_TUNING', False)

    lb = None
    if num_classes >= 2:
        from sklearn.preprocessing import LabelEncoder
        lb = LabelEncoder()
        lb.fit(labels)
        y = lb.transform(y)

    label = '____TARGET_____'
    import datatable as dt
    y_dt = dt.Frame(y, names=[label])

    if eval_set is not None:
        valid_X = eval_set[0][0]
        valid_y = eval_set[0][1]
        if num_classes >= 2:
            valid_y = lb.transform(valid_y)
        valid_y_dt = dt.Frame(valid_y, names=[label])
        assert X.shape[1] == valid_X.shape[1], "Bad shape to rbind: %s %s : %s %s" % (
            X.shape, X.names, valid_X.shape, valid_X.names)
        X = dt.rbind([X, valid_X])
        y_dt = dt.rbind([y_dt, valid_y_dt])

    sw = None
    if sample_weight is not None:
        sw = '____SAMPLE_WEIGHT_____'
        sw_dt = dt.Frame(sample_weight, names=[sw])
        if sample_weight_eval_set is not None:
            swes_dt = dt.Frame(sample_weight_eval_set[0], names=[sw])
            sw_dt = dt.rbind([sw_dt, swes_dt])
        X = dt.cbind([X, y_dt, sw_dt])
    else:
        X = dt.cbind([X, y_dt])

    X = X.to_pandas()  # AutoGluon needs pandas, not numpy

    eval_metric = AutoGluonModel.get_eval_metric(**kwargs)
    time_limit = AutoGluonModel.get_time_limit(accuracy)
    presets = AutoGluonModel.get_presets(accuracy, interpretability, is_acceptance, is_backend_tuning)

    model = TabularPredictor(
        label=label,
        sample_weight=sw,
        eval_metric=eval_metric,
        verbosity=verbosity,
        # learner_kwargs={'ignored_columns': ['id']}
    )

    hyperparameters = {
        KNNRapidsModel: {},
        LinearRapidsModel: {},
        'RF': {},
        'XGB': {'ag_args_fit': {'num_gpus': num_gpus}},
        'CAT': {'ag_args_fit': {'num_gpus': num_gpus}},
        'GBM': [{}, {'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, 'GBMLarge'],
        'NN': {'ag_args_fit': {'num_gpus': num_gpus}},
        'FASTAI': {'ag_args_fit': {'num_gpus': num_gpus}},
    }

    kwargs_fit = dict(hyperparameters=hyperparameters)
    if accuracy >= 5:
        kwargs_fit.update(dict(presets=presets, time_limit=time_limit))
    model.fit(X, **kwargs_fit)

    print(model.leaderboard(silent=True))
    return model
train_data = train_data.head(500)  # subsample for faster demo
print(train_data.head())

label = 'age'  # which column we want to predict
save_path = 'ag_models/'  # where to save trained models
quantiles_topredict = [0.1, 0.5, 0.9]  # which quantiles of the numeric label we want to predict

predictor = TabularPredictor(label=label, path=save_path, problem_type='quantile', quantile_levels=quantiles_topredict)
predictor.fit(train_data, time_limit=30)  # time_limit is optional, you should increase it for real applications

# Inference time:
test_data = TabularDataset('https://autogluon.s3.amazonaws.com/datasets/Inc/test.csv')  # another Pandas DataFrame
predictor = TabularPredictor.load(save_path)  # unnecessary, we reload the predictor just to demonstrate how to load a previously-trained predictor from file

y_pred = predictor.predict(test_data)
print(y_pred)  # each column contains estimates for one target quantile-level

ldr = predictor.leaderboard(test_data)  # evaluate performance of every trained model
print(f"Quantile-regression evaluated using metric = {predictor.eval_metric}")
if run_pred:
    save_path = 'agModels-predictClass'  # specifies folder to store trained models
    predictor = TabularPredictor(label=label, path=save_path).fit(df_train, presets='best_quality')

    y_test = df_test[label]  # values to predict
    test_data_nolab = df_test.drop(columns=[label])  # delete label column to prove we're not cheating

    predictor = TabularPredictor.load(save_path)  # unnecessary, just demonstrates how to load a previously-trained predictor from file
    y_pred = predictor.predict(test_data_nolab)
    perf = predictor.evaluate_predictions(y_true=y_test, y_pred=y_pred, auxiliary_metrics=True)

    leaderboard = predictor.leaderboard(df_test, silent=True)
    st.dataframe(leaderboard)

    y_predproba = predictor.predict_proba(df_pred)

# Enter text for testing
s = 'pd.DataFrame'
sample_dtypes = {
    'list': [1, 'a', [2, 'c'], {'b': 2}],
    'str': 'Hello Streamlit!',
    'int': 17,
    'float': 17.0,
    'dict': {
        1: 'a',
        'x': [2, 'c'],
example_models_2 = {'RF': {}, 'KNN': {}}

# Because auto_ml_pipeline_feature_generator is already fit, it doesn't need to be fit again inside the predictor.
# Instead, train_data is simply transformed by auto_ml_pipeline_feature_generator.transform(train_data).
# This makes the feature transformation completely independent of the training data; we could have used an entirely different data source to fit the generator.
predictor = TabularPredictor(label='class').fit(
    train_data,
    hyperparameters=example_models,
    feature_generator=auto_ml_pipeline_feature_generator)
X_test_transform_2 = predictor.transform_features(X_test)  # This is the same as calling auto_ml_pipeline_feature_generator.transform(X_test)
assert X_test_transform.equals(X_test_transform_2)

# The feature metadata of the feature generator is also preserved.
# All downstream models receive this feature metadata to decide how to use the data.
assert predictor.feature_metadata.to_dict() == auto_ml_pipeline_feature_generator.feature_metadata.to_dict()
predictor.leaderboard(test_data)

# We can train multiple predictors with the same pre-fit feature generator.
# This can save a lot of time during experimentation if fitting the generator is expensive.
predictor_2 = TabularPredictor(label='class').fit(
    train_data,
    hyperparameters=example_models_2,
    feature_generator=auto_ml_pipeline_feature_generator)
predictor_2.leaderboard(test_data)

# We can even specify our own custom generator (although it needs to do a bit more to actually improve the scores; in most situations just use AutoMLPipelineFeatureGenerator).
predictor_3 = TabularPredictor(label='class').fit(
    train_data,
    hyperparameters=example_models,
    feature_generator=plus_three_feature_generator)
predictor_3.leaderboard(test_data)
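# NOTE: hedged sketch, not from the original source. `plus_three_feature_generator` above is a
# custom generator whose definition is not included; the reconstruction below follows the usual
# AutoGluon custom-generator pattern (subclass AbstractFeatureGenerator, implement _fit_transform
# and _transform) and simply adds 3 to every numeric feature.
import pandas as pd
from autogluon.features.generators import AbstractFeatureGenerator


class PlusThreeFeatureGenerator(AbstractFeatureGenerator):
    def _fit_transform(self, X: pd.DataFrame, **kwargs):
        X_out = self._transform(X)
        return X_out, self.feature_metadata_in.type_group_map_special

    def _transform(self, X: pd.DataFrame) -> pd.DataFrame:
        return X + 3  # toy transformation: shift all numeric features by 3

    @staticmethod
    def get_default_infer_features_in_args() -> dict:
        # Only consume int/float features; other dtypes are left to other generators.
        return dict(valid_raw_types=['int', 'float'])


plus_three_feature_generator = PlusThreeFeatureGenerator()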
def test_advanced_functionality():
    fast_benchmark = True
    dataset = {'url': 'https://autogluon.s3.amazonaws.com/datasets/AdultIncomeBinaryClassification.zip',
               'name': 'AdultIncomeBinaryClassification',
               'problem_type': BINARY}
    label = 'class'
    directory_prefix = './datasets/'
    train_file = 'train_data.csv'
    test_file = 'test_data.csv'
    train_data, test_data = load_data(directory_prefix=directory_prefix, train_file=train_file, test_file=test_file, name=dataset['name'], url=dataset['url'])
    if fast_benchmark:  # subsample for fast_benchmark
        subsample_size = 100
        train_data = train_data.head(subsample_size)
        test_data = test_data.head(subsample_size)
    print(f"Evaluating Advanced Functionality on Benchmark Dataset {dataset['name']}")
    directory = directory_prefix + 'advanced/' + dataset['name'] + "/"
    savedir = directory + 'AutogluonOutput/'
    shutil.rmtree(savedir, ignore_errors=True)  # Delete AutoGluon output directory to ensure previous runs' information has been removed.
    predictor = TabularPredictor(label=label, path=savedir).fit(train_data)
    leaderboard = predictor.leaderboard(data=test_data)
    extra_metrics = ['accuracy', 'roc_auc', 'log_loss']
    leaderboard_extra = predictor.leaderboard(data=test_data, extra_info=True, extra_metrics=extra_metrics)
    assert set(predictor.get_model_names()) == set(leaderboard['model'])
    assert set(predictor.get_model_names()) == set(leaderboard_extra['model'])
    assert set(leaderboard_extra.columns).issuperset(set(leaderboard.columns))
    assert len(leaderboard) == len(leaderboard_extra)
    assert set(leaderboard_extra.columns).issuperset(set(extra_metrics))  # Assert that extra_metrics are present in output
    num_models = len(predictor.get_model_names())
    feature_importances = predictor.feature_importance(data=test_data)
    original_features = set(train_data.columns)
    original_features.remove(label)
    assert set(feature_importances.index) == original_features
    assert set(feature_importances.columns) == {'importance', 'stddev', 'p_value', 'n', 'p99_high', 'p99_low'}
    predictor.transform_features()
    predictor.transform_features(data=test_data)
    predictor.info()

    assert predictor.get_model_names_persisted() == []  # Assert that no models were persisted during training
    assert predictor.unpersist_models() == []  # Assert that no models were unpersisted

    persisted_models = predictor.persist_models(models='all', max_memory=None)
    assert set(predictor.get_model_names_persisted()) == set(persisted_models)  # Ensure all models are persisted
    assert predictor.persist_models(models='all', max_memory=None) == []  # Ensure that no additional models are persisted on repeated calls
    unpersisted_models = predictor.unpersist_models()
    assert set(unpersisted_models) == set(persisted_models)
    assert predictor.get_model_names_persisted() == []  # Assert that all models were unpersisted

    # Raise exception
    with pytest.raises(NetworkXError):
        predictor.persist_models(models=['UNKNOWN_MODEL_1', 'UNKNOWN_MODEL_2'])

    assert predictor.get_model_names_persisted() == []
    assert predictor.unpersist_models(models=['UNKNOWN_MODEL_1', 'UNKNOWN_MODEL_2']) == []

    predictor.persist_models(models='all', max_memory=None)
    predictor.save()  # Save predictor while models are persisted: Intended functionality is that they won't be persisted when loaded.
    predictor_loaded = TabularPredictor.load(predictor.path)  # Assert that predictor loading works
    leaderboard_loaded = predictor_loaded.leaderboard(data=test_data)
    assert len(leaderboard) == len(leaderboard_loaded)
    assert predictor_loaded.get_model_names_persisted() == []  # Assert that models were not still persisted after loading predictor

    assert predictor.get_model_full_dict() == dict()
    predictor.refit_full()
    assert len(predictor.get_model_full_dict()) == num_models
    assert len(predictor.get_model_names()) == num_models * 2
    for model in predictor.get_model_names():
        predictor.predict(data=test_data, model=model)
    predictor.refit_full()  # Confirm that refit models aren't further refit.
    assert len(predictor.get_model_full_dict()) == num_models
    assert len(predictor.get_model_names()) == num_models * 2

    predictor.delete_models(models_to_keep=[])  # Test that dry-run doesn't delete models
    assert len(predictor.get_model_names()) == num_models * 2
    predictor.predict(data=test_data)

    predictor.delete_models(models_to_keep=[], dry_run=False)  # Test that dry_run=False deletes models
    assert len(predictor.get_model_names()) == 0
    assert len(predictor.leaderboard()) == 0
    assert len(predictor.leaderboard(extra_info=True)) == 0

    try:
        predictor.predict(data=test_data)
    except:
        pass
    else:
        raise AssertionError('predictor.predict should raise exception after all models are deleted')
    print('Tabular Advanced Functionality Test Succeeded.')
hyperparameters = {'RF': {}}
train_data = train_data.head(1000)  # subsample for faster demo

##################################
# Fitting with the old Predictor #
##################################

predictor1 = task.fit(train_data, label=label, eval_metric=eval_metric, hyperparameters=hyperparameters, num_bagging_folds=2)
predictor1.leaderboard(test_data)

##################################
# Fitting with the new Predictor #
##################################

predictor2 = TabularPredictor(label, eval_metric=eval_metric).fit(train_data, hyperparameters=hyperparameters, num_bag_folds=2)
predictor2.leaderboard(test_data)

####################################
# Advanced fit_extra functionality #
####################################

# Fit extra models at level 0, with 30 second time limit
hyperparameters_extra1 = {'GBM': {}, 'NN': {}}
predictor2.fit_extra(hyperparameters_extra1, time_limit=30)

# Fit new level 1 stacker models that use the level 0 models from the original fit and the previous fit_extra call as base models
hyperparameters_extra2 = {'CAT': {}, 'NN': {}}
base_model_names = predictor2.get_model_names(stack_name='core', level=0)
predictor2.fit_extra(hyperparameters_extra2, base_model_names=base_model_names)

# Fit a new 3-layer stack ensemble on top of level 1 stacker models