def test_random_skip_oom(self):
    """Fit the custom ``large_lgbm`` learner with random search and two concurrent trials.

    The training data is a 900000 x 900000 sparse identity matrix with random
    binary labels. If an ``ImportError`` is raised by the fit or by any of the
    follow-up calls, a skip message is printed and the test ends.
    """
    automl = AutoML()
    automl.add_learner(learner_name="large_lgbm", learner_class=MyLargeLGBM)
    fit_kwargs = dict(
        time_budget=2,
        task="classification",
        log_file_name="test/sparse_classification_oom.log",
        estimator_list=["large_lgbm"],
        log_type="all",
        n_jobs=1,
        hpo_method="random",
        n_concurrent_trials=2,
    )
    n_samples = 900000
    X_train = scipy.sparse.eye(n_samples)
    y_train = np.random.randint(2, size=n_samples)
    try:
        automl.fit(X_train=X_train, y_train=y_train, **fit_kwargs)
        print(automl.predict(X_train))
        # Attributes are read one at a time so each is printed before the next is evaluated.
        for attr in ("model", "config_history"):
            print(getattr(automl, attr))
        print(automl.best_model_for_estimator("large_lgbm"))
        for attr in ("best_iteration", "best_estimator"):
            print(getattr(automl, attr))
    except ImportError:
        print("skipping concurrency test as ray is not installed")
def test_sparse_matrix_lr(self):
    """Fit the ``lrl1``/``lrl2`` estimators twice on a random sparse matrix.

    The first fit uses a 3 second budget together with ``train_time_limit=1``;
    the second reuses the same AutoML instance with the budget raised to 5
    seconds. The fitted state is then printed.
    """
    n_samples = 3000
    automl = AutoML()
    fit_kwargs = dict(
        time_budget=3,
        metric="f1",
        task="classification",
        log_file_name="test/sparse_classification.log",
        estimator_list=["lrl1", "lrl2"],
        log_type="all",
        n_jobs=1,
    )
    X_train = scipy.sparse.random(n_samples, n_samples, density=0.1)
    y_train = np.random.randint(2, size=n_samples)

    automl.fit(X_train=X_train, y_train=y_train, train_time_limit=1, **fit_kwargs)
    fit_kwargs.update(time_budget=5)
    automl.fit(X_train=X_train, y_train=y_train, **fit_kwargs)

    print(automl.predict(X_train))
    for attr in ("model", "config_history"):
        print(getattr(automl, attr))
    print(automl.best_model_for_estimator("lrl2"))
    for attr in ("best_iteration", "best_estimator"):
        print(getattr(automl, attr))
def test_parallel_xgboost(self, hpo_method=None):
    """Fit ``xgboost`` with two concurrent trials on data stored via ``ray.put``.

    Args:
        hpo_method: Forwarded unchanged as the ``hpo_method`` fit setting.

    The test returns silently when an ``ImportError`` is raised (``ray`` is
    imported inside the guarded section).
    """
    automl = AutoML()
    fit_kwargs = dict(
        time_budget=10,
        metric="ap",
        task="classification",
        log_file_name="test/sparse_classification.log",
        estimator_list=["xgboost"],
        log_type="all",
        n_jobs=1,
        n_concurrent_trials=2,
        hpo_method=hpo_method,
    )
    n_samples = 900000
    X_train = scipy.sparse.eye(n_samples)
    y_train = np.random.randint(2, size=n_samples)
    try:
        import ray

        # Training consumes the object reference; prediction uses the local matrix.
        X_train_ref = ray.put(X_train)
        automl.fit(X_train=X_train_ref, y_train=y_train, **fit_kwargs)
        print(automl.predict(X_train))
        for attr in ("model", "config_history"):
            print(getattr(automl, attr))
        print(automl.best_model_for_estimator("xgboost"))
        for attr in ("best_iteration", "best_estimator"):
            print(getattr(automl, attr))
    except ImportError:
        return
def test_classification(self, as_frame=False):
    """Fit a classifier on iris, print its state, then retrain from the written log.

    Args:
        as_frame: Forwarded to ``load_iris`` to choose the returned data container.
    """
    automl = AutoML()
    automl_settings = dict(
        time_budget=4,
        metric='accuracy',
        task='classification',
        log_file_name="test/iris.log",
        log_training_metric=True,
        model_history=True,
    )
    X_train, y_train = load_iris(return_X_y=True, as_frame=as_frame)
    automl.fit(X_train=X_train, y_train=y_train, **automl_settings)

    print(automl.classes_)
    print(automl.predict_proba(X_train)[:5])
    for attr in (
        "model",
        "config_history",
        "model_history",
        "best_iteration",
        "best_estimator",
    ):
        print(getattr(automl, attr))

    # Only the log file name is needed from here on.
    for key in ("metric", "model_history", "log_training_metric"):
        del automl_settings[key]

    # A fresh instance is rebuilt from record 0 of the log written above.
    automl = AutoML()
    duration = automl.retrain_from_log(
        log_file_name=automl_settings["log_file_name"],
        X_train=X_train,
        y_train=y_train,
        train_full=True,
        record_id=0,
    )
    print(duration)
    print(automl.model)
    print(automl.predict_proba(X_train)[:5])
def test_training_log(self):
    """Run a short regression job and check that its training log has records.

    The log is written inside a temporary directory, must exist after the
    fit, and must yield at least one record through ``training_log_reader``.
    """
    with TemporaryDirectory() as tmp_dir:
        log_path = os.path.join(tmp_dir, 'test_training_log.log')

        # Run a simple job.
        automl = AutoML()
        fit_kwargs = dict(
            time_budget=2,
            metric='mse',
            task='regression',
            log_file_name=log_path,
            log_training_metric=True,
            mem_thres=1 << 20,  # 1024 * 1024
            n_jobs=1,
            model_history=True,
        )
        X_train, y_train = load_boston(return_X_y=True)
        automl.fit(X_train=X_train, y_train=y_train, **fit_kwargs)

        # Check if the training log file is populated.
        self.assertTrue(os.path.exists(log_path))
        with training_log_reader(log_path) as reader:
            n_records = 0
            for n_records, record in enumerate(reader.records(), start=1):
                print(record)
            self.assertGreater(n_records, 0)
def test_micro_macro_f1(self):
    """Fit iris once with ``micro_f1`` and once with ``macro_f1``, then print diagnostics.

    The macro-F1 model's predictions are passed to ``norm_confusion_matrix``
    and ``multi_class_curves`` (ROC and precision-recall).
    """
    automl_micro = AutoML()
    automl_macro = AutoML()
    shared_kwargs = dict(
        time_budget=2,
        task="classification",
        log_file_name="test/micro_macro_f1.log",
        log_training_metric=True,
        n_jobs=1,
        model_history=True,
    )
    X_train, y_train = load_iris(return_X_y=True)
    for experiment, metric in ((automl_micro, "micro_f1"), (automl_macro, "macro_f1")):
        experiment.fit(X_train=X_train, y_train=y_train, metric=metric, **shared_kwargs)

    model = automl_macro.model
    y_pred = model.predict(X_train)
    y_pred_proba = model.predict_proba(X_train)

    from flaml.ml import norm_confusion_matrix, multi_class_curves

    print(norm_confusion_matrix(y_train, y_pred))

    from sklearn.metrics import roc_curve, precision_recall_curve

    for curve_func in (roc_curve, precision_recall_curve):
        print(multi_class_curves(y_train, y_pred_proba, curve_func))
def test_ray_classification(self):
    """Fit breast-cancer data twice: with ``use_ray=True``, then with two concurrent trials.

    A 25% split of the data is supplied as the validation set. The test
    returns silently if an ``ImportError`` is raised.
    """
    X, y = load_breast_cancer(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
    automl = AutoML()
    shared_kwargs = dict(
        X_val=X_test,
        y_val=y_test,
        time_budget=10,
        task="classification",
    )
    try:
        for extra_kwargs in ({"use_ray": True}, {"n_concurrent_trials": 2}):
            automl.fit(X_train, y_train, **shared_kwargs, **extra_kwargs)
    except ImportError:
        return
def test_sparse_matrix_regression(self):
    """Fit a regressor on sparse train/validation matrices and print the fitted state.

    Asserts that the validation matrix held by the AutoML instance has the
    same shape as the one passed in.
    """
    automl = AutoML()
    fit_kwargs = dict(
        time_budget=2,
        metric='mae',
        task='regression',
        log_file_name="test/sparse_regression.log",
        model_history=True,
    )
    n_features = 900
    X_train = scipy.sparse.random(300, n_features, density=0.0001)
    y_train = np.random.uniform(size=300)
    X_val = scipy.sparse.random(100, n_features, density=0.0001)
    y_val = np.random.uniform(size=100)
    automl.fit(
        X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **fit_kwargs
    )
    assert automl.X_val.shape == X_val.shape
    print(automl.predict(X_train))
    for attr in (
        "model",
        "config_history",
        "model_history",
        "best_iteration",
        "best_estimator",
        "best_config",
        "best_loss",
        "best_config_train_time",
    ):
        print(getattr(automl, attr))
def test_custom_metric(self):
    """Fit iris with ``custom_metric`` under holdout evaluation and read back the log.

    After the fit, an estimator is rebuilt from record 0 of the log and the
    training-loss history returned by ``get_output_from_log`` is printed.
    """
    automl = AutoML()
    automl_settings = dict(
        time_budget=10,
        eval_method='holdout',
        metric=custom_metric,
        task='classification',
        log_file_name="test/iris_custom.log",
        log_training_metric=True,
        log_type='all',
        model_history=True,
    )
    X_train, y_train = load_iris(return_X_y=True)
    automl.fit(X_train=X_train, y_train=y_train, **automl_settings)

    print(automl.classes_)
    print(automl.predict_proba(X_train))
    for attr in (
        "model",
        "config_history",
        "model_history",
        "best_iteration",
        "best_estimator",
    ):
        print(getattr(automl, attr))

    automl = AutoML()
    log_file = automl_settings["log_file_name"]
    estimator = automl.get_estimator_from_log(log_file, record_id=0, objective='multi')
    print(estimator)

    log_output = get_output_from_log(filename=log_file, time_budget=6)
    (
        time_history,
        best_valid_loss_history,
        valid_loss_history,
        config_history,
        train_loss_history,
    ) = log_output
    print(train_loss_history)
def test_regression_xgboost(self):
    """Fit two custom XGBoost learners on sparse regression data and print the results.

    Asserts that the validation matrix kept in the search state
    (``keep_search_state=True``) has the same shape as the one passed in.
    """
    n_features = 900
    X_train = scipy.sparse.random(300, n_features, density=0.0001)
    y_train = np.random.uniform(size=300)
    X_val = scipy.sparse.random(100, n_features, density=0.0001)
    y_val = np.random.uniform(size=100)

    automl = AutoML()
    for learner_name, learner_class in (("my_xgb1", MyXGB1), ("my_xgb2", MyXGB2)):
        automl.add_learner(learner_name=learner_name, learner_class=learner_class)

    fit_kwargs = dict(
        time_budget=2,
        estimator_list=["my_xgb1", "my_xgb2"],
        task="regression",
        log_file_name="test/regression_xgboost.log",
        n_jobs=1,
        model_history=True,
        keep_search_state=True,
        early_stop=True,
    )
    automl.fit(
        X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **fit_kwargs
    )
    assert automl._state.X_val.shape == X_val.shape
    print(automl.predict(X_train))
    for attr in ("model", "config_history"):
        print(getattr(automl, attr))
    print(automl.best_model_for_estimator("my_xgb2"))
    for attr in (
        "best_iteration",
        "best_estimator",
        "best_config",
        "best_loss",
        "best_config_train_time",
    ):
        print(getattr(automl, attr))
def test_numpy():
    """Run two ``ts_forecast`` fits on a monthly timestamp array and print forecasts.

    The first 72 timestamps are used for training and the remainder for
    prediction. The second fit restricts the estimators to ``arima`` and
    ``sarimax`` and also predicts from an integer number of steps.
    """
    timestamps = np.arange("2014-01", "2021-01", dtype="datetime64[M]")
    values = np.random.random(size=len(timestamps))
    n_train = 72
    shared_kwargs = dict(
        period=12,  # time horizon to forecast, e.g., 12 months
        task="ts_forecast",
        log_file_name="test/ts_forecast.log",
    )

    automl = AutoML()
    automl.fit(
        X_train=timestamps[:n_train],  # a single column of timestamp
        y_train=values[:n_train],  # value for each timestamp
        time_budget=3,  # time budget in seconds
        n_splits=3,  # number of splits
        **shared_kwargs,
    )
    print(automl.predict(timestamps[n_train:]))

    automl = AutoML()
    automl.fit(
        X_train=timestamps[:n_train],  # a single column of timestamp
        y_train=values[:n_train],  # value for each timestamp
        time_budget=1,  # time budget in seconds
        estimator_list=["arima", "sarimax"],
        **shared_kwargs,
    )
    print(automl.predict(timestamps[n_train:]))
    # an alternative way to specify predict steps for arima/sarimax
    print(automl.predict(12))
def test_sparse_matrix_regression(self):
    """Fit a regressor (``mae`` metric, ``verbose=0``) on sparse data and print the results.

    Asserts that the validation matrix kept in the search state has the same
    shape as the one passed in, then prints the fitted state including the
    best model for the ``rf`` estimator.
    """
    n_features = 900
    X_train = scipy.sparse.random(300, n_features, density=0.0001)
    y_train = np.random.uniform(size=300)
    X_val = scipy.sparse.random(100, n_features, density=0.0001)
    y_val = np.random.uniform(size=100)

    automl = AutoML()
    fit_kwargs = dict(
        time_budget=2,
        metric="mae",
        task="regression",
        log_file_name="test/sparse_regression.log",
        n_jobs=1,
        model_history=True,
        keep_search_state=True,
        verbose=0,
        early_stop=True,
    )
    automl.fit(
        X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **fit_kwargs
    )
    assert automl._state.X_val.shape == X_val.shape
    print(automl.predict(X_train))
    for attr in ("model", "config_history"):
        print(getattr(automl, attr))
    print(automl.best_model_for_estimator("rf"))
    for attr in (
        "best_iteration",
        "best_estimator",
        "best_config",
        "best_loss",
        "best_config_train_time",
    ):
        print(getattr(automl, attr))
def test_regression(self):
    """Fit a regressor on the first half of boston with the second half as validation.

    Asserts the validation target length and that the evaluation method is
    ``holdout``, then prints the fitted state and the parsed log output.
    """
    automl = AutoML()
    automl_settings = dict(
        time_budget=2,
        metric='mse',
        task='regression',
        log_file_name="test/boston.log",
        log_training_metric=True,
        model_history=True,
    )
    X_train, y_train = load_boston(return_X_y=True)
    n = len(y_train)
    half = n >> 1
    automl.fit(
        X_train=X_train[:half],
        y_train=y_train[:half],
        X_val=X_train[half:],
        y_val=y_train[half:],
        **automl_settings,
    )
    assert automl.y_val.shape[0] == n - half
    assert automl.eval_method == 'holdout'
    print(automl.predict(X_train))
    for attr in (
        "model",
        "config_history",
        "model_history",
        "best_iteration",
        "best_estimator",
    ):
        print(getattr(automl, attr))
    print(get_output_from_log(automl_settings["log_file_name"], 1))
def run(dataset, config):
    """Train FLAML on ``dataset`` according to ``config`` and return the benchmark result.

    Args:
        dataset: Object exposing ``train.X``/``train.y`` and ``test.X``/``test.y``.
        config: Benchmark task configuration. The fields read here are ``type``,
            ``metric``, ``max_runtime_seconds``, ``cores``, ``framework_params``
            and ``output_predictions_file``.

    Returns:
        Whatever ``result(...)`` returns for the collected predictions,
        probabilities, timings and model count.
    """
    log.info(f"\n**** FLAML [v{__version__}] ****\n")

    X_train, y_train = dataset.train.X, dataset.train.y.squeeze()
    X_test, y_test = dataset.test.X, dataset.test.y.squeeze()

    is_classification = config.type == 'classification'
    time_budget = config.max_runtime_seconds
    n_jobs = config.framework_params.get('_n_jobs', config.cores)
    log.info("Running FLAML with {} number of cores".format(config.cores))
    aml = AutoML()

    # Mapping of benchmark metrics to flaml metrics
    metrics_mapping = dict(
        acc='accuracy',
        auc='roc_auc',
        f1='f1',
        logloss='log_loss',
        mae='mae',
        mse='mse',
        rmse='rmse',
        r2='r2',
    )
    # Look the metric up first so an unsupported one is actually reported;
    # previously the 'auto' fallback made the warning branch unreachable.
    perf_metric = metrics_mapping.get(config.metric)
    if perf_metric is None:
        log.warning("Performance metric %s not supported.", config.metric)
        perf_metric = 'auto'

    # Keys starting with '_' are benchmark-side options and are not passed to fit().
    training_params = {
        k: v for k, v in config.framework_params.items() if not k.startswith('_')
    }

    log_dir = output_subdir("logs", config)
    flaml_log_file_name = os.path.join(log_dir, "flaml.log")
    with Timer() as training:
        aml.fit(
            X_train,
            y_train,
            metric=perf_metric,
            task=config.type,
            n_jobs=n_jobs,
            log_file_name=flaml_log_file_name,
            time_budget=time_budget,
            **training_params,
        )

    with Timer() as predict:
        predictions = aml.predict(X_test)
    probabilities = aml.predict_proba(X_test) if is_classification else None
    labels = aml.classes_ if is_classification else None
    return result(
        output_file=config.output_predictions_file,
        probabilities=probabilities,
        predictions=predictions,
        truth=y_test,
        models_count=len(aml.config_history),
        training_duration=training.duration,
        predict_duration=predict.duration,
        probabilities_labels=labels,
    )
def test_fit_w_starting_point(self, as_frame=True):
    """Fit iris, then start a second search from the first run's best configs.

    Args:
        as_frame: Forwarded to ``load_iris``. When true, the columns are
            renamed to integers and an all-zero column is appended.
    """

    def report(experiment):
        # Print the summary of a finished search (accuracy is 1 - best_loss).
        val_accuracy = 1.0 - experiment.best_loss
        print("Best ML leaner:", experiment.best_estimator)
        print("Best hyperparmeter config:", experiment.best_config)
        print("Best accuracy on validation data: {0:.4g}".format(val_accuracy))
        print(
            "Training duration of best run: {0:.4g} s".format(
                experiment.best_config_train_time
            )
        )

    automl = AutoML()
    first_run_kwargs = dict(
        time_budget=3,
        metric="accuracy",
        task="classification",
        log_file_name="test/iris.log",
        log_training_metric=True,
        n_jobs=1,
        model_history=True,
    )
    X_train, y_train = load_iris(return_X_y=True, as_frame=as_frame)
    if as_frame:
        # test drop column
        n_columns = X_train.shape[1]
        X_train.columns = range(n_columns)
        X_train[n_columns] = np.zeros(len(y_train))
    automl.fit(X_train=X_train, y_train=y_train, **first_run_kwargs)
    report(automl)

    starting_points = automl.best_config_per_estimator
    print("starting_points", starting_points)
    print("loss of the starting_points", automl.best_loss_per_estimator)

    resume_kwargs = dict(
        time_budget=2,
        metric="accuracy",
        task="classification",
        log_file_name="test/iris_resume.log",
        log_training_metric=True,
        n_jobs=1,
        model_history=True,
        log_type="all",
        starting_points=starting_points,
    )
    resumed_automl = AutoML()
    resumed_automl.fit(X_train=X_train, y_train=y_train, **resume_kwargs)
    report(resumed_automl)
def _test_custom_data():
    """Fit a ``seq-classification`` task on TSV files under ``data/input`` and predict.

    Returns early if reading the files raises ``requests.exceptions.HTTPError``.
    Predictions are requested for the test frame, a single string, and a list
    of string pairs.
    """
    from flaml import AutoML
    import requests
    import pandas as pd

    tsv_paths = (
        "data/input/train.tsv",
        "data/input/dev.tsv",
        "data/input/test.tsv",
    )
    try:
        train_dataset, dev_dataset, test_dataset = [
            pd.read_csv(path, delimiter="\t", quoting=3) for path in tsv_paths
        ]
    except requests.exceptions.HTTPError:
        return

    custom_sent_keys = ["#1 String", "#2 String"]
    label_key = "Quality"

    X_train, y_train = train_dataset[custom_sent_keys], train_dataset[label_key]
    X_val, y_val = dev_dataset[custom_sent_keys], dev_dataset[label_key]
    X_test = test_dataset[custom_sent_keys]

    automl = AutoML()
    fit_kwargs = {
        "gpu_per_trial": 0,
        "max_iter": 3,
        "time_budget": 5,
        "task": "seq-classification",
        "metric": "accuracy",
        "custom_hpo_args": {
            "model_path": "google/electra-small-discriminator",
            "output_dir": "data/output/",
            "ckpt_per_epoch": 1,
        },
    }
    automl.fit(
        X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **fit_kwargs
    )
    automl.predict(X_test)
    automl.predict(["test test"])
    automl.predict([["test test", "test test"] for _ in range(3)])
def _test_ray_classification():
    """Fit a synthetic 1000 x 10 classification problem with two concurrent trials."""
    from sklearn.datasets import make_classification

    features, labels = make_classification(n_samples=1000, n_features=10)
    experiment = AutoML()
    experiment.fit(
        features,
        labels,
        time_budget=10,
        task="classification",
        n_concurrent_trials=2,
    )
def test_roc_auc_ovo(self):
    """Fit iris for one second using the ``roc_auc_ovo`` metric."""
    X_train, y_train = load_iris(return_X_y=True)
    experiment = AutoML()
    experiment.fit(
        X_train=X_train,
        y_train=y_train,
        time_budget=1,
        metric="roc_auc_ovo",
        task="classification",
        log_file_name="test/roc_auc_ovo.log",
        log_training_metric=True,
        n_jobs=1,
        model_history=True,
    )
def test_custom_metric(self):
    """Fit iris from a labelled dataframe with ``custom_metric``, CV and ensembling.

    After the fit, an estimator is rebuilt from record 0 of the log and the
    metric history from ``get_output_from_log`` is printed. Finally the fit
    is repeated with the dataframe stored via ``ray.put`` and
    ``use_ray=True``; an ``ImportError`` there is ignored.
    """
    df, y = load_iris(return_X_y=True, as_frame=True)
    df["label"] = y
    automl = AutoML()
    automl_settings = dict(
        dataframe=df,
        label="label",
        time_budget=5,
        eval_method="cv",
        metric=custom_metric,
        task="classification",
        log_file_name="test/iris_custom.log",
        log_training_metric=True,
        log_type="all",
        n_jobs=1,
        model_history=True,
        sample_weight=np.ones(len(y)),
        pred_time_limit=1e-5,
        ensemble=True,
    )
    automl.fit(**automl_settings)
    for attr in ("classes_", "model", "config_history"):
        print(getattr(automl, attr))
    print(automl.best_model_for_estimator("rf"))
    for attr in ("best_iteration", "best_estimator"):
        print(getattr(automl, attr))

    automl = AutoML()
    log_file = automl_settings["log_file_name"]
    estimator = automl.get_estimator_from_log(log_file, record_id=0, task="multi")
    print(estimator)

    log_output = get_output_from_log(filename=log_file, time_budget=6)
    (
        time_history,
        best_valid_loss_history,
        valid_loss_history,
        config_history,
        metric_history,
    ) = log_output
    print(metric_history)

    try:
        import ray

        # Replace the dataframe with its object reference for the ray-backed fit.
        df = ray.put(df)
        automl_settings.update(dataframe=df, use_ray=True)
        automl.fit(**automl_settings)
    except ImportError:
        pass
def test_cv():
    """Fit a ``seq-classification`` task with ``n_splits=3`` on four sentence pairs.

    Returns silently if the fit raises ``requests.exceptions.HTTPError``.
    """
    from flaml import AutoML
    import pandas as pd
    import requests

    # Each tuple is (sentence1, sentence2); the columns are separated below.
    sentence_pairs = [
        (
            'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .',
            'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .',
        ),
        (
            "Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .",
            "Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .",
        ),
        (
            "They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .",
            "On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .",
        ),
        (
            "Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .",
            "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .",
        ),
    ]
    first_sentences, second_sentences = (list(column) for column in zip(*sentence_pairs))
    train_dataset = pd.DataFrame(
        {
            "sentence1": first_sentences,
            "sentence2": second_sentences,
            "label": [1, 0, 1, 0],
            "idx": list(range(4)),
        }
    )

    custom_sent_keys = ["sentence1", "sentence2"]
    label_key = "label"
    X_train = train_dataset[custom_sent_keys]
    y_train = train_dataset[label_key]

    automl = AutoML()
    fit_kwargs = {
        "gpu_per_trial": 0,
        "max_iter": 3,
        "time_budget": 5,
        "task": "seq-classification",
        "metric": "accuracy",
        "n_splits": 3,
        "custom_hpo_args": {
            "model_path": "google/electra-small-discriminator",
            "output_dir": "test/data/output/",
            "ckpt_per_epoch": 1,
            "fp16": False,
        },
    }
    try:
        automl.fit(X_train=X_train, y_train=y_train, **fit_kwargs)
    except requests.exceptions.HTTPError:
        return
def test_numpy_large():
    """Run a ``ts_forecast`` fit on 70000 timestamps, holding out the last 10 points."""
    import numpy as np
    import pandas as pd
    from flaml import AutoML

    n_points = 70000
    horizon = 10
    timestamps = pd.date_range("2017-01-01", periods=n_points, freq="T")
    targets = pd.DataFrame(np.random.randint(6500, 7500, n_points))
    experiment = AutoML()
    experiment.fit(
        X_train=timestamps[:-horizon].values,  # a single column of timestamp
        y_train=targets[:-horizon].values,  # value for each timestamp
        period=horizon,  # number of steps to forecast
        task="ts_forecast",
        time_budget=10,  # time budget in seconds
    )
def test_roc_auc_ovr(self):
    """Fit iris for one second with ``roc_auc_ovr``, unit sample weights and holdout."""
    experiment = AutoML()
    X_train, y_train = load_iris(return_X_y=True)
    unit_weights = np.ones(len(y_train))
    experiment.fit(
        X_train=X_train,
        y_train=y_train,
        time_budget=1,
        metric="roc_auc_ovr",
        task="classification",
        log_file_name="test/roc_auc_ovr.log",
        log_training_metric=True,
        n_jobs=1,
        sample_weight=unit_weights,
        eval_method="holdout",
        model_history=True,
    )
def test_binary(self):
    """Fit breast-cancer data with ``task="binary"`` and run a prediction on it."""
    experiment = AutoML()
    X_train, y_train = load_breast_cancer(return_X_y=True)
    experiment.fit(
        X_train=X_train,
        y_train=y_train,
        time_budget=1,
        task="binary",
        log_file_name="test/breast_cancer.log",
        log_training_metric=True,
        n_jobs=1,
        model_history=True,
    )
    # The prediction is only exercised, not inspected.
    _ = experiment.predict(X_train)
def test_custom_learner(self):
    """Register the custom ``RGF`` learner and fit wine data alongside built-in learners."""
    experiment = AutoML()
    experiment.add_learner(
        learner_name='RGF', learner_class=MyRegularizedGreedyForest
    )
    X_train, y_train = load_wine(return_X_y=True)
    fit_kwargs = dict(
        time_budget=10,  # total running time in seconds
        estimator_list=['RGF', 'lgbm', 'rf', 'xgboost'],
        task='classification',  # task type
        sample=True,  # whether to subsample training data
        log_file_name="test/wine.log",
        log_training_metric=True,  # whether to log training metric
        n_jobs=1,
    )

    '''The main flaml automl API'''
    experiment.fit(X_train=X_train, y_train=y_train, **fit_kwargs)
def test_mlflow():
    """Fit inside mlflow runs, log the model, and reload it through mlflow.

    ``mlflow`` is installed with pip at the start. The test returns early,
    after printing the error, if loading OpenML task 7592 raises
    ``OpenMLServerException`` or ``ChunkedEncodingError``. The second run
    repeats the fit with ``use_ray=True`` and ignores an ``ImportError``.
    """
    import subprocess
    import sys

    pip_install = [sys.executable, "-m", "pip", "install", "mlflow"]
    subprocess.check_call(pip_install)
    import mlflow
    from flaml.data import load_openml_task

    try:
        X_train, X_test, y_train, y_test = load_openml_task(
            task_id=7592, data_dir="test/"
        )
    except (OpenMLServerException, ChunkedEncodingError) as err:
        print(err)
        return

    """ import AutoML class from flaml package """
    from flaml import AutoML

    automl = AutoML()
    fit_kwargs = dict(
        time_budget=5,  # total running time in seconds
        # primary metrics can be chosen from: ['accuracy','roc_auc','roc_auc_ovr',
        # 'roc_auc_ovo','f1','log_loss','mae','mse','r2']
        metric="accuracy",
        estimator_list=["lgbm", "rf", "xgboost"],  # list of ML learners
        task="classification",  # task type
        sample=False,  # whether to subsample training data
        log_file_name="adult.log",  # flaml log file
    )
    mlflow.set_experiment("flaml")

    with mlflow.start_run() as run:
        automl.fit(X_train=X_train, y_train=y_train, **fit_kwargs)
        mlflow.sklearn.log_model(automl, "automl")
    pyfunc_model = mlflow.pyfunc.load_model(f"{run.info.artifact_uri}/automl")
    print(pyfunc_model.predict(X_test))

    automl._mem_thres = 0
    first_point = automl.points_to_evaluate[0]
    print(automl.trainable(first_point))

    fit_kwargs["use_ray"] = True
    try:
        with mlflow.start_run() as run:
            automl.fit(X_train=X_train, y_train=y_train, **fit_kwargs)
            mlflow.sklearn.log_model(automl, "automl")
        automl = mlflow.sklearn.load_model(f"{run.info.artifact_uri}/automl")
        print(automl.predict_proba(X_test))
    except ImportError:
        pass
def _test_memory_limit(self):
    """Fit the custom ``large_lgbm`` learner on iris for a single iteration.

    The fit uses ``time_budget=-1`` with ``max_iter=1`` and random search;
    the resulting model is printed.
    """
    experiment = AutoML()
    experiment.add_learner(learner_name="large_lgbm", learner_class=MyLargeLGBM)
    fit_kwargs = dict(
        time_budget=-1,
        task="classification",
        log_file_name="test/classification_oom.log",
        estimator_list=["large_lgbm"],
        log_type="all",
        hpo_method="random",
    )
    X_train, y_train = load_iris(return_X_y=True, as_frame=True)
    experiment.fit(X_train=X_train, y_train=y_train, max_iter=1, **fit_kwargs)
    print(experiment.model)
def test_logging_level(self):
    """Check that a fit emits INFO-level output on the FLAML logger.

    A ``StreamHandler`` writing to an in-memory buffer is attached to the
    FLAML logger for the duration of a short regression fit, and the buffer
    must be non-empty afterwards. The fitted AutoML object is then pickled
    and its ``__version__`` printed.
    """
    from flaml import logger, logger_formatter

    with tempfile.TemporaryDirectory() as d:
        training_log = os.path.join(d, "training.log")

        # Configure logging for the FLAML logger
        # and add a handler that outputs to a buffer.
        logger.setLevel(logging.INFO)
        buf = io.StringIO()
        ch = logging.StreamHandler(buf)
        ch.setFormatter(logger_formatter)
        logger.addHandler(ch)
        try:
            # Run a simple job.
            automl = AutoML()
            automl_settings = {
                "time_budget": 1,
                "metric": 'mse',
                "task": 'regression',
                "log_file_name": training_log,
                "log_training_metric": True,
                "n_jobs": 1,
                "model_history": True,
            }
            X_train, y_train = load_boston(return_X_y=True)
            n = len(y_train) >> 1
            automl.fit(
                X_train=X_train[:n],
                y_train=y_train[:n],
                X_val=X_train[n:],
                y_val=y_train[n:],
                **automl_settings,
            )

            # Check if the log buffer is populated.
            self.assertTrue(len(buf.getvalue()) > 0)
        finally:
            # The logger is shared module state: detach the buffer handler so
            # it does not keep capturing output from later tests.
            logger.removeHandler(ch)

        import pickle

        # NOTE(review): the pickle is written to the current working directory,
        # not the temporary directory, so it is left behind after the test.
        with open('automl.pkl', 'wb') as f:
            pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
        print(automl.__version__)
def test_ensemble(self):
    """Fit wine data with a stacked ensemble whose final estimator is the custom RGF.

    The ``RGF`` learner is registered on the AutoML instance; the search
    itself runs over ``rf``, ``xgboost`` and ``catboost``.
    """
    experiment = AutoML()
    experiment.add_learner(
        learner_name="RGF", learner_class=MyRegularizedGreedyForest
    )
    X_train, y_train = load_wine(return_X_y=True)
    ensemble_spec = {
        "final_estimator": MyRegularizedGreedyForest(),
        "passthrough": False,
    }
    fit_kwargs = dict(
        time_budget=5,  # total running time in seconds
        estimator_list=["rf", "xgboost", "catboost"],
        task="classification",  # task type
        sample=True,  # whether to subsample training data
        log_file_name="test/wine.log",
        log_training_metric=True,  # whether to log training metric
        ensemble=ensemble_spec,
        n_jobs=1,
    )

    """The main flaml automl API"""
    experiment.fit(X_train=X_train, y_train=y_train, **fit_kwargs)
def test_regression(self):
    """Fit California housing with a 90/10 train/validation split, then retrain from the log.

    Asserts that the evaluation method in the search state is ``holdout``,
    prints the fitted state and parsed log output, and finally calls
    ``retrain_from_log`` on the full data with time budgets of 1 and 0.
    """
    automl = AutoML()
    automl_settings = dict(
        time_budget=2,
        task="regression",
        log_file_name="test/california.log",
        log_training_metric=True,
        n_jobs=1,
        model_history=True,
    )
    X_train, y_train = fetch_california_housing(return_X_y=True)
    n_fit = len(y_train) * 9 // 10
    automl.fit(
        X_train=X_train[:n_fit],
        y_train=y_train[:n_fit],
        X_val=X_train[n_fit:],
        y_val=y_train[n_fit:],
        **automl_settings,
    )
    assert automl._state.eval_method == "holdout"
    print(automl.predict(X_train))
    for attr in ("model", "config_history"):
        print(getattr(automl, attr))
    print(automl.best_model_for_estimator("xgboost"))
    for attr in ("best_iteration", "best_estimator"):
        print(getattr(automl, attr))
    print(get_output_from_log(automl_settings["log_file_name"], 1))

    for retrain_budget in (1, 0):
        automl.retrain_from_log(
            task="regression",
            log_file_name=automl_settings["log_file_name"],
            X_train=X_train,
            y_train=y_train,
            train_full=True,
            time_budget=retrain_budget,
        )
def test_sparse_matrix_regression_cv(self):
    """Fit a regressor with cross-validation on a random 100 x 100 sparse matrix."""
    automl = AutoML()
    fit_kwargs = dict(
        time_budget=2,
        eval_method='cv',
        task='regression',
        log_file_name="test/sparse_regression.log",
        model_history=True,
    )
    n_samples = 100
    X_train = scipy.sparse.random(n_samples, 100)
    y_train = np.random.uniform(size=n_samples)
    automl.fit(X_train=X_train, y_train=y_train, **fit_kwargs)
    print(automl.predict(X_train))
    for attr in (
        "model",
        "config_history",
        "model_history",
        "best_iteration",
        "best_estimator",
    ):
        print(getattr(automl, attr))