def test_tabularHPObagstack():
    """Run the tabular benchmarks with HPO, bagging and stack ensembling enabled.

    The options below are collected into a ``fit_args`` dictionary which is
    handed, together with the benchmark-level settings, to
    ``run_tabular_benchmarks``.
    """
    ############ Benchmark options you can set: ########################
    # How much worse can performance on each dataset be vs previous
    # performance without warning.
    perf_threshold = 1.1
    seed_val = 10000  # random seed
    subsample_size = None
    hyperparameter_tune = True
    stack_ensemble_levels = 2
    num_bagging_folds = 2
    verbosity = 2  # how much output to print
    hyperparameters = None
    time_limits = None
    num_trials = None
    # If True, run a faster benchmark (subsample training sets, less epochs,
    # etc), otherwise we run full benchmark with default AutoGluon settings.
    # performance_value warnings are disabled when fast_benchmark = True.
    fast_benchmark = True  # False

    #### If fast_benchmark = True, can control model training time here.
    #### Only used if fast_benchmark=True ####
    if fast_benchmark:
        subsample_size = 100
        nn_space = {
            'num_epochs': 2,
            'learning_rate': ag.Real(0.001, 0.01),
            'lr_scheduler': ag.Categorical(None, 'cosine', 'step'),
        }
        gbm_space = {
            'num_boost_round': 20,
            'learning_rate': ag.Real(0.01, 0.1),
        }
        hyperparameters = {'GBM': gbm_space, 'NN': nn_space}
        time_limits = 150
        num_trials = 3

    # Mandatory fit() arguments first; optional ones are only added when set.
    fit_args = dict(
        num_bagging_folds=num_bagging_folds,
        stack_ensemble_levels=stack_ensemble_levels,
        hyperparameter_tune=hyperparameter_tune,
        verbosity=verbosity,
    )
    if hyperparameters is not None:
        fit_args['hyperparameters'] = hyperparameters
    if time_limits is not None:
        # A time limit is always paired with two bagging sets.
        fit_args.update(time_limits=time_limits, num_bagging_sets=2)
    if num_trials is not None:
        fit_args['num_trials'] = num_trials
    ###################################################################
    run_tabular_benchmarks(
        fast_benchmark=fast_benchmark,
        subsample_size=subsample_size,
        perf_threshold=perf_threshold,
        seed_val=seed_val,
        fit_args=fit_args,
    )
def test_search_space():
    """Check that every kind of search space yields values inside its domain.

    A training function decorated with ``ag.args`` receives one sampled value
    per search space (including nested objects, functions, lists and dicts),
    asserts each value lies in the declared domain, and reports a dummy
    result. The function is then run for 10 trials by a ``FIFOScheduler``.
    """

    @ag.obj(
        name=ag.space.Categorical('auto', 'gluon'),
    )
    class myobj:
        def __init__(self, name):
            self.name = name

    @ag.func(
        framework=ag.space.Categorical('mxnet', 'pytorch'),
    )
    def myfunc(framework):
        return framework

    @ag.args(
        a=ag.space.Real(1e-3, 1e-2, log=True),
        b=ag.space.Real(1e-3, 1e-2),
        c=ag.space.Int(1, 10),
        d=ag.space.Categorical('a', 'b', 'c', 'd'),
        e=ag.space.Bool(),
        f=ag.space.List(
            ag.space.Int(1, 2),
            ag.space.Categorical(4, 5),
        ),
        g=ag.space.Dict(
            a=ag.Real(0, 10),
            obj=myobj(),
        ),
        h=ag.space.Categorical('test', myobj()),
        i=myfunc(),
    )
    def train_fn(args, reporter):
        # Read every sampled value up front, then validate them one by one.
        a = args.a
        b = args.b
        c = args.c
        d = args.d
        e = args.e
        f = args.f
        g = args.g
        h = args.h
        i = args.i

        # Scalar spaces.
        assert 1e-3 <= a <= 1e-2
        assert 1e-3 <= b <= 1e-2
        assert 1 <= c <= 10
        assert d in ('a', 'b', 'c', 'd')
        assert e in (True, False)
        # Nested list space: one entry per inner space.
        assert f[0] in (1, 2)
        assert f[1] in (4, 5)
        # Nested dict space, holding a plain value and a sampled object.
        assert 0 <= g['a'] <= 10
        assert g.obj.name in ('auto', 'gluon')
        # Categorical over a string and an object.
        assert hasattr(h, 'name') or h == 'test'
        assert i in ('mxnet', 'pytorch')
        reporter(epoch=1, accuracy=0)

    trial_resources = {'num_cpus': 4, 'num_gpus': 0}
    scheduler = ag.scheduler.FIFOScheduler(
        train_fn,
        resource=trial_resources,
        num_trials=10,
        reward_attr='accuracy',
        time_attr='epoch',
        checkpoint=None,
    )
    scheduler.run()
    scheduler.join_jobs()
from autogluon.tabular import TabularPrediction as task # Training time: train_data = task.Dataset( file_path='https://autogluon.s3.amazonaws.com/datasets/Inc/train.csv' ) # can be local CSV file as well, returns Pandas DataFrame train_data = train_data.head(100) # subsample for faster demo print(train_data.head()) label_column = 'class' # specifies which column do we want to predict savedir = 'ag_hpo_models/' # where to save trained models hyperparams = { 'NN': { 'num_epochs': 10, 'activation': 'relu', 'dropout_prob': ag.Real(0.0, 0.5) }, 'GBM': { 'num_boost_round': 1000, 'learning_rate': ag.Real(0.01, 0.1, log=True) } } predictor = task.fit( train_data=train_data, label=label_column, output_directory=savedir, hyperparameter_tune=True, hyperparameters=hyperparams, num_trials=5, time_limits=1 * 60,
Note that all settings demonstrated here are just chosen for demonstration purposes (to minimize runtime), and do not represent wise choices to use in practice. To maximize predictive accuracy, we recommend you do NOT specify `hyperparameters` or `hyperparameter_tune`, and instead only specify the following fit() arguments: eval_metric=YOUR_METRIC, presets='best_quality' """ import autogluon.core as ag from autogluon.tabular import TabularPrediction as task # Training time: train_data = task.Dataset(file_path='https://autogluon.s3.amazonaws.com/datasets/Inc/train.csv') # can be local CSV file as well, returns Pandas DataFrame train_data = train_data.head(100) # subsample for faster demo print(train_data.head()) label_column = 'class' # specifies which column do we want to predict savedir = 'ag_hpo_models/' # where to save trained models hyperparams = {'NN': {'num_epochs': 10, 'activation': 'relu', 'dropout_prob': ag.Real(0.0, 0.5)}, 'GBM': {'num_boost_round': 1000, 'learning_rate': ag.Real(0.01, 0.1,log=True)}, 'XGB': {'n_estimators': 1000, 'learning_rate': ag.Real(0.01, 0.1,log=True)}} predictor = task.fit(train_data=train_data, label=label_column, output_directory=savedir, hyperparameter_tune=True, hyperparameters=hyperparams, num_trials=5, time_limits=1*60, num_bagging_folds=0, stack_ensemble_levels=0) # since tuning_data = None, automatically determines train/validation split results = predictor.fit_summary() # display detailed summary of fit() process print(results) # Inference time: test_data = task.Dataset(file_path='https://autogluon.s3.amazonaws.com/datasets/Inc/test.csv') # another Pandas DataFrame print(test_data.head()) perf = predictor.evaluate(test_data) # shorthand way to evaluate our predictor if test-labels available
# specify hyperparameter search space
# Plain values are fixed settings; ag.Categorical / ag.Real / ag.Int entries
# are ranges explored by the search.
config = {
    'task': 'ssd',
    'dataset': args.dataset,
    'estimator': 'ssd',
    'base_network': None,
    # Pretrained SSD checkpoints to choose between.
    'transfer': ag.Categorical(
        'ssd_512_vgg16_atrous_coco',
        'ssd_300_resnet34_v1b_coco',
        'ssd_512_resnet50_v1_coco',
        'ssd_512_resnet101_v2_voc',
    ),
    'lr': ag.Real(1e-4, 1e-2, log=True),
    # NOTE(review): presumably the exponent of the batch size
    # (2**3 .. 2**6 = [8, 16, 32, 64]) - confirm against the consumer.
    'batch_size': ag.Int(3, 6),
    'momentum': ag.Real(0.85, 0.95),
    'wd': ag.Real(1e-6, 1e-2, log=True),
    'epochs': 20,
    'num_trials': args.num_trials,
    'search_strategy': 'bayesopt',
}

# specify learning task
def fit(dataset,
        net=ag.Categorical('ResNet50_v1b', 'ResNet18_v1b'),
        optimizer=NAG(learning_rate=ag.Real(1e-3, 1e-2, log=True),
                      wd=ag.Real(1e-4, 1e-3, log=True),
                      multi_precision=False),
        loss=SoftmaxCrossEntropyLoss(),
        split_ratio=0.8,
        batch_size=64,
        input_size=224,
        epochs=20,
        final_fit_epochs=None,
        ensemble=1,
        metric='accuracy',
        nthreads_per_trial=60,
        ngpus_per_trial=1,
        hybridize=True,
        scheduler_options=None,
        search_strategy='random',
        search_options=None,
        plot_results=False,
        verbose=False,
        num_trials=None,
        time_limits=None,
        resume=False,
        output_directory='checkpoint/',
        visualizer='none',
        dist_ip_addrs=None,
        auto_search=True,
        lr_config=ag.Dict(lr_mode='cosine',
                          lr_decay=0.1,
                          lr_decay_period=0,
                          lr_decay_epoch='40,80',
                          warmup_lr=0.0,
                          warmup_epochs=0),
        tricks=ag.Dict(last_gamma=False,
                       use_pretrained=True,
                       use_se=False,
                       mixup=False,
                       mixup_alpha=0.2,
                       mixup_off_epoch=0,
                       label_smoothing=False,
                       no_wd=False,
                       teacher_name=None,
                       temperature=20.0,
                       hard_weight=0.5,
                       batch_norm=False,
                       use_gn=False),
        **kwargs):
    """
    Fit image classification models to a given dataset.

    Parameters
    ----------
    dataset : str or :meth:`autogluon.task.ImageClassification.Dataset`
        Training dataset containing images and their associated class labels.
        Popular image datasets built into AutoGluon can be used by specifying their name as a string
        (options: 'mnist', 'fashionmnist', 'cifar', 'cifar10', 'cifar100', 'imagenet').
    net : str or :class:`autogluon.space.Categorical`
        Which existing neural network models to consider as candidates.
        When `auto_search` is True this value is replaced by the result of
        `auto_suggest_network(dataset, net)`.
    optimizer : str or :class:`autogluon.space.AutoGluonObject`
        Which optimizers to consider as candidates for learning the neural network weights.
        If the object exposes `kwvars['multi_precision']`, that flag is used when the trained
        parameters are loaded into the returned model; otherwise False is used.
    loss : `mxnet.gluon.loss`
        Loss function used during training of the neural network weights.
    split_ratio : float, default = 0.8
        Fraction of dataset to use for training (rest of data is held-out for tuning hyperparameters).
        The final returned model may be fit to all of the data (after hyperparameters have been selected).
    batch_size : int
        How many images to group in each mini-batch during gradient computations in training.
    input_size : int
        Size of images in the dataset (pixels).
    epochs : int
        How many epochs to train the neural networks for at most.
    final_fit_epochs : int, default = None
        Final fit epochs, the same number of epochs will be used as during the HPO if not specified
        (any falsy value, including 0, counts as "not specified").
    ensemble : int, default = 1
        Number of models in the returned predictor. If greater than 1, the best configuration is
        re-run `ensemble - 1` additional times and all resulting models are wrapped in an `Ensemble`.
    metric : str or callable object
        Evaluation metric by which predictions will be ultimately evaluated on test data.
    nthreads_per_trial : int
        How many CPUs to use in each trial (ie. single training run of a model).
        Capped at `get_cpu_count()`.
    ngpus_per_trial : int
        How many GPUs to use in each trial (ie. single training run of a model).
        Capped at `get_gpu_count()`.
    hybridize : bool, default = True
        Forwarded unchanged to the training function.
        NOTE(review): presumably controls hybridization of the Gluon network - confirm.
    scheduler_options : dict
        Extra arguments passed to __init__ of scheduler, to configure the
        orchestration of training jobs during hyperparameter-tuning.
    search_strategy : str, default = 'random'
        Which hyperparameter search algorithm to use.
        Options include: 'random' (random search), 'bayesopt' (Gaussian process
        Bayesian optimization), 'skopt' (SKopt Bayesian optimization), 'grid'
        (grid search), 'hyperband' (Hyperband scheduling with random search),
        'bayesopt-hyperband' (Hyperband scheduling with GP-BO search).
    search_options : dict
        Auxiliary keyword arguments to pass to the searcher that performs
        hyperparameter optimization.
    plot_results : bool
        Whether or not to generate plots summarizing training process.
    verbose : bool
        Whether or not to print out intermediate information during training.
    num_trials : int
        Maximal number of hyperparameter configurations to try out.
        If neither `num_trials` nor `time_limits` is given, 2 trials are run.
    time_limits : int
        Approximately how long `fit()` should run for (wallclock time in seconds).
        `fit()` will stop training new models after this amount of time has elapsed
        (but models which have already started training will continue to completion).
    resume : bool
        If True, the hyperparameter search is started from state loaded from
        os.path.join(output_directory, 'exp1.ag')
    output_directory : str
        Checkpoints of the search state are written to
        os.path.join(output_directory, 'exp1.ag')
    visualizer : str
        Describes method to visualize training progress during `fit()`.
        Options: ['mxboard', 'tensorboard', 'none'].
    dist_ip_addrs : list
        List of IP addresses corresponding to remote workers, in order to leverage distributed computation.
    auto_search : bool
        If True, enables automatic suggestion of network types and hyper-parameter ranges
        adaptively based on provided dataset.
    lr_config : :class:`autogluon.space.Dict`
        Learning-rate schedule settings (bag of tricks used on image classification datasets):

        - lr_mode : str, default = 'cosine'. Describes how learning rate should be adjusted over
          the course of training. Options include: 'cosine', 'poly'
          (older documentation also names 'step').
        - lr_decay : float, default = 0.1. Decay rate of learning rate.
        - lr_decay_period : int, default = 0. Interval for periodic learning rate decays; 0 disables.
        - lr_decay_epoch : str, default = '40,80'. Epochs at which learning rate decays.
        - warmup_lr : float, default = 0.0. Starting warmup learning rate.
        - warmup_epochs : int, default = 0. Number of warmup epochs.
    tricks : :class:`autogluon.space.Dict`
        Training tricks (bag of tricks used on image classification datasets):

        - last_gamma : default = False. Whether to init gamma of the last BN layer in each bottleneck to 0.
        - use_pretrained : default = True. Enable using pretrained model from gluon.
        - use_se : default = False. Use SE layers or not in resnext.
        - mixup : default = False. Whether to train the model with mix-up.
        - mixup_alpha : float, default = 0.2. Beta distribution parameter for mixup sampling.
        - mixup_off_epoch : int, default = 0. How many last epochs to train without mixup.
        - label_smoothing : default = False. Use label smoothing or not in training.
        - no_wd : default = False. Whether to remove weight decay on bias, and beta/gamma for batchnorm layers.
        - teacher_name : str, default = None. Teacher model for distillation training.
        - temperature : float, default = 20.0. Temperature parameter for distillation teacher model.
        - hard_weight : float, default = 0.5. Weight for the loss of one-hot label for distillation training.
        - batch_norm : default = False. Enable batch normalization or not in vgg.
        - use_gn : default = False. Whether to use group norm.
    **kwargs
        Only `grace_period` is read (deprecated; use
        `scheduler_options={'grace_period': ...}` instead). Other entries are ignored.

    Returns
    -------
    :class:`autogluon.task.image_classification.Classifier` object which can make predictions
    on new data and summarize what happened during `fit()`.

    Raises
    ------
    AssertionError
        If `grace_period` is passed both directly and inside `scheduler_options`.

    Examples
    --------
    >>> import autogluon.core as ag
    >>> from autogluon.vision import ImageClassification as task
    >>> dataset = task.Dataset(train_path='data/train',
    ...                        test_path='data/test')
    >>> classifier = task.fit(dataset,
    ...                       net=ag.space.Categorical('resnet18_v1', 'resnet34_v1'),
    ...                       time_limits=10 * 60,
    ...                       ngpus_per_trial=1,
    ...                       num_trials=4)
    >>> test_data = task.Dataset('~/data/test', train=False)
    >>> test_acc = classifier.evaluate(test_data)
    """
    checkpoint = os.path.join(output_directory, 'exp1.ag')
    if auto_search:
        # The strategies can be injected here, for example: automatic suggest some hps
        # based on the dataset statistics
        net = auto_suggest_network(dataset, net)

    # Never request more resources per trial than the machine reports.
    nthreads_per_trial = min(nthreads_per_trial, get_cpu_count())
    ngpus_per_trial = min(ngpus_per_trial, get_gpu_count())

    # If only time_limits is given, the scheduler starts trials until the
    # time limit is reached
    if num_trials is None and time_limits is None:
        num_trials = 2

    final_fit_epochs = final_fit_epochs or epochs

    train_image_classification.register_args(
        dataset=dataset,
        net=net,
        optimizer=optimizer,
        loss=loss,
        metric=metric,
        num_gpus=ngpus_per_trial,
        split_ratio=split_ratio,
        batch_size=batch_size,
        input_size=input_size,
        epochs=epochs,
        final_fit_epochs=final_fit_epochs,
        verbose=verbose,
        num_workers=nthreads_per_trial,
        hybridize=hybridize,
        final_fit=False,
        tricks=tricks,
        lr_config=lr_config)

    # Backward compatibility:
    grace_period = kwargs.get('grace_period')
    if grace_period is not None:
        if scheduler_options is None:
            scheduler_options = {'grace_period': grace_period}
        else:
            assert 'grace_period' not in scheduler_options, \
                "grace_period appears both in scheduler_options and as direct argument"
            # Copy so the caller's dict is not modified.
            scheduler_options = copy.copy(scheduler_options)
            scheduler_options['grace_period'] = grace_period
        logger.warning("grace_period is deprecated, use "
                       "scheduler_options={'grace_period': ...} instead")
    scheduler_options = compile_scheduler_options(
        scheduler_options=scheduler_options,
        search_strategy=search_strategy,
        search_options=search_options,
        nthreads_per_trial=nthreads_per_trial,
        ngpus_per_trial=ngpus_per_trial,
        checkpoint=checkpoint,
        num_trials=num_trials,
        time_out=time_limits,
        resume=resume,
        visualizer=visualizer,
        time_attr='epoch',
        reward_attr='classification_reward',
        dist_ip_addrs=dist_ip_addrs,
        epochs=epochs)

    results = BaseTask.run_fit(train_image_classification, search_strategy,
                               scheduler_options, plot_results=plot_results)
    args = sample_config(train_image_classification.args, results['best_config'])

    # `optimizer` is documented as possibly being a plain str, which has no
    # `kwvars`; fall back to False instead of raising an AttributeError only
    # after the whole search has already finished.
    optimizer_kwvars = getattr(optimizer, 'kwvars', None) or {}
    multi_precision = optimizer_kwvars.get('multi_precision', False)

    def _build_model(trial_results):
        # Instantiate the selected network on CPU and load the trained
        # parameters, which are removed from `trial_results` in the process.
        network = get_network(args.net,
                              num_classes=trial_results['num_classes'],
                              ctx=mx.cpu(0))
        update_params(network, trial_results.pop('model_params'), multi_precision)
        return network

    model = _build_model(results)
    if ensemble > 1:
        # Re-run the best configuration for each additional ensemble member.
        models = [model]
        scheduler = create_scheduler(train_image_classification, search_strategy,
                                     scheduler_options)
        for _ in range(1, ensemble):
            models.append(_build_model(scheduler.run_with_config(results['best_config'])))
        model = Ensemble(models)

    # Drop entries that are not handed on to the Classifier.
    results.pop('args')
    args.pop('optimizer')
    args.pop('dataset')
    args.pop('loss')
    return Classifier(model, results, default_val_fn, checkpoint, args)
parser.add_argument(
    '--num-trials',
    type=int,
    default=3,
    help='number of training trials',
)
args = parser.parse_args()
logging.info('user defined arguments: {}'.format(args))

# specify hyperparameter search space
# Plain values are fixed settings; ag.Categorical / ag.Real / ag.Int entries
# are ranges explored by the search.
config = {
    'task': 'yolo3',
    'dataset': args.dataset,
    'estimator': 'yolo3',
    'base_network': None,
    # Pretrained YOLOv3 checkpoints to choose between.
    'transfer': ag.Categorical(
        'yolo3_darknet53_voc',
        'yolo3_darknet53_coco',
    ),
    'lr': ag.Real(1e-4, 1e-2, log=True),
    # NOTE(review): presumably the exponent of the batch size
    # (2**3 .. 2**6 = [8, 16, 32, 64]) - confirm against the consumer.
    'batch_size': ag.Int(3, 6),
    'momentum': ag.Real(0.85, 0.95),
    'wd': ag.Real(1e-6, 1e-2, log=True),
    'epochs': 20,
    'num_trials': args.num_trials,
    'search_strategy': 'bayesopt',
}

# specify learning task
task = ObjectDetection(config)

# specify dataset
dataset = Dataset.get(args.dataset)
# NOTE(review): 0.8 is presumably the training fraction (80% train / 20%
# validation) - confirm against Dataset.split.
train_data, valid_data = dataset.split(0.8)