def electra_base_search_aggregator() -> dict:
    cfg = update_5best(electra_base_no_hpo())
    cfg['hpo_params']['num_trials'] = 10
    cfg['models']['MultimodalTextModel']['search_space']['model.network.agg_net.agg_type'] \
        = space.Categorical('attention', 'mean', 'max', 'concat')
    cfg['models']['MultimodalTextModel']['search_space']['model.network.base_feature_units'] \
        = space.Categorical(-1, 128)
    return cfg


def albert_base_grid_search() -> dict:
    """Grid of optimization hyperparameters for the ALBERT-base backbone, explored with random search."""
    cfg = update_5best(albert_base_no_hpo())
    cfg['hpo_params']['num_trials'] = 12
    cfg['hpo_params']['search_strategy'] = 'random'
    cfg['models']['MultimodalTextModel']['search_space'][
        'optimization.num_train_epochs'] = space.Categorical(5, 10)
    cfg['models']['MultimodalTextModel']['search_space'][
        'optimization.lr'] = space.Categorical(1E-4, 5E-5)
    cfg['models']['MultimodalTextModel']['search_space'][
        'optimization.wd'] = space.Categorical(0.01, 1E-4, 0.0)
    return cfg


def electra_base_grid_search() -> dict:
    """Grid of optimization hyperparameters for the ELECTRA-base backbone, explored with random search."""
    cfg = update_5best(electra_base_no_hpo())
    cfg['hpo_params']['num_trials'] = 12
    cfg['hpo_params']['search_strategy'] = 'random'
    cfg['models']['MultimodalTextModel']['search_space'][
        'optimization.num_train_epochs'] = space.Categorical(5, 10)
    cfg['models']['MultimodalTextModel']['search_space'][
        'optimization.lr'] = space.Categorical(1E-4, 5E-5)
    cfg['models']['MultimodalTextModel']['search_space'][
        'optimization.layerwise_lr_decay'] = space.Categorical(0.8, 0.9, 1.0)
    cfg['models']['MultimodalTextModel']['search_space'][
        'optimization.wd'] = space.Categorical(0.01, 1E-4, 0.0)
    return cfg


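# Illustrative sketch (hypothetical helper, not part of the preset functions above).
# It shows how one of these preset factories can be used as a starting point: fetch the config,
# then overwrite individual `search_space` entries or `hpo_params` before passing the dict to
# `TextPredictor.fit(..., hyperparameters=cfg)`.
def _example_customized_grid_search() -> dict:
    """Illustrative only: a smaller-budget variant of the ELECTRA-base grid search."""
    cfg = electra_base_grid_search()
    cfg['hpo_params']['num_trials'] = 4                              # fewer trials for a quicker run
    cfg['models']['MultimodalTextModel']['search_space'][
        'optimization.num_train_epochs'] = space.Categorical(3, 5)   # shorter training schedules
    return cfg

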
def base() -> dict:
    """The base hyperparameters.

    It contains a list of candidate models. Each model has its own search space inside.
    """
    ret = {
        'models': {
            'MultimodalTextModel': {
                'backend': 'gluonnlp_v0',
                'search_space': {
                    'model.backbone.name': 'google_electra_base',
                    'optimization.batch_size': 128,
                    'optimization.per_device_batch_size': 4,
                    'optimization.num_train_epochs': 10,
                    'optimization.lr': space.Categorical(1E-4),
                    'optimization.wd': 1E-4,
                    'optimization.layerwise_lr_decay': 0.8
                }
            },
        },
        'tune_kwargs': {                   # Same as the hyperparameter_tune_kwargs in AutoGluon Tabular.
            'search_strategy': 'local',    # Can be 'random', 'bayesopt', 'skopt',
                                           # 'hyperband', 'bayesopt_hyperband'
            'searcher': 'random',
            'search_options': None,        # Extra kwargs passed to searcher
            'scheduler_options': None,     # Extra kwargs passed to scheduler
            'num_trials': 1,               # The number of trials
        },
    }
    return ret


def default() -> dict:
    """The default hyperparameters.

    It will have a version key and a list of candidate models.
    Each model has its own search space inside.
    """
    ret = {
        'version': 1,                      # Version of TextPrediction Model
        'models': {
            'MultimodalTextModel': {
                'backend': 'gluonnlp_v0',
                'search_space': {
                    'model.backbone.name': 'google_electra_base',
                    'optimization.batch_size': 128,
                    'optimization.per_device_batch_size': 8,
                    'optimization.num_train_epochs': 10,
                    'optimization.lr': space.Categorical(5E-5),
                    'optimization.wd': 1E-4,
                    'optimization.layerwise_lr_decay': 0.8
                }
            },
        },
        'misc': {
            'holdout_frac': None,          # If it is not provided, we will use the default strategy.
        },
        'hpo_params': {
            'search_strategy': 'random',   # Can be 'random', 'bayesopt', 'skopt',
                                           # 'hyperband', 'bayesopt_hyperband'
            'search_options': None,        # Extra kwargs passed to searcher
            'scheduler_options': None,     # Extra kwargs passed to scheduler
            'time_limits': None,           # The total time limit
            'num_trials': 1,               # The number of trials
        },
        'seed': None,                      # The seed value
    }
    return ret


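# Illustrative sketch (hypothetical helper, not used elsewhere in this module).
# It demonstrates the layout of the dictionary returned by `default()`: enabling a small HPO run
# only requires replacing a scalar search-space entry with a `space.Categorical(...)` and raising
# `hpo_params['num_trials']`.
def _example_default_with_small_hpo() -> dict:
    """Illustrative only: turn the single-trial default config into a 3-trial HPO config."""
    cfg = default()
    cfg['hpo_params']['num_trials'] = 3
    cfg['models']['MultimodalTextModel']['search_space'][
        'optimization.lr'] = space.Categorical(1E-4, 5E-5, 1E-5)
    return cfg

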
def small_config_hpo_for_test():
    cfg = small_config_no_hpo_for_test()
    cfg['models']['MultimodalTextModel']['search_space']['optimization.lr'] \
        = space.Categorical(1E-4, 1E-5)
    return cfg


def fit(self,
        train_data,
        tuning_data=None,
        time_limit=None,
        presets=None,
        hyperparameters=None,
        column_types=None,
        num_cpus=None,
        num_gpus=None,
        num_trials=None,
        plot_results=None,
        holdout_frac=None,
        save_path=None,
        seed=0):
    """
    Fit Transformer models to predict the label column of a data table based on the other columns
    (which may contain text or numeric/categorical features).

    Parameters
    ----------
    train_data : str or :class:`TabularDataset` or :class:`pd.DataFrame`
        Table of the training data, which is similar to a pandas DataFrame.
        If str is passed, `train_data` will be loaded using the str value as the file path.
    tuning_data : str or :class:`TabularDataset` or :class:`pd.DataFrame`, default = None
        Another dataset containing validation data reserved for tuning processes such as early
        stopping and hyperparameter tuning. This dataset should be in the same format as
        `train_data`. If str is passed, `tuning_data` will be loaded using the str value as the
        file path. Note: final model returned may be fit on `tuning_data` as well as `train_data`.
        Do not provide your evaluation test data here!
        If `tuning_data = None`, `fit()` will automatically hold out some random validation
        examples from `train_data`.
    time_limit : int, default = None
        Approximately how long `fit()` should run for (wallclock time in seconds).
        If not specified, `fit()` will run until the model has completed training.
    presets : str, default = None
        Presets are pre-registered configurations that control training
        (hyperparameters and other aspects).
        It is recommended to specify presets and avoid specifying most other `fit()` arguments
        or model hyperparameters prior to becoming familiar with AutoGluon.
        Print all available presets via `autogluon.text.list_presets()`.
        Some notable presets include:
            - "best_quality": produce the most accurate overall predictor
              (regardless of its efficiency).
            - "medium_quality_faster_train": produce an accurate predictor but take efficiency
              into account (this is the default preset).
            - "lower_quality_fast_train": produce a predictor that is quick to train and make
              predictions with, even if its accuracy is worse.
    hyperparameters : dict, default = None
        The hyperparameters of the `fit()` function, which affect the resulting accuracy of the
        trained predictor. Experienced AutoGluon users can use this argument to specify neural
        network hyperparameter values/search-spaces as well as which hyperparameter-tuning
        strategy should be employed. See the "Text Prediction" tutorials for examples.
    column_types : dict, default = None
        The type of data in each table column can be specified via a dictionary that maps the
        column name to its data type. For example:
        `column_types = {"item_name": "text", "brand": "text", "product_description": "text",
        "height": "numerical"}` may be used for a table with columns: "item_name", "brand",
        "product_description", and "height". If None, column_types will be automatically
        inferred from the data. The current supported types are:
            - "text": each row in this column contains text (sentence, paragraph, etc.).
            - "numerical": each row in this column contains a number.
            - "categorical": each row in this column belongs to one of K categories.
    num_cpus : int, default = None
        The number of CPUs to use for each training run (i.e. one hyperparameter-tuning trial).
    num_gpus : int, default = None
        The number of GPUs to use for each training run (i.e. one hyperparameter-tuning trial).
        We recommend at least 1 GPU for TextPredictor as its neural network models are
        computationally intensive.
    num_trials : int, default = None
        If hyperparameter-tuning is used, specifies how many HPO trials should be run
        (assuming `time_limit` has not been exceeded).
        By default, this is the number of trials specified in `hyperparameters` or `presets`.
        If specified here, this value will overwrite the value in
        `hyperparameters['tune_kwargs']['num_trials']`.
    plot_results : bool, default = None
        Whether to plot intermediate results from training. If None, will be decided based on
        the environment in which `fit()` is run.
    holdout_frac : float, default = None
        Fraction of train_data to holdout as tuning data for optimizing hyperparameters
        (ignored unless `tuning_data = None`).
        Default value (if None) is selected based on the number of rows in the training data
        and whether hyperparameter-tuning is utilized.
    save_path : str, default = None
        The path for auto-saving the models' weights.
    seed : int, default = 0
        The random seed to use for this training run.
        If None, no seed will be specified and repeated runs will produce different results.

    Returns
    -------
    :class:`TextPredictor` object. Returns self.
    """
    assert self._fit_called is False
    is_continue_training = self._model is not None
    verbosity = self.verbosity
    if verbosity is None:
        verbosity = 3
    if save_path is not None:
        self._path = setup_outputdir(save_path, warn_if_exist=True)
    if is_continue_training:
        # We have entered the continue training / transfer learning setting
        # because the model is not None.
        logger.info('Continue training the existing model...')
        assert presets is None, 'presets is not supported in the continue training setting.'
        flat_dict = self._model.config.to_flat_dict()
        flat_dict['optimization.lr'] = space.Categorical(flat_dict['optimization.lr'])
        existing_hparams = {'models': {'MultimodalTextModel': {'search_space': flat_dict}}}
        existing_hparams = merge_params(ag_text_presets.create('default'), existing_hparams)
        hyperparameters = merge_params(existing_hparams, hyperparameters)
        # Check that the merged hyperparameters match the existing hyperparameters.
        # Here, we ensure that the model configurations remain the same.
        existing_search_space = existing_hparams['models']['MultimodalTextModel']['search_space']
        for key in hyperparameters['models']['MultimodalTextModel']['search_space']:
            if key in existing_search_space and (key.startswith('model.')
                                                 or key.startswith('preprocessing.')):
                new_value = hyperparameters['models']['MultimodalTextModel']['search_space'][key]
                old_value = existing_search_space[key]
                assert new_value == old_value, \
                    f'The model architecture / preprocessing logic is not allowed to change ' \
                    f'in the continue training mode. ' \
                    f'"{key}" is changed to be "{new_value}" from "{old_value}". ' \
                    f'Please check the specified hyperparameters = {hyperparameters}'
    else:
        if presets is not None:
            preset_hparams = ag_text_presets.create(presets)
        else:
            preset_hparams = ag_text_presets.create('default')
        hyperparameters = merge_params(preset_hparams, hyperparameters)
    if num_trials is not None:
        hyperparameters['tune_kwargs']['num_trials'] = num_trials
    if isinstance(self._label, str):
        label_columns = [self._label]
    else:
        label_columns = list(self._label)
    # Get the training and tuning data as pandas dataframes
    if isinstance(train_data, str):
        train_data = load_pd.load(train_data)
    if not isinstance(train_data, pd.DataFrame):
        raise AssertionError(f'train_data is required to be a pandas DataFrame, '
                             f'but was instead: {type(train_data)}')
    all_columns = list(train_data.columns)
    feature_columns = [ele for ele in all_columns if ele not in label_columns]
    train_data = train_data[all_columns]
    # Get tuning data
    if tuning_data is not None:
        if isinstance(tuning_data, str):
            tuning_data = load_pd.load(tuning_data)
        if not isinstance(tuning_data, pd.DataFrame):
            raise AssertionError(f'tuning_data is required to be a pandas DataFrame, '
                                 f'but was instead: {type(tuning_data)}')
        tuning_data = tuning_data[all_columns]
    else:
        if holdout_frac is None:
            num_trials = hyperparameters['tune_kwargs']['num_trials']
            if num_trials == 1:
                holdout_frac = default_holdout_frac(len(train_data), False)
            else:
                # For HPO, we will need to use a larger held-out ratio
                holdout_frac = default_holdout_frac(len(train_data), True)
        train_data, tuning_data = train_test_split(train_data,
                                                   test_size=holdout_frac,
                                                   random_state=np.random.RandomState(seed))
    if is_continue_training:
        assert set(label_columns) == set(self._model.label_columns), \
            f'Label columns do not match. Inferred label column from data = {set(label_columns)}.' \
            f' Label column in model = {set(self._model.label_columns)}'
        for col_name in self._model.feature_columns:
            assert col_name in feature_columns, \
                f'In the loaded model, "{col_name}" is a feature column, but there is ' \
                f'no such column in the DataFrame.'
        model_hparams = hyperparameters['models']['MultimodalTextModel']
        if plot_results is None:
            plot_results = in_ipynb()
        self._model.train(train_data=train_data,
                          tuning_data=tuning_data,
                          num_cpus=num_cpus,
                          num_gpus=num_gpus,
                          search_space=model_hparams['search_space'],
                          tune_kwargs=hyperparameters['tune_kwargs'],
                          time_limit=time_limit,
                          continue_training=True,
                          seed=seed,
                          plot_results=plot_results,
                          verbosity=verbosity)
    else:
        column_types, problem_type = infer_column_problem_types(
            train_data, tuning_data,
            label_columns=label_columns,
            problem_type=self._problem_type,
            provided_column_types=column_types)
        self._eval_metric, log_metrics = infer_eval_log_metrics(
            problem_type=problem_type,
            eval_metric=self._eval_metric)
        has_text_column = False
        for k, v in column_types.items():
            if v == _C.TEXT:
                has_text_column = True
                break
        if not has_text_column:
            raise AssertionError('No Text Column is found! This is currently not supported by '
                                 'the TextPredictor. You may try to use '
                                 'autogluon.tabular.TabularPredictor.\n'
                                 'The inferred column properties of the training data is {}'
                                 .format(column_types))
        logger.info('Problem Type="{}"'.format(problem_type))
        logger.info(printable_column_type_string(column_types))
        self._problem_type = problem_type
        if 'models' not in hyperparameters or \
                'MultimodalTextModel' not in hyperparameters['models']:
            raise ValueError('The current TextPredictor only supports "MultimodalTextModel" '
                             'and you must ensure that '
                             'hyperparameters["models"]["MultimodalTextModel"] can be accessed.')
        model_hparams = hyperparameters['models']['MultimodalTextModel']
        self._backend = model_hparams['backend']
        if plot_results is None:
            plot_results = in_ipynb()
        if self._backend == 'gluonnlp_v0':
            import warnings
            warnings.filterwarnings('ignore', module='mxnet')
            from ..mx.models import MultiModalTextModel
            self._model = MultiModalTextModel(column_types=column_types,
                                              feature_columns=feature_columns,
                                              label_columns=label_columns,
                                              problem_type=self._problem_type,
                                              eval_metric=self._eval_metric,
                                              log_metrics=log_metrics,
                                              output_directory=self._path)
            self._model.train(train_data=train_data,
                              tuning_data=tuning_data,
                              num_cpus=num_cpus,
                              num_gpus=num_gpus,
                              search_space=model_hparams['search_space'],
                              tune_kwargs=hyperparameters['tune_kwargs'],
                              time_limit=time_limit,
                              seed=seed,
                              plot_results=plot_results,
                              verbosity=verbosity)
        else:
            raise NotImplementedError("Currently, we only support using "
                                      "the autogluon-contrib-nlp and MXNet "
                                      "as the backend of AutoGluon-Text. In the future, "
                                      "we will support other models.")
    logger.info(f'Training completed. Auto-saving to "{self.path}". '
                f'For loading the model, you can use'
                f' `predictor = TextPredictor.load("{self.path}")`')
    self.save(self.path)
    return self
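

# Illustrative usage sketch (hypothetical helper, not called anywhere in this module).
# It demonstrates the `fit()` arguments documented above. The file path 'train.csv' is a
# placeholder, the preset name comes from the docstring, and the predictor is assumed to have
# been constructed elsewhere (e.g. via the public TextPredictor class).
def _example_fit_usage(predictor, train_path='train.csv'):
    """Illustrative only: fit a predictor on a CSV file under a wallclock budget."""
    return predictor.fit(train_data=train_path,                    # str paths are loaded via load_pd.load()
                         presets='medium_quality_faster_train',    # the default-quality preset
                         time_limit=600,                           # ~10 minutes of wallclock time
                         num_gpus=1,                               # at least 1 GPU is recommended
                         seed=0)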