def predict_proba(self, X, preprocess=True):
    from fastai.basic_data import DatasetType
    from fastai.tabular import TabularList, FillMissing, Categorify, Normalize
    from fastai.utils.mod_display import progress_disabled_ctx

    if preprocess:
        X = self.preprocess(X)
    procs = [FillMissing, Categorify, Normalize]
    self.model.data.add_test(
        TabularList.from_df(X,
                            cat_names=self.cat_columns,
                            cont_names=self.cont_columns,
                            procs=procs))
    with progress_disabled_ctx(self.model) as model:
        preds, _ = model.get_preds(ds_type=DatasetType.Test)
    if self.problem_type == REGRESSION:
        if self.y_scaler is not None:
            return self.y_scaler.inverse_transform(preds.numpy()).reshape(-1)
        else:
            return preds.numpy().reshape(-1)
    if self.problem_type == BINARY:
        return preds[:, 1].numpy()
    else:
        return preds.numpy()
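# Hedged sketch (not from the original source): how the tail of predict_proba
# post-processes fastai's prediction tensor. BINARY problems return only the
# positive-class column; MULTICLASS keeps the full probability matrix.
# `preds` below is a stand-in for the tensor returned by get_preds.
import torch

preds = torch.softmax(torch.randn(4, 2), dim=1)  # fake (n_rows, n_classes) probabilities
binary_probs = preds[:, 1].numpy()   # BINARY: probability of the positive class
all_probs = preds.numpy()            # MULTICLASS: full per-class distribution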
def _predict_proba(self, X, **kwargs):
    from fastai.basic_data import DatasetType
    from fastai.tabular import TabularList
    from fastai.utils.mod_display import progress_disabled_ctx

    X = self.preprocess(X, **kwargs)
    # fastai has issues predicting on a single row, duplicating the row as a workaround
    single_row = len(X) == 1
    if single_row:
        X = pd.concat([X, X]).reset_index(drop=True)
    # Copy cat_columns and cont_columns because TabularList is mutating the list
    self.model.data.add_test(
        TabularList.from_df(X,
                            cat_names=self.cat_columns.copy(),
                            cont_names=self.cont_columns.copy(),
                            procs=self.procs))
    with progress_disabled_ctx(self.model) as model:
        preds, _ = model.get_preds(ds_type=DatasetType.Test)
    if single_row:
        preds = preds[:1, :]
    if self.problem_type == REGRESSION:
        if self.y_scaler is not None:
            return self.y_scaler.inverse_transform(preds.numpy()).reshape(-1)
        else:
            return preds.numpy().reshape(-1)
    if self.problem_type == BINARY:
        return preds[:, 1].numpy()
    else:
        return preds.numpy()
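# Hedged, self-contained sketch of the single-row workaround used above:
# fastai v1's get_preds can misbehave on a one-row test set, so the row is
# duplicated before prediction and the extra prediction dropped afterwards.
# The helper name is hypothetical.
import pandas as pd

def ensure_min_rows(X: pd.DataFrame):
    """Return (X, was_single_row), duplicating X when it has a single row."""
    single_row = len(X) == 1
    if single_row:
        X = pd.concat([X, X]).reset_index(drop=True)
    return X, single_row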
def train(self, graph, max_epoch=100, min_delta=0, patience=0):
    model_num = self._model_num
    self._model_num = self._model_num + 1
    learn = Learner(self.data, graph.generate_model(),
                    loss_func=self.loss_func,
                    metrics=self.metrics,
                    callback_fns=[partial(ValueTrackingCallback,
                                          value_holder=self.accuracy,
                                          monitor=self.monitor,
                                          min_delta=min_delta,
                                          patience=patience)])
    # progress_disabled_ctx is a context manager; it must be entered with
    # `with` (merely instantiating it has no effect on the progress bar).
    with progress_disabled_ctx(learn) as learn:
        learn.fit(max_epoch)
    print(f'Saving model {model_num}...', end='')
    graph.save(os.path.join(self.path, str(model_num)))
    print(' Done!')
    print(f'Model number: {model_num}\nBest accuracy: {self.accuracy.value}')
    return model_num, self.accuracy.value.item()
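# Sketch of the callback_fns idiom used above, assuming fastai v1: the
# Learner expects callback *factories* it can call with itself as the first
# argument, so functools.partial pre-binds the remaining arguments.
from functools import partial
from fastai.callbacks import EarlyStoppingCallback

early_stop_fn = partial(EarlyStoppingCallback, monitor='valid_loss',
                        min_delta=0.01, patience=2)
# learn = Learner(data, model, callback_fns=[early_stop_fn])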
def evaluation_fn(parameters):
    # lr = self.get_param_value('learning_rate')
    # num_epochs = self.get_param_value('num_epochs')
    # moms = (self.get_param_value('momentum0'), self.get_param_value('momentum1'))
    # ps = self.get_param_value('dropout_ps')
    # wd = self.get_param_value('weight_decay')
    # use_bn = self.get_param_value('use_bn')
    lr = (eval(DashVerum.v_resp['learning_rate']['default'])
          if not DashVerum.v_resp['learning_rate']['flag']
          else parameters['learning_rate'])
    num_epochs = (DashVerum.v_resp['num_epochs']['default']
                  if not DashVerum.v_resp['num_epochs']['flag']
                  else parameters['num_epochs'])
    moms = ((DashVerum.v_resp['momentum0']['default']
             if not DashVerum.v_resp['momentum0']['flag']
             else parameters['momentum0']),
            (DashVerum.v_resp['momentum1']['default']
             if not DashVerum.v_resp['momentum1']['flag']  # was checking the learning_rate flag
             else parameters['momentum1']))
    ps = (DashVerum.v_resp['dropout_ps']['default']
          if not DashVerum.v_resp['dropout_ps']['flag']  # was checking the learning_rate flag
          else parameters['dropout_ps'])
    # wd = (
    #     DashVerum.v_resp['weight_decay']['default'] if not DashVerum.v_resp['weight_decay']['flag']
    #     else parameters['weight_decay']
    # )
    use_bn = (DashVerum.v_resp['use_bn']['default']
              if not DashVerum.v_resp['use_bn']['flag']
              else parameters['use_bn'])
    # learn = load_learner('./', 'verum_test.pkl')
    # learn.data = self.data
    with open('./data/response.json') as f:
        response = json.load(f)
    application = response['task']
    save_dir = Path(response['save']['save_dir'])
    save_name = Path(response['save']['save_name'])
    learner_class = learner_class_map[application]
    learn = getattr(learner_class, f'create_{application}_learner')(response)
    learn.model.ps = ps
    # learn.model.wd = wd
    learn.model.use_bn = use_bn
    validation_set = learn.data.valid_dl
    learn.data.valid_dl = None
    with progress_disabled_ctx(learn) as learn:
        learn.fit_one_cycle(num_epochs, max_lr=lr, moms=moms)
    learn.data.valid_dl = validation_set
    if DashVerum.v_resp['metric']['name'] == 'error':
        metric = learn.validate()[0]
    else:
        metric = learn.validate(metrics=eval(DashVerum.v_resp['metric']['name']))[0]
    return metric
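# The repeated "tuned value if its flag is set, else the stored default"
# ternaries above could be factored into a helper like this (hypothetical
# name, assuming the same v_resp/parameters dict shapes).
def resolve_param(v_resp, parameters, name):
    entry = v_resp[name]
    return parameters[name] if entry['flag'] else entry['default']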
def train(self, **kwargs):
    """
    Train self.learner model.
    """
    self.update_params(**kwargs)
    self.init_model()
    frozen_epochs = self.train_params['frozen_epochs']
    unfrozen_epochs = self.train_params['unfrozen_epochs']
    frozen_lr = self.train_params['frozen_lr']
    unfrozen_lr = self.train_params['unfrozen_lr']
    if self.progressbar:
        self.learner.fit_one_cycle(frozen_epochs, frozen_lr)
        self.learner.unfreeze()
        self.learner.fit_one_cycle(unfrozen_epochs, unfrozen_lr)
    else:
        with progress_disabled_ctx(self.learner) as self.learner:
            self.learner.fit_one_cycle(frozen_epochs, frozen_lr)
            self.learner.unfreeze()
            self.learner.fit_one_cycle(unfrozen_epochs, unfrozen_lr)
    self.is_fitted = True
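# Minimal sketch of the frozen-then-unfrozen schedule this method wraps,
# written against the fastai v1 Learner API (the helper name and default
# hyperparameters are illustrative assumptions, not from the source).
def two_phase_fit(learner, frozen_epochs=2, unfrozen_epochs=5,
                  frozen_lr=1e-3, unfrozen_lr=slice(1e-5, 1e-3)):
    learner.fit_one_cycle(frozen_epochs, frozen_lr)    # head only (backbone frozen)
    learner.unfreeze()                                 # unlock the whole network
    learner.fit_one_cycle(unfrozen_epochs, unfrozen_lr)  # discriminative learning rates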
def _fit(self, X_train, y_train, X_val, y_val, time_limit=None, **kwargs):
    try_import_fastai_v1()
    from fastai.layers import LabelSmoothingCrossEntropy
    from fastai.tabular import tabular_learner
    from fastai.utils.mod_display import progress_disabled_ctx
    from .callbacks import EarlyStoppingCallbackWithTimeLimit, SaveModelCallback

    start_time = time.time()

    self.y_scaler = self.params.get('y_scaler', None)
    if self.y_scaler is not None:
        self.y_scaler = copy.deepcopy(self.y_scaler)

    logger.log(15, f'Fitting Neural Network with parameters {self.params}...')
    data = self.preprocess_train(X_train, y_train, X_val, y_val)

    nn_metric, objective_func_name = self.__get_objective_func_name()
    objective_func_name_to_monitor = self.__get_objective_func_to_monitor(objective_func_name)
    objective_optim_mode = 'min' if objective_func_name in [
        'root_mean_squared_error', 'mean_squared_error', 'mean_absolute_error', 'r2'  # Regression objectives
    ] else 'auto'

    # TODO: calculate max emb concat layer size and use 1st layer as that value and 2nd in between number of classes and the value
    if self.params.get('layers', None) is not None:
        layers = self.params['layers']
    elif self.problem_type in [REGRESSION, BINARY]:
        layers = [200, 100]
    else:
        base_size = max(len(data.classes) * 2, 100)
        layers = [base_size * 2, base_size]

    loss_func = None
    if self.problem_type in [BINARY, MULTICLASS] and self.params.get('smoothing', 0.0) > 0.0:
        loss_func = LabelSmoothingCrossEntropy(self.params['smoothing'])

    ps = self.params['ps']
    if not isinstance(ps, list):
        ps = [ps]

    if time_limit:
        time_elapsed = time.time() - start_time
        time_left = time_limit - time_elapsed
    else:
        time_left = None

    early_stopping_fn = partial(EarlyStoppingCallbackWithTimeLimit,
                                monitor=objective_func_name_to_monitor,
                                mode=objective_optim_mode,
                                min_delta=self.params['early.stopping.min_delta'],
                                patience=self.params['early.stopping.patience'],
                                time_limit=time_left)

    self.model = tabular_learner(data, layers=layers, ps=ps,
                                 emb_drop=self.params['emb_drop'],
                                 metrics=nn_metric,
                                 loss_func=loss_func,
                                 callback_fns=[early_stopping_fn])
    logger.log(15, self.model.model)

    with make_temp_directory() as temp_dir:
        save_callback = SaveModelCallback(self.model,
                                          monitor=objective_func_name_to_monitor,
                                          mode=objective_optim_mode,
                                          name=self.name)
        with progress_disabled_ctx(self.model) as model:
            original_path = model.path
            model.path = Path(temp_dir)
            model.fit_one_cycle(self.params['epochs'], self.params['lr'], callbacks=save_callback)

            # Load the best one and export it
            model.load(self.name)

            if objective_func_name == 'log_loss':
                eval_result = model.validate()[0]
            else:
                eval_result = model.validate()[1].numpy().reshape(-1)[0]

            logger.log(15, f'Model validation metrics: {eval_result}')
            model.path = original_path
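# Standalone sketch of the hidden-layer sizing heuristic in _fit above
# (hypothetical helper name; mirrors the branching exactly).
def default_layers(problem_type, n_classes=None):
    if problem_type in (REGRESSION, BINARY):
        return [200, 100]
    base_size = max(n_classes * 2, 100)   # scale the layers with the class count
    return [base_size * 2, base_size]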
def _fit(self, X, y, X_val=None, y_val=None, time_limit=None,
         num_cpus=None, num_gpus=0, sample_weight=None, **kwargs):
    try_import_fastai_v1()
    import torch
    from fastai.layers import LabelSmoothingCrossEntropy
    from fastai.tabular import tabular_learner
    from fastai.utils.mod_display import progress_disabled_ctx
    from fastai.core import defaults
    from .callbacks import EarlyStoppingCallbackWithTimeLimit, SaveModelCallback

    start_time = time.time()
    if sample_weight is not None:  # TODO: support
        logger.log(15, "sample_weight not yet supported for NNFastAiTabularModel, this model will ignore them in training.")

    params = self.params.copy()

    self.y_scaler = params.get('y_scaler', None)
    if self.y_scaler is not None:
        self.y_scaler = copy.deepcopy(self.y_scaler)

    if num_cpus is None:
        num_cpus = defaults.cpus
    # additional workers are helping only when fork is enabled; in other mp modes, communication overhead reduces performance
    num_workers = int(num_cpus / 2)
    if not is_fork_enabled():
        num_workers = 0

    if num_gpus is not None:
        if num_gpus == 0:
            # TODO: Does not obviously impact inference speed
            defaults.device = torch.device('cpu')
        else:
            defaults.device = torch.device('cuda')

    logger.log(15, f'Fitting Neural Network with parameters {params}...')
    data = self._preprocess_train(X, y, X_val, y_val, num_workers=num_workers)

    nn_metric, objective_func_name = self.__get_objective_func_name()
    objective_func_name_to_monitor = self.__get_objective_func_to_monitor(objective_func_name)
    objective_optim_mode = 'min' if objective_func_name in [
        'root_mean_squared_error', 'mean_squared_error', 'mean_absolute_error', 'r2'  # Regression objectives
    ] else 'auto'

    # TODO: calculate max emb concat layer size and use 1st layer as that value and 2nd in between number of classes and the value
    if params.get('layers', None) is not None:
        layers = params['layers']
    elif self.problem_type in [REGRESSION, BINARY]:
        layers = [200, 100]
    else:
        base_size = max(len(data.classes) * 2, 100)
        layers = [base_size * 2, base_size]

    loss_func = None
    if self.problem_type in [BINARY, MULTICLASS] and params.get('smoothing', 0.0) > 0.0:
        loss_func = LabelSmoothingCrossEntropy(params['smoothing'])

    ps = params['ps']
    if not isinstance(ps, list):
        ps = [ps]

    if time_limit:
        time_elapsed = time.time() - start_time
        time_left = time_limit - time_elapsed
    else:
        time_left = None

    best_epoch_stop = params.get("best_epoch", None)  # Use best epoch for refit_full.
    early_stopping_fn = partial(EarlyStoppingCallbackWithTimeLimit,
                                monitor=objective_func_name_to_monitor,
                                mode=objective_optim_mode,
                                min_delta=params['early.stopping.min_delta'],
                                patience=params['early.stopping.patience'],
                                time_limit=time_left,
                                best_epoch_stop=best_epoch_stop)

    self.model = tabular_learner(data, layers=layers, ps=ps,
                                 emb_drop=params['emb_drop'],
                                 metrics=nn_metric,
                                 loss_func=loss_func,
                                 callback_fns=[early_stopping_fn])
    logger.log(15, self.model.model)

    with make_temp_directory() as temp_dir:
        save_callback = SaveModelCallback(self.model,
                                          monitor=objective_func_name_to_monitor,
                                          mode=objective_optim_mode,
                                          name=self.name,
                                          best_epoch_stop=best_epoch_stop)
        with progress_disabled_ctx(self.model) as model:
            original_path = model.path
            model.path = Path(temp_dir)
            model.fit_one_cycle(params['epochs'], params['lr'], callbacks=save_callback)

            # Load the best one and export it
            model.load(self.name)

            if objective_func_name == 'log_loss':
                eval_result = model.validate()[0]
            else:
                eval_result = model.validate()[1].numpy().reshape(-1)[0]

            logger.log(15, f'Model validation metrics: {eval_result}')
            model.path = original_path

    self.params_trained['best_epoch'] = save_callback.best_epoch
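# Hedged sketch of the time-budget handoff above: the early-stopping
# callback receives only the budget remaining after preprocessing, so a
# slow preprocessing step directly shrinks the training window
# (hypothetical helper name).
import time

def remaining_budget(start_time, time_limit):
    if not time_limit:
        return None
    return time_limit - (time.time() - start_time)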