    def predict_proba(self, X, preprocess=True):
        from fastai.basic_data import DatasetType
        from fastai.tabular import TabularList
        from fastai.utils.mod_display import progress_disabled_ctx
        from fastai.tabular import FillMissing, Categorify, Normalize

        if preprocess:
            X = self.preprocess(X)
        procs = [FillMissing, Categorify, Normalize]
        # Register X as a test set on the learner's DataBunch, then run inference
        # with the progress bar suppressed
        self.model.data.add_test(
            TabularList.from_df(X,
                                cat_names=self.cat_columns,
                                cont_names=self.cont_columns,
                                procs=procs))
        with progress_disabled_ctx(self.model) as model:
            preds, _ = model.get_preds(ds_type=DatasetType.Test)
        if self.problem_type == REGRESSION:
            if self.y_scaler is not None:
                return self.y_scaler.inverse_transform(
                    preds.numpy()).reshape(-1)
            else:
                return preds.numpy().reshape(-1)
        if self.problem_type == BINARY:
            return preds[:, 1].numpy()
        else:
            return preds.numpy()
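Every snippet on this page shares the same pattern: wrap the fastai v1 Learner in progress_disabled_ctx so training or inference runs without the progress bar output. A minimal sketch of that pattern, assuming `learn` is an already-constructed fastai v1 Learner (for instance from tabular_learner); the epoch count and learning rate below are illustrative only:

    from fastai.utils.mod_display import progress_disabled_ctx

    # Assumes `learn` is an existing fastai v1 Learner (not defined in this sketch).
    # The context manager suppresses the progress bar / HTML output for code run inside it.
    with progress_disabled_ctx(learn) as learn:
        learn.fit_one_cycle(5, 1e-3)  # train silently; get_preds works the same way inside the context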
Example #2
    def _predict_proba(self, X, **kwargs):
        from fastai.basic_data import DatasetType
        from fastai.tabular import TabularList
        from fastai.utils.mod_display import progress_disabled_ctx
        import pandas as pd  # needed for the single-row duplication workaround below

        X = self.preprocess(X, **kwargs)

        single_row = len(X) == 1
        # fastai has issues predicting on a single row, so duplicate the row as a workaround
        if single_row:
            X = pd.concat([X, X]).reset_index(drop=True)

        # Copy cat_columns and cont_columns because TabularList mutates the lists it is given
        self.model.data.add_test(
            TabularList.from_df(X,
                                cat_names=self.cat_columns.copy(),
                                cont_names=self.cont_columns.copy(),
                                procs=self.procs))
        with progress_disabled_ctx(self.model) as model:
            preds, _ = model.get_preds(ds_type=DatasetType.Test)
        if single_row:
            preds = preds[:1, :]
        if self.problem_type == REGRESSION:
            if self.y_scaler is not None:
                return self.y_scaler.inverse_transform(
                    preds.numpy()).reshape(-1)
            else:
                return preds.numpy().reshape(-1)
        if self.problem_type == BINARY:
            return preds[:, 1].numpy()
        else:
            return preds.numpy()
Example #3
    def train(self, graph, max_epoch=100, min_delta=0, patience=0):
        model_num = self._model_num
        self._model_num = self._model_num + 1
        learn = Learner(self.data, graph.generate_model(), loss_func=self.loss_func, metrics=self.metrics,
                        callback_fns=[partial(ValueTrackingCallback,
                                              value_holder=self.accuracy,
                                              monitor=self.monitor,
                                              min_delta=min_delta,
                                              patience=patience)])
        # progress_disabled_ctx is a context manager; calling it without a `with`
        # block has no effect, so wrap the training call to suppress the progress bar
        with progress_disabled_ctx(learn) as learn:
            learn.fit(max_epoch)
        print(f'Saving model {model_num}...', end='')
        graph.save(os.path.join(self.path, str(model_num)))
        print(' Done!')
        print(f'Model number: {model_num}\nBest accuracy: {self.accuracy.value}')
        return model_num, self.accuracy.value.item()
Example #4
    def evaluation_fn(parameters):
        # lr = self.get_param_value('learning_rate')
        # num_epochs = self.get_param_value('num_epochs')
        # moms = (self.get_param_value('momentum0'), self.get_param_value('momentum1'))
        # ps = self.get_param_value('dropout_ps')
        # wd = self.get_param_value('weight_decay')
        # use_bn = self.get_param_value('use_bn')

        lr = (eval(DashVerum.v_resp['learning_rate']['default'])
              if not DashVerum.v_resp['learning_rate']['flag'] else
              parameters['learning_rate'])
        num_epochs = (DashVerum.v_resp['num_epochs']['default']
                      if not DashVerum.v_resp['num_epochs']['flag'] else
                      parameters['num_epochs'])
        moms = ((DashVerum.v_resp['momentum0']['default']
                 if not DashVerum.v_resp['momentum0']['flag'] else
                 parameters['momentum0']),
                (DashVerum.v_resp['momentum1']['default']
                 if not DashVerum.v_resp['momentum1']['flag'] else
                 parameters['momentum1']))
        ps = (DashVerum.v_resp['dropout_ps']['default']
              if not DashVerum.v_resp['dropout_ps']['flag'] else
              parameters['dropout_ps'])
        # wd = (
        # 	DashVerum.v_resp['weight_decay']['default'] if not DashVerum.v_resp['weight_decay']['flag']
        # 	else parameters['weight_decay']
        # )
        use_bn = (DashVerum.v_resp['use_bn']['default']
                  if not DashVerum.v_resp['use_bn']['flag'] else
                  parameters['use_bn'])

        # learn = load_learner('./','verum_test.pkl')
        # learn.data = self.data
        with open('./data/response.json') as f:
            response = json.load(f)
        application = response['task']
        save_dir = Path(response['save']['save_dir'])
        save_name = Path(response['save']['save_name'])
        learner_class = learner_class_map[application]
        learn = getattr(learner_class,
                        f'create_{application}_learner')(response)

        learn.model.ps = ps
        # learn.model.wd = wd
        learn.model.use_bn = use_bn

        validation_set = learn.data.valid_dl
        learn.data.valid_dl = None

        with progress_disabled_ctx(learn) as learn:
            learn.fit_one_cycle(num_epochs, max_lr=lr, moms=moms)

        learn.data.valid_dl = validation_set
        if DashVerum.v_resp['metric']['name'] == 'error':
            metric = learn.validate()[0]
        else:
            metric = learn.validate(
                metrics=eval(DashVerum.v_resp['metric']['name']))[0]

        return metric
Example #5
    def train(self, **kwargs):
        """
        Train self.learner model.
        """
        self.update_params(**kwargs)
        self.init_model()

        frozen_epochs = self.train_params['frozen_epochs']
        unfrozen_epochs = self.train_params['unfrozen_epochs']
        frozen_lr = self.train_params['frozen_lr']
        unfrozen_lr = self.train_params['unfrozen_lr']

        if self.progressbar:
            self.learner.fit_one_cycle(frozen_epochs, frozen_lr)
            self.learner.unfreeze()
            self.learner.fit_one_cycle(unfrozen_epochs, unfrozen_lr)

        else:
            with progress_disabled_ctx(self.learner) as self.learner:
                self.learner.fit_one_cycle(frozen_epochs, frozen_lr)
                self.learner.unfreeze()
                self.learner.fit_one_cycle(unfrozen_epochs, unfrozen_lr)

        self.is_fitted = True
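Example #6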
    def _fit(self, X_train, y_train, X_val, y_val, time_limit=None, **kwargs):
        try_import_fastai_v1()
        from fastai.layers import LabelSmoothingCrossEntropy
        from fastai.tabular import tabular_learner
        from fastai.utils.mod_display import progress_disabled_ctx
        from .callbacks import EarlyStoppingCallbackWithTimeLimit, SaveModelCallback

        start_time = time.time()

        self.y_scaler = self.params.get('y_scaler', None)
        if self.y_scaler is not None:
            self.y_scaler = copy.deepcopy(self.y_scaler)

        logger.log(15,
                   f'Fitting Neural Network with parameters {self.params}...')
        data = self.preprocess_train(X_train, y_train, X_val, y_val)

        nn_metric, objective_func_name = self.__get_objective_func_name()
        objective_func_name_to_monitor = self.__get_objective_func_to_monitor(
            objective_func_name)
        objective_optim_mode = 'min' if objective_func_name in [
            'root_mean_squared_error',
            'mean_squared_error',
            'mean_absolute_error',
            'r2'  # Regression objectives
        ] else 'auto'

        # TODO: calculate max emb concat layer size and use 1st layer as that value and 2nd in between number of classes and the value
        if self.params.get('layers', None) is not None:
            layers = self.params['layers']
        elif self.problem_type in [REGRESSION, BINARY]:
            layers = [200, 100]
        else:
            base_size = max(len(data.classes) * 2, 100)
            layers = [base_size * 2, base_size]

        loss_func = None
        if self.problem_type in [BINARY, MULTICLASS] and self.params.get('smoothing', 0.0) > 0.0:
            loss_func = LabelSmoothingCrossEntropy(self.params['smoothing'])

        ps = self.params['ps']
        if type(ps) != list:
            ps = [ps]

        if time_limit:
            time_elapsed = time.time() - start_time
            time_left = time_limit - time_elapsed
        else:
            time_left = None

        early_stopping_fn = partial(
            EarlyStoppingCallbackWithTimeLimit,
            monitor=objective_func_name_to_monitor,
            mode=objective_optim_mode,
            min_delta=self.params['early.stopping.min_delta'],
            patience=self.params['early.stopping.patience'],
            time_limit=time_left)

        self.model = tabular_learner(data,
                                     layers=layers,
                                     ps=ps,
                                     emb_drop=self.params['emb_drop'],
                                     metrics=nn_metric,
                                     loss_func=loss_func,
                                     callback_fns=[early_stopping_fn])
        logger.log(15, self.model.model)

        with make_temp_directory() as temp_dir:
            save_callback = SaveModelCallback(
                self.model,
                monitor=objective_func_name_to_monitor,
                mode=objective_optim_mode,
                name=self.name)
            with progress_disabled_ctx(self.model) as model:
                original_path = model.path
                model.path = Path(temp_dir)
                model.fit_one_cycle(self.params['epochs'],
                                    self.params['lr'],
                                    callbacks=save_callback)

                # Load the best one and export it
                model.load(self.name)

                if objective_func_name == 'log_loss':
                    eval_result = model.validate()[0]
                else:
                    eval_result = model.validate()[1].numpy().reshape(-1)[0]

                logger.log(15, f'Model validation metrics: {eval_result}')
                model.path = original_path
Example #7
    def _fit(self,
             X,
             y,
             X_val=None,
             y_val=None,
             time_limit=None,
             num_cpus=None,
             num_gpus=0,
             sample_weight=None,
             **kwargs):
        try_import_fastai_v1()
        import torch
        from fastai.layers import LabelSmoothingCrossEntropy
        from fastai.tabular import tabular_learner
        from fastai.utils.mod_display import progress_disabled_ctx
        from fastai.core import defaults
        from .callbacks import EarlyStoppingCallbackWithTimeLimit, SaveModelCallback

        start_time = time.time()
        if sample_weight is not None:  # TODO: support
            logger.log(
                15,
                "sample_weight is not yet supported for NNFastAiTabularModel; it will be ignored during training."
            )

        params = self.params.copy()

        self.y_scaler = params.get('y_scaler', None)
        if self.y_scaler is not None:
            self.y_scaler = copy.deepcopy(self.y_scaler)

        if num_cpus is None:
            num_cpus = defaults.cpus
        # additional workers help only when fork is enabled; in other multiprocessing modes, communication overhead reduces performance
        num_workers = int(num_cpus / 2)
        if not is_fork_enabled():
            num_workers = 0
        if num_gpus is not None:
            if num_gpus == 0:
                # TODO: Does not obviously impact inference speed
                defaults.device = torch.device('cpu')
            else:
                defaults.device = torch.device('cuda')

        logger.log(15, f'Fitting Neural Network with parameters {params}...')
        data = self._preprocess_train(X,
                                      y,
                                      X_val,
                                      y_val,
                                      num_workers=num_workers)

        nn_metric, objective_func_name = self.__get_objective_func_name()
        objective_func_name_to_monitor = self.__get_objective_func_to_monitor(
            objective_func_name)
        objective_optim_mode = 'min' if objective_func_name in [
            'root_mean_squared_error',
            'mean_squared_error',
            'mean_absolute_error',
            'r2'  # Regression objectives
        ] else 'auto'

        # TODO: calculate max emb concat layer size and use 1st layer as that value and 2nd in between number of classes and the value
        if params.get('layers', None) is not None:
            layers = params['layers']
        elif self.problem_type in [REGRESSION, BINARY]:
            layers = [200, 100]
        else:
            base_size = max(len(data.classes) * 2, 100)
            layers = [base_size * 2, base_size]

        loss_func = None
        if self.problem_type in [BINARY, MULTICLASS] and params.get('smoothing', 0.0) > 0.0:
            loss_func = LabelSmoothingCrossEntropy(params['smoothing'])

        ps = params['ps']
        if type(ps) != list:
            ps = [ps]

        if time_limit:
            time_elapsed = time.time() - start_time
            time_left = time_limit - time_elapsed
        else:
            time_left = None

        best_epoch_stop = params.get("best_epoch",
                                     None)  # Use best epoch for refit_full.
        early_stopping_fn = partial(
            EarlyStoppingCallbackWithTimeLimit,
            monitor=objective_func_name_to_monitor,
            mode=objective_optim_mode,
            min_delta=params['early.stopping.min_delta'],
            patience=params['early.stopping.patience'],
            time_limit=time_left,
            best_epoch_stop=best_epoch_stop)

        self.model = tabular_learner(data,
                                     layers=layers,
                                     ps=ps,
                                     emb_drop=params['emb_drop'],
                                     metrics=nn_metric,
                                     loss_func=loss_func,
                                     callback_fns=[early_stopping_fn])
        logger.log(15, self.model.model)

        with make_temp_directory() as temp_dir:
            save_callback = SaveModelCallback(
                self.model,
                monitor=objective_func_name_to_monitor,
                mode=objective_optim_mode,
                name=self.name,
                best_epoch_stop=best_epoch_stop)
            with progress_disabled_ctx(self.model) as model:
                original_path = model.path
                model.path = Path(temp_dir)
                model.fit_one_cycle(params['epochs'],
                                    params['lr'],
                                    callbacks=save_callback)

                # Load the best one and export it
                model.load(self.name)

                if objective_func_name == 'log_loss':
                    eval_result = model.validate()[0]
                else:
                    eval_result = model.validate()[1].numpy().reshape(-1)[0]

                logger.log(15, f'Model validation metrics: {eval_result}')
                model.path = original_path
            self.params_trained['best_epoch'] = save_callback.best_epoch