    def __init__(self, **kwargs):
        try_import_torch()
        super().__init__(**kwargs)
        import torch
        self._verbosity = None
        self._temp_file_name = "tab_trans_temp.pth"
        self._period_columns_mapping = None

        # TODO: Take in num_gpu's as a param. Currently this is hard-coded upon detection of cuda.
        if torch.cuda.is_available():
            self.device = torch.device("cuda")
        else:
            self.device = torch.device("cpu")
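
A minimal standalone sketch of the device-selection pattern used in __init__ above, assuming only that PyTorch is installed:

import torch

# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)
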
Example 3
    def _fit(self, X, y, X_val=None, y_val=None,
             time_limit=None, sample_weight=None, num_cpus=1, num_gpus=0, reporter=None, verbosity=2, **kwargs):
        try_import_torch()
        import torch
        torch.set_num_threads(num_cpus)
        from .tabular_torch_dataset import TabularTorchDataset

        start_time = time.time()

        params = self._get_model_params()

        processor_kwargs, optimizer_kwargs, fit_kwargs, loss_kwargs, params = self._prepare_params(params=params)

        seed_value = params.pop('seed_value', 0)

        self._num_cpus_infer = params.pop('_num_cpus_infer', 1)
        if seed_value is not None:  # Set seeds
            random.seed(seed_value)
            np.random.seed(seed_value)
            torch.manual_seed(seed_value)

        if sample_weight is not None:  # TODO: support
            logger.log(15, f"sample_weight not yet supported for {self.__class__.__name__},"
                           " this model will ignore them in training.")

        if num_cpus is not None:
            self.num_dataloading_workers = max(1, int(num_cpus/2.0))
        else:
            self.num_dataloading_workers = 1
        if self.num_dataloading_workers == 1:
            self.num_dataloading_workers = 0  # TODO: verify 0 is typically faster and uses less memory than 1 in pytorch
        self.num_dataloading_workers = 0  # TODO: >0 crashes on MacOS
        self.max_batch_size = params.pop('max_batch_size', 512)
        batch_size = params.pop('batch_size', None)
        if batch_size is None:
            if isinstance(X, TabularTorchDataset):
                batch_size = min(int(2 ** (3 + np.floor(np.log10(len(X))))), self.max_batch_size)
            else:
                batch_size = min(int(2 ** (3 + np.floor(np.log10(X.shape[0])))), self.max_batch_size)

        train_dataset, val_dataset = self._generate_datasets(X=X, y=y, params=processor_kwargs, X_val=X_val, y_val=y_val)
        logger.log(15, f"Training data for {self.__class__.__name__} has: "
                       f"{train_dataset.num_examples} examples, {train_dataset.num_features} features "
                       f"({len(train_dataset.feature_groups['vector'])} vector, {len(train_dataset.feature_groups['embed'])} embedding)")

        self.device = self._get_device(num_gpus=num_gpus)

        self._get_net(train_dataset, params=params)
        self.optimizer = self._init_optimizer(**optimizer_kwargs)

        if time_limit is not None:
            time_elapsed = time.time() - start_time
            time_limit_orig = time_limit
            time_limit = time_limit - time_elapsed

            # if more than 60% of the time limit was spent on preprocessing, there is likely not enough time left to train the model
            if time_limit <= time_limit_orig * 0.4:
                raise TimeLimitExceeded

        # train network
        self._train_net(train_dataset=train_dataset,
                        loss_kwargs=loss_kwargs,
                        batch_size=batch_size,
                        val_dataset=val_dataset,
                        time_limit=time_limit,
                        reporter=reporter,
                        verbosity=verbosity,
                        **fit_kwargs)
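
For reference, a standalone sketch of the default batch-size heuristic used in _fit above; heuristic_batch_size is a hypothetical helper name chosen here for illustration, not part of the original module:

import numpy as np

def heuristic_batch_size(num_rows, max_batch_size=512):
    # The batch size starts at 2**3 = 8 and doubles with every order of
    # magnitude of training rows, capped at max_batch_size.
    return min(int(2 ** (3 + np.floor(np.log10(num_rows)))), max_batch_size)

for n in (100, 1_000, 10_000, 100_000, 1_000_000):
    print(n, heuristic_batch_size(n))  # 32, 64, 128, 256, 512
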
    def _fit(self, X, y, X_val=None, y_val=None,
             time_limit=None, sample_weight=None, num_cpus=1, num_gpus=0, reporter=None, **kwargs):
        try_import_torch()
        import torch
        from .tabular_nn_torch import TabularPyTorchDataset

        start_time = time.time()
        self.verbosity = kwargs.get('verbosity', 2)
        if sample_weight is not None:  # TODO: support
            logger.log(15, "sample_weight not yet supported for TabularNeuralQuantileModel,"
                           " this model will ignore them in training.")
        params = self.params.copy()
        params = fixedvals_from_searchspaces(params)
        if num_cpus is not None:
            self.num_dataloading_workers = max(1, int(num_cpus/2.0))
        else:
            self.num_dataloading_workers = 1
        if self.num_dataloading_workers == 1:
            self.num_dataloading_workers = 0  # 0 is typically faster and uses less memory than 1
        self.num_dataloading_workers = 0
        self.max_batch_size = params['max_batch_size']
        if isinstance(X, TabularPyTorchDataset):
            self.batch_size = min(int(2 ** (3 + np.floor(np.log10(len(X))))), self.max_batch_size)
        else:
            self.batch_size = min(int(2 ** (3 + np.floor(np.log10(X.shape[0])))), self.max_batch_size)

        train_dataset, val_dataset = self.generate_datasets(X=X, y=y, params=params, X_val=X_val, y_val=y_val)
        logger.log(15, "Training data for TabularNeuralQuantileModel has: %d examples, %d features "
                       "(%d vector, %d embedding, %d language)" %
                   (train_dataset.num_examples, train_dataset.num_features,
                    len(train_dataset.feature_groups['vector']), len(train_dataset.feature_groups['embed']),
                    len(train_dataset.feature_groups['language'])))

        if num_gpus is not None and num_gpus >= 1:
            if torch.cuda.is_available():
                self.device = torch.device("cuda")
                if num_gpus > 1:
                    logger.warning("TabularNeuralQuantileModel not yet configured to use more than 1 GPU."
                                   " 'num_gpus' set to >1, but we will be using only 1 GPU.")
            else:
                self.device = torch.device("cpu")
        else:
            self.device = torch.device("cpu")

        self.get_net(train_dataset, params=params)

        if time_limit is not None:
            time_elapsed = time.time() - start_time
            time_limit_orig = time_limit
            time_limit = time_limit - time_elapsed

            # if more than 60% of the time limit was spent on preprocessing, there is likely not enough time left to train the model
            if time_limit <= time_limit_orig * 0.4:
                raise TimeLimitExceeded

        # train network
        self.train_net(train_dataset=train_dataset,
                       params=params,
                       val_dataset=val_dataset,
                       initialize=True,
                       setup_trainer=True,
                       time_limit=time_limit,
                       reporter=reporter)
        self.params_post_fit = params
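
Both _fit implementations above share the same time-budget guard. A hedged sketch of that check in isolation; TimeLimitExceeded is stubbed as a local exception here rather than imported from the surrounding framework:

import time

class TimeLimitExceeded(Exception):
    # Stand-in for the framework's exception of the same name.
    pass

def remaining_budget(start_time, time_limit):
    # Subtract the time already spent (e.g. on preprocessing) from the budget;
    # if less than 40% of the original limit remains, i.e. more than 60% was
    # spent before training, abort rather than start a run that cannot finish.
    remaining = time_limit - (time.time() - start_time)
    if remaining <= time_limit * 0.4:
        raise TimeLimitExceeded
    return remaining
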
Example 5
    def _hyperparameter_tune(self, X, y, X_val, y_val, scheduler_options,
                             **kwargs):
        """ Performs HPO and sets self.params to best hyperparameter values """
        try_import_torch()
        from .tabular_nn_torch import tabular_pytorch_trial, TabularPyTorchDataset

        time_start = time.time()
        self.verbosity = kwargs.get('verbosity', 2)
        logger.log(15, "Beginning hyperparameter tuning for Neural Network...")

        # changes non-specified default hyperparams from fixed values to search-spaces.
        self._set_default_searchspace()
        scheduler_cls, scheduler_params = scheduler_options  # Unpack tuple
        if scheduler_cls is None or scheduler_params is None:
            raise ValueError(
                "scheduler_cls and scheduler_params cannot be None for hyperparameter tuning"
            )
        num_cpus = scheduler_params['resource']['num_cpus']

        params_copy = self.params.copy()

        self.num_dataloading_workers = max(1, int(num_cpus / 2.0))
        self.max_batch_size = params_copy['max_batch_size']
        self.batch_size = min(int(2**(3 + np.floor(np.log10(X.shape[0])))),
                              self.max_batch_size)
        train_dataset, val_dataset = self.generate_datasets(X=X,
                                                            y=y,
                                                            params=params_copy,
                                                            X_val=X_val,
                                                            y_val=y_val)
        train_path = self.path + "train"
        val_path = self.path + "validation"
        train_dataset.save(file_prefix=train_path)
        val_dataset.save(file_prefix=val_path)

        if not np.any([
                isinstance(params_copy[hyperparam], Space)
                for hyperparam in params_copy
        ]):
            logger.warning(
                "Warning: Attempting to do hyperparameter optimization without any search space (all hyperparameters are already fixed values)"
            )
        else:
            logger.log(15, "Hyperparameter search space for Neural Network: ")
            for hyperparam in params_copy:
                if isinstance(params_copy[hyperparam], Space):
                    logger.log(
                        15,
                        str(hyperparam) + ":   " +
                        str(params_copy[hyperparam]))

        util_args = dict(
            train_path=train_path,
            val_path=val_path,
            model=self,
            time_start=time_start,
            time_limit=scheduler_params['time_out'],
            fit_kwargs=scheduler_params['resource'],
        )
        tabular_pytorch_trial.register_args(util_args=util_args, **params_copy)
        scheduler = scheduler_cls(tabular_pytorch_trial, **scheduler_params)
        if ('dist_ip_addrs' in scheduler_params) and (len(
                scheduler_params['dist_ip_addrs']) > 0):
            # TODO: Ensure proper working directory setup on remote machines
            # This is multi-machine setting, so need to copy dataset to workers:
            logger.log(15, "Uploading preprocessed data to remote workers...")
            scheduler.upload_files([
                train_path + TabularPyTorchDataset.DATAOBJ_SUFFIX,
                val_path + TabularPyTorchDataset.DATAOBJ_SUFFIX,
            ])  # TODO: currently does not work.
            logger.log(15, "uploaded")

        scheduler.run()
        scheduler.join_jobs()

        return self._get_hpo_results(scheduler=scheduler,
                                     scheduler_params=scheduler_params,
                                     time_start=time_start)
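
A compact equivalent of the "is any hyperparameter still a search space?" check in _hyperparameter_tune above, written as a hypothetical helper; space_cls stands in for the Space class imported by the surrounding module:

def has_search_space(params, space_cls):
    # True if at least one hyperparameter value is still a search space
    # rather than an already-fixed value.
    return any(isinstance(value, space_cls) for value in params.values())

# has_search_space(params_copy, Space) mirrors the np.any([...]) expression above.
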