Ejemplo n.º 1
0
 def test_get_name_error(self):
     """
     Check that requesting a loss by a name that is rejected for the given
     dataset properties raises a ValueError with the expected message.

     The call is wrapped in assertRaisesRegex so that the test fails when
     no exception is raised at all, instead of passing silently.
     """
     dataset_properties = {'task_type': 'tabular_classification', 'output_type': 'multi-class'}
     name = 'BCELoss'
     expected_message = (r"Invalid name entered for task [a-z]+_[a-z]+, "
                         r"and output type [a-z]+-[a-z]+ currently supported losses for task include .*")
     with self.assertRaisesRegex(ValueError, expected_message):
         get_loss_instance(dataset_properties, name)
Ejemplo n.º 2
0
 def test_losses(self):
     """
     For each (dataset properties, loss name) combination, build the loss
     through get_loss_instance, apply it to a random prediction/target pair
     and check that the result is a torch.Tensor.
     """
     # Random tensors are drawn in the same order as before:
     # all predictions first, then all targets.
     multiclass_pred = torch.randn(4, 4, requires_grad=True)
     binary_pred = torch.empty(4).random_(2)
     regression_pred = torch.randn(4)

     multiclass_target = torch.empty(4, dtype=torch.long).random_(4)
     binary_target = torch.empty(4).random_(2)
     regression_target = torch.randn(4)

     # (dataset_properties, loss name, prediction, target)
     cases = (
         ({'task_type': 'tabular_classification', 'output_type': 'multi-class'},
          None, multiclass_pred, multiclass_target),
         ({'task_type': 'tabular_classification', 'output_type': 'binary'},
          'BCEWithLogitsLoss', binary_pred, binary_target),
         ({'task_type': 'tabular_regression', 'output_type': 'continuous'},
          None, regression_pred, regression_target),
     )

     for properties, loss_name, prediction, expected in cases:
         criterion = get_loss_instance(dataset_properties=properties, name=loss_name)
         result = criterion(prediction, expected)
         self.assertIsInstance(result, torch.Tensor)
    def _fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> torch.nn.Module:
        """
        Fits a component by using an input dictionary with pre-requisites.

        Prepares the selected trainer (``self.choice``) with the network,
        metrics, criterion, optimizer and scheduler found in ``X``, then runs
        the epoch loop until early stopping triggers, ``on_epoch_end``
        requests a stop, or the budget tracker reports that the maximum
        epoch would be reached. Per-epoch results are stored in
        ``self.run_summary``.

        Args:
            X (X: Dict[str, Any]): Dependencies needed by current component to perform fit.
                Keys read unconditionally: 'torch_num_threads', 'budget_type',
                'network', 'dataset_properties', 'optimizer',
                'metrics_during_training', 'lr_scheduler',
                'train_data_loader', 'val_data_loader'.
                Optional keys: 'use_tensorboard_logger' (requires 'backend'),
                'runtime', 'epochs', 'additional_metrics',
                'additional_losses', 'test_data_loader'.
            y (Any): not used. To comply with sklearn API

        Returns:
            The result of ``X['network'].state_dict()``.
            NOTE(review): the return annotation says ``torch.nn.Module``,
            which does not match the returned value; the annotation is left
            untouched here to keep the interface stable -- confirm with callers.
        """

        # Comply with mypy
        # Notice that choice here stands for the component choice framework,
        # where we dynamically build the configuration space by selecting the available
        # component choices. In this case, is what trainer choices are available
        assert self.choice is not None

        # Setup a Logger and other logging support
        # Writer is not pickable -- make sure it is not saved in self
        writer = None
        if X.get('use_tensorboard_logger'):
            writer = SummaryWriter(log_dir=X['backend'].temporary_directory)

        if X["torch_num_threads"] > 0:
            torch.set_num_threads(X["torch_num_threads"])

        # Missing 'runtime' / 'epochs' entries are forwarded as None
        budget_tracker = BudgetTracker(
            budget_type=X['budget_type'],
            max_runtime=X.get('runtime'),
            max_epochs=X.get('epochs'),
        )

        # Support additional user metrics
        additional_metrics = X.get('additional_metrics')
        additional_losses = X.get('additional_losses')
        self.choice.prepare(
            model=X['network'],
            metrics=get_metrics(dataset_properties=X['dataset_properties'],
                                names=additional_metrics),
            criterion=get_loss_instance(X['dataset_properties'],
                                        name=additional_losses),
            budget_tracker=budget_tracker,
            optimizer=X['optimizer'],
            device=self.get_device(X),
            metrics_during_training=X['metrics_during_training'],
            scheduler=X['lr_scheduler'],
            task_type=STRING_TO_TASK_TYPES[X['dataset_properties']['task_type']]
        )
        total_parameter_count, trainable_parameter_count = self.count_parameters(X['network'])
        self.run_summary = RunSummary(
            total_parameter_count,
            trainable_parameter_count,
        )

        epoch = 1

        while True:

            # prepare epoch
            start_time = time.time()

            self.choice.on_epoch_start(X=X, epoch=epoch)

            # training
            train_loss, train_metrics = self.choice.train_epoch(
                train_loader=X['train_data_loader'],
                epoch=epoch,
                logger=self.logger,
                writer=writer,
            )

            # Validation/test results stay at these defaults when the
            # per-epoch evaluation is disabled
            val_loss, val_metrics, test_loss, test_metrics = None, {}, None, {}
            if self.eval_valid_each_epoch(X):
                val_loss, val_metrics = self.choice.evaluate(X['val_data_loader'], epoch, writer)
                if 'test_data_loader' in X and X['test_data_loader']:
                    test_loss, test_metrics = self.choice.evaluate(X['test_data_loader'], epoch, writer)

            # Save training information
            self.run_summary.add_performance(
                epoch=epoch,
                start_time=start_time,
                end_time=time.time(),
                train_loss=train_loss,
                val_loss=val_loss,
                test_loss=test_loss,
                train_metrics=train_metrics,
                val_metrics=val_metrics,
                test_metrics=test_metrics,
            )

            # Save the weights of the best model and, if patience
            # exhausted break training
            if self.early_stop_handler(X):
                break

            if self.choice.on_epoch_end(X=X, epoch=epoch):
                break

            self.logger.debug(self.run_summary.repr_last_epoch())

            # Reached max epoch on next iter, don't even go there
            if budget_tracker.is_max_epoch_reached(epoch + 1):
                break

            epoch += 1

            torch.cuda.empty_cache()

        # wrap up -- add score if not evaluating every epoch
        if not self.eval_valid_each_epoch(X):
            val_loss, val_metrics = self.choice.evaluate(X['val_data_loader'])
            # Guard on the test loader itself (the same check used inside the
            # epoch loop); checking the validation loader here would evaluate
            # a present-but-empty test loader
            if 'test_data_loader' in X and X['test_data_loader']:
                test_loss, test_metrics = self.choice.evaluate(X['test_data_loader'])
            # start_time / train_loss / train_metrics are those of the last epoch run
            self.run_summary.add_performance(
                epoch=epoch,
                start_time=start_time,
                end_time=time.time(),
                train_loss=train_loss,
                val_loss=val_loss,
                test_loss=test_loss,
                train_metrics=train_metrics,
                val_metrics=val_metrics,
                test_metrics=test_metrics,
            )
            self.logger.debug(self.run_summary.repr_last_epoch())
            self.save_model_for_ensemble()

        self.logger.info(f"Finished training with {self.run_summary.repr_last_epoch()}")

        # Tag as fitted
        self.fitted_ = True

        return X['network'].state_dict()
Ejemplo n.º 4
0
 def test_get_no_name(self):
     """
     Check that get_loss_instance returns an nn.Module when it is called
     with dataset properties only (no loss name given).
     """
     dataset_properties = {'task_type': 'tabular_classification', 'output_type': 'multi-class'}
     loss = get_loss_instance(dataset_properties)
     # assertIsInstance reports the actual type on failure and matches the
     # assertion style used by test_get_name
     self.assertIsInstance(loss, nn.Module)
Ejemplo n.º 5
0
 def test_get_name(self):
     """
     Request 'CrossEntropyLoss' by name for a multi-class tabular
     classification task and check both the type and the string form of
     the returned loss.
     """
     properties = dict(task_type='tabular_classification', output_type='multi-class')
     criterion = get_loss_instance(properties, 'CrossEntropyLoss')

     self.assertIsInstance(criterion, nn.Module)
     self.assertEqual(str(criterion), 'CrossEntropyLoss()')