def test_get_name_error(self):
    """Check that an unsupported loss name is rejected with a ``ValueError``.

    The test requests ``'BCELoss'`` for a multi-class tabular classification
    task and expects ``get_loss_instance`` to raise a ``ValueError`` whose
    message matches the pattern below.
    """
    dataset_properties = {'task_type': 'tabular_classification', 'output_type': 'multi-class'}
    name = 'BCELoss'
    expected_message = (
        r"Invalid name entered for task [a-z]+_[a-z]+, "
        r"and output type [a-z]+-[a-z]+ currently supported losses for task include .*"
    )
    # assertRaisesRegex fails the test when no ValueError is raised at all;
    # a plain try/except would let that case pass without checking anything.
    with self.assertRaisesRegex(ValueError, expected_message):
        get_loss_instance(dataset_properties, name)
def test_losses(self):
    """Build a loss per task/output combination and check the score is a tensor.

    The four sequences below are aligned by position: each index describes
    one scenario (dataset properties, loss name, prediction and target).
    """
    scenarios = (
        {'task_type': 'tabular_classification', 'output_type': 'multi-class'},
        {'task_type': 'tabular_classification', 'output_type': 'binary'},
        {'task_type': 'tabular_regression', 'output_type': 'continuous'},
    )
    loss_names = (None, 'BCEWithLogitsLoss', None)

    # All predictions are drawn before any target so the sequence of random
    # draws stays the same.
    first_prediction = torch.randn(4, 4, requires_grad=True)
    second_prediction = torch.empty(4).random_(2)
    third_prediction = torch.randn(4)
    predictions = (first_prediction, second_prediction, third_prediction)

    first_target = torch.empty(4, dtype=torch.long).random_(4)
    second_target = torch.empty(4).random_(2)
    third_target = torch.randn(4)
    targets = (first_target, second_target, third_target)

    for properties, loss_name, prediction, target in zip(scenarios, loss_names, predictions, targets):
        criterion = get_loss_instance(dataset_properties=properties, name=loss_name)
        self.assertIsInstance(criterion(prediction, target), torch.Tensor)
def _fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> torch.nn.Module:
    """
    Fits a component by using an input dictionary with pre-requisites

    Args:
        X (Dict[str, Any]): Dependencies needed by current component to perform fit
        y (Any): not used. To comply with sklearn API

    Returns:
        The ``state_dict()`` of ``X['network']`` once training has finished.
        NOTE(review): the return annotation still reads ``torch.nn.Module``;
        it is left untouched to keep the signature stable -- confirm whether
        callers rely on it.
    """

    # Comply with mypy
    # Notice that choice here stands for the component choice framework,
    # where we dynamically build the configuration space by selecting the available
    # component choices. In this case, is what trainer choices are available
    assert self.choice is not None

    # Setup a Logger and other logging support
    # Writer is not pickable -- make sure it is not saved in self
    writer = None
    if X.get('use_tensorboard_logger'):
        writer = SummaryWriter(log_dir=X['backend'].temporary_directory)

    if X["torch_num_threads"] > 0:
        torch.set_num_threads(X["torch_num_threads"])

    # Optional budget limits default to None when the key is missing
    budget_tracker = BudgetTracker(
        budget_type=X['budget_type'],
        max_runtime=X.get('runtime'),
        max_epochs=X.get('epochs'),
    )

    # Support additional user metrics
    additional_metrics = X.get('additional_metrics')
    additional_losses = X.get('additional_losses')
    self.choice.prepare(
        model=X['network'],
        metrics=get_metrics(dataset_properties=X['dataset_properties'],
                            names=additional_metrics),
        criterion=get_loss_instance(X['dataset_properties'],
                                    name=additional_losses),
        budget_tracker=budget_tracker,
        optimizer=X['optimizer'],
        device=self.get_device(X),
        metrics_during_training=X['metrics_during_training'],
        scheduler=X['lr_scheduler'],
        task_type=STRING_TO_TASK_TYPES[X['dataset_properties']['task_type']]
    )
    total_parameter_count, trainable_parameter_count = self.count_parameters(X['network'])
    self.run_summary = RunSummary(
        total_parameter_count,
        trainable_parameter_count,
    )

    epoch = 1

    while True:

        # prepare epoch
        start_time = time.time()

        self.choice.on_epoch_start(X=X, epoch=epoch)

        # training
        train_loss, train_metrics = self.choice.train_epoch(
            train_loader=X['train_data_loader'],
            epoch=epoch,
            logger=self.logger,
            writer=writer,
        )

        val_loss, val_metrics, test_loss, test_metrics = None, {}, None, {}
        if self.eval_valid_each_epoch(X):
            val_loss, val_metrics = self.choice.evaluate(X['val_data_loader'], epoch, writer)
            if 'test_data_loader' in X and X['test_data_loader']:
                test_loss, test_metrics = self.choice.evaluate(X['test_data_loader'], epoch, writer)

        # Save training information
        self.run_summary.add_performance(
            epoch=epoch,
            start_time=start_time,
            end_time=time.time(),
            train_loss=train_loss,
            val_loss=val_loss,
            test_loss=test_loss,
            train_metrics=train_metrics,
            val_metrics=val_metrics,
            test_metrics=test_metrics,
        )

        # Save the weights of the best model and, if patience
        # exhausted break training
        if self.early_stop_handler(X):
            break

        if self.choice.on_epoch_end(X=X, epoch=epoch):
            break

        self.logger.debug(self.run_summary.repr_last_epoch())

        # Reached max epoch on next iter, don't even go there
        if budget_tracker.is_max_epoch_reached(epoch + 1):
            break

        epoch += 1

        torch.cuda.empty_cache()

    # wrap up -- add score if not evaluating every epoch
    if not self.eval_valid_each_epoch(X):
        val_loss, val_metrics = self.choice.evaluate(X['val_data_loader'])
        # Guard on the test loader itself (not the validation loader) so a
        # missing/empty test loader is never passed to evaluate, matching
        # the per-epoch check inside the loop.
        if 'test_data_loader' in X and X['test_data_loader']:
            test_loss, test_metrics = self.choice.evaluate(X['test_data_loader'])
        # Recorded under the last trained epoch, reusing its start_time
        self.run_summary.add_performance(
            epoch=epoch,
            start_time=start_time,
            end_time=time.time(),
            train_loss=train_loss,
            val_loss=val_loss,
            test_loss=test_loss,
            train_metrics=train_metrics,
            val_metrics=val_metrics,
            test_metrics=test_metrics,
        )
        self.logger.debug(self.run_summary.repr_last_epoch())
        self.save_model_for_ensemble()

    self.logger.info(f"Finished training with {self.run_summary.repr_last_epoch()}")

    # Tag as fitted
    self.fitted_ = True

    return X['network'].state_dict()
def test_get_no_name(self):
    """Check that ``get_loss_instance`` called without a name returns an ``nn.Module``."""
    dataset_properties = {'task_type': 'tabular_classification', 'output_type': 'multi-class'}
    loss = get_loss_instance(dataset_properties)
    # assertIsInstance reports the offending object and expected type on
    # failure, unlike assertTrue(isinstance(...)).
    self.assertIsInstance(loss, nn.Module)
def test_get_name(self):
    """Request a loss by name and check both its type and its string form."""
    requested_loss = 'CrossEntropyLoss'
    criterion = get_loss_instance(
        {'task_type': 'tabular_classification', 'output_type': 'multi-class'},
        requested_loss,
    )
    self.assertIsInstance(criterion, nn.Module)
    self.assertEqual(str(criterion), 'CrossEntropyLoss()')