Example #1
    def fit(self, hyperparameter_config, pipeline_config, train_loader,
            valid_loader, network, optimizer, optimize_metric,
            additional_metrics, log_functions, budget, loss_function,
            training_techniques, fit_start_time, refit):
        hyperparameter_config = ConfigWrapper(self.get_name(),
                                              hyperparameter_config)
        logger = logging.getLogger('autonet')
        logger.debug("Start train. Budget: " + str(budget))

        if pipeline_config["torch_num_threads"] > 0:
            torch.set_num_threads(pipeline_config["torch_num_threads"])

        trainer = Trainer(
            model=network,
            loss_computation=self.batch_loss_computation_techniques[
                hyperparameter_config["batch_loss_computation_technique"]](),
            metrics=[optimize_metric] + additional_metrics,
            log_functions=log_functions,
            criterion=loss_function,
            budget=budget,
            optimizer=optimizer,
            training_techniques=training_techniques,
            device=Trainer.get_device(pipeline_config),
            logger=logger,
            full_eval_each_epoch=pipeline_config["full_eval_each_epoch"])
        trainer.prepare(pipeline_config, hyperparameter_config, fit_start_time)

        logs = trainer.model.logs
        epoch = trainer.model.epochs_trained
        training_start_time = time.time()
        while True:
            # prepare epoch
            log = dict()
            trainer.on_epoch_start(log=log, epoch=epoch)

            # training
            optimize_metric_results, train_loss, stop_training = trainer.train(
                epoch + 1, train_loader)
            if valid_loader is not None and trainer.eval_valid_each_epoch:
                valid_metric_results = trainer.evaluate(valid_loader)

            # evaluate
            log['loss'] = train_loss
            for i, metric in enumerate(trainer.metrics):
                log['train_' + metric.name] = optimize_metric_results[i]

                if valid_loader is not None and trainer.eval_valid_each_epoch:
                    log['val_' + metric.name] = valid_metric_results[i]
            if trainer.eval_additional_logs_each_epoch:
                for additional_log in trainer.log_functions:
                    log[additional_log.name] = additional_log(
                        trainer.model, epoch)

            # wrap up epoch
            stop_training = trainer.on_epoch_end(log=log,
                                                 epoch=epoch) or stop_training

            # handle logs
            logs.append(log)
            log = {
                key: value
                for key, value in log.items()
                if not isinstance(value, np.ndarray)
            }
            logger.debug("Epoch: " + str(epoch) + " : " + str(log))
            if pipeline_config.get('use_tensorboard_logger', False):
                self.tensorboard_log(budget=budget, epoch=epoch, log=log)

            if stop_training:
                break

            epoch += 1
            torch.cuda.empty_cache()

        # wrap up
        loss, final_log = self.wrap_up_training(
            trainer=trainer,
            logs=logs,
            epoch=epoch,
            train_loader=train_loader,
            valid_loader=valid_loader,
            budget=budget,
            training_start_time=training_start_time,
            fit_start_time=fit_start_time,
            best_over_epochs=pipeline_config['best_over_epochs'],
            refit=refit,
            logger=logger)

        return {'loss': loss, 'info': final_log}
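
The loop above delegates per-epoch bookkeeping to a small hook contract on Trainer (on_epoch_start, train, on_epoch_end). The following self-contained sketch rehearses that contract with a hypothetical MinimalTrainer and dummy values; it is not the real Trainer class, only an illustration of the control flow fit() relies on.

import time

class MinimalTrainer:
    # Hypothetical stand-in for Trainer; only the hooks used by fit() above.
    def __init__(self, max_epochs=3):
        self.max_epochs = max_epochs

    def on_epoch_start(self, log, epoch):
        log['epoch_start'] = time.time()

    def train(self, epoch, train_loader):
        # Returns (metric_results, train_loss, stop_training), the same
        # triple unpacked in the training loop above.
        return [0.5], 1.0 / epoch, epoch >= self.max_epochs

    def on_epoch_end(self, log, epoch):
        log['epoch_time'] = time.time() - log.pop('epoch_start')
        return False  # no early stopping in this sketch

trainer = MinimalTrainer()
logs, epoch = [], 0
while True:
    log = dict()
    trainer.on_epoch_start(log=log, epoch=epoch)
    _, log['loss'], stop_training = trainer.train(epoch + 1, train_loader=None)
    stop_training = trainer.on_epoch_end(log=log, epoch=epoch) or stop_training
    logs.append(log)
    if stop_training:
        break
    epoch += 1
print(len(logs))  # 3 epochs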
Example #2
    def fit(self, hyperparameter_config, pipeline_config, train_loader,
            valid_loader, network, optimizer, lr_scheduler, optimize_metric,
            additional_metrics, log_functions, budget, loss_function,
            budget_type, config_id, working_directory, train_indices,
            valid_indices):

        if budget < 1e-5:
            return {
                'loss': float('inf') if pipeline_config["minimize"] else -float('inf'),
                'info': dict()
            }

        training_start_time = time.time()
        # prepare
        if not torch.cuda.is_available():
            pipeline_config["cuda"] = False

        device = torch.device('cuda' if pipeline_config['cuda'] else 'cpu')

        checkpoint_path = get_checkpoint_dir(working_directory)
        checkpoint = None
        if pipeline_config['save_checkpoints']:
            checkpoint = load_checkpoint(checkpoint_path, config_id, budget)

        network = load_model(network, checkpoint)

        tensorboard_logging = pipeline_config.get('use_tensorboard_logger', False)

        # from torch.optim import SGD
        # optimizer = SGD(network.parameters(), lr=0.3)

        # optimizer       = load_optimizer(optimizer, checkpoint, device)
        # lr_scheduler    = load_scheduler(lr_scheduler, checkpoint)

        hyperparameter_config = ConfigWrapper(self.get_name(),
                                              hyperparameter_config)

        if "batch_loss_computation_technique" in hyperparameter_config:
            batch_loss_name = hyperparameter_config["batch_loss_computation_technique"]
        else:
            batch_loss_name = pipeline_config["batch_loss_computation_techniques"][0]

        batch_loss_computation_technique = self.batch_loss_computation_techniques[
            batch_loss_name]()
        batch_loss_computation_technique.set_up(
            pipeline_config,
            ConfigWrapper(batch_loss_name, hyperparameter_config), self.logger)

        # Training loop
        logs = []
        epoch = 0

        optimize_metrics = []
        val_metrics = [optimize_metric] + additional_metrics
        if pipeline_config['evaluate_on_train_data']:
            optimize_metrics = val_metrics
        elif valid_loader is None:
            self.logger.warning(
                'No validation data specified and "evaluate_on_train_data" is '
                'disabled! Ignoring "evaluate_on_train_data" and evaluating on '
                'the training data instead.')
            optimize_metrics = val_metrics

        trainer = Trainer(model=network,
                          loss_computation=batch_loss_computation_technique,
                          criterion=loss_function,
                          budget=budget,
                          optimizer=optimizer,
                          scheduler=lr_scheduler,
                          budget_type=budget_type,
                          device=device,
                          config_id=config_id,
                          checkpoint_path=checkpoint_path
                          if pipeline_config['save_checkpoints'] else None,
                          images_to_plot=pipeline_config['tensorboard_images_count']
                          if tensorboard_logging else 0)

        model_params = self.count_parameters(network)

        start_up = time.time() - training_start_time
        epoch_train_time = 0
        val_time = 0
        log_time = 0

        # tmp = time.time()
        # for _ in range(100):
        #     for _ in train_loader:
        #         pass
        # time_used = time.time() - tmp
        # self.logger.debug("Test time: " + str(time_used) + "s : \n" + str(pprint.pformat(train_loader.dataset.get_times('train_'))))

        self.logger.debug("Start train. Budget: " + str(budget))

        last_log_time = time.time()
        while True:
            # prepare epoch
            log = dict()

            # train
            tmp = time.time()
            optimize_metric_results, train_loss, stop_training = trainer.train(
                epoch + 1, train_loader, optimize_metrics)

            log['train_loss'] = train_loss
            for i, metric in enumerate(optimize_metrics):
                log['train_' + metric.name] = optimize_metric_results[i]
            epoch_train_time += time.time() - tmp

            # evaluate
            tmp = time.time()
            if valid_loader is not None:
                valid_metric_results = trainer.evaluate(valid_loader,
                                                        val_metrics,
                                                        epoch=epoch + 1)

                for i, metric in enumerate(val_metrics):
                    log['val_' + metric.name] = valid_metric_results[i]
            val_time += time.time() - tmp

            # additional logs - e.g. test evaluation
            tmp = time.time()
            for func in log_functions:
                log[func.name] = func(network, epoch + 1)
            log_time += time.time() - tmp

            log['epochs'] = epoch + 1
            log['model_parameters'] = model_params
            log['learning_rate'] = optimizer.param_groups[0]['lr']

            # log.update(train_loader.dataset.get_times('train_'))
            # log.update(valid_loader.dataset.get_times('val_'))

            logs.append(log)

            epoch += 1

            self.logger.debug("Epoch: " + str(epoch) + " : " + str(log))

            if budget_type == 'epochs' and epoch + 1 >= budget:
                break

            if stop_training:
                break

            if tensorboard_logging and (time.time() - last_log_time >=
                    pipeline_config['tensorboard_min_log_interval']):
                import tensorboard_logger as tl
                worker_path = 'Train/'
                tl.log_value(worker_path + 'budget', float(budget), epoch)
                for name, value in log.items():
                    tl.log_value(worker_path + name, float(value), epoch)
                last_log_time = time.time()

        # wrap up
        wrap_up_start_time = time.time()

        self.logger.debug("Finished Training")

        opt_metric_name = 'train_' + optimize_metric.name
        if valid_loader is not None:
            opt_metric_name = 'val_' + optimize_metric.name

        if pipeline_config["minimize"]:
            final_log = min(logs, key=lambda x: x[opt_metric_name])
        else:
            final_log = max(logs, key=lambda x: x[opt_metric_name])

        if tensorboard_logging:
            import tensorboard_logger as tl
            worker_path = 'Train/'
            tl.log_value(worker_path + 'budget', float(budget), epoch)
            for name, value in final_log.items():
                tl.log_value(worker_path + name, float(value), epoch)

        if trainer.latest_checkpoint:
            final_log['checkpoint'] = trainer.latest_checkpoint
        elif pipeline_config['save_checkpoints']:
            path = save_checkpoint(checkpoint_path, config_id, budget, network,
                                   optimizer, lr_scheduler)
            final_log['checkpoint'] = path

        final_log['train_datapoints'] = len(train_indices)
        if valid_loader is not None:
            final_log['val_datapoints'] = len(valid_indices)

        loss = final_log[opt_metric_name] * (1 if pipeline_config["minimize"]
                                             else -1)

        self.logger.info("Finished train with budget " + str(budget) +
                         "s, Training took " +
                         str(int(wrap_up_start_time - training_start_time)) +
                         "s, Wrap up took " +
                         str(int(time.time() - wrap_up_start_time)) +
                         "s, Init took " + str(int(start_up)) +
                         "s, Train took " + str(int(epoch_train_time)) +
                         "s, Validation took " + str(int(val_time)) +
                         "s, Log functions took " + str(int(log_time)) +
                         "s, Cumulative time " +
                         str(int(trainer.cumulative_time)) +
                         "s.\nTotal time consumption in s: " +
                         str(int(time.time() - training_start_time)))

        return {'loss': loss, 'info': final_log}
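
get_checkpoint_dir, load_checkpoint, save_checkpoint and load_model are imported helpers whose bodies are not shown here. A minimal sketch of what they might look like, built on plain torch.save / torch.load (the file naming scheme and dict keys are assumptions of this sketch, not the real on-disk format):

import os
import torch

def _checkpoint_file(checkpoint_dir, config_id, budget):
    # Naming scheme assumed for this sketch: one file per (config, budget).
    return os.path.join(checkpoint_dir,
                        'checkpoint_{}_{}.pt'.format(config_id, budget))

def save_checkpoint_sketch(checkpoint_dir, config_id, budget,
                           network, optimizer, lr_scheduler):
    os.makedirs(checkpoint_dir, exist_ok=True)
    path = _checkpoint_file(checkpoint_dir, config_id, budget)
    torch.save({
        'network_state': network.state_dict(),
        'optimizer_state': optimizer.state_dict(),
        'scheduler_state': None if lr_scheduler is None
                           else lr_scheduler.state_dict(),
    }, path)
    return path

def load_checkpoint_sketch(checkpoint_dir, config_id, budget):
    path = _checkpoint_file(checkpoint_dir, config_id, budget)
    return torch.load(path, map_location='cpu') if os.path.exists(path) else None

def load_model_sketch(network, checkpoint):
    # Mirrors load_model(network, checkpoint) above: a missing checkpoint
    # leaves the freshly initialized network untouched.
    if checkpoint is not None:
        network.load_state_dict(checkpoint['network_state'])
    return network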
Example #3
    def fit(self, hyperparameter_config, pipeline_config,
            X_train, Y_train, X_valid, Y_valid,
            network, optimizer,
            train_metric, additional_metrics,
            log_functions,
            budget,
            loss_function,
            training_techniques,
            fit_start_time):
        # prepare
        if not torch.cuda.is_available():
            pipeline_config["cuda"] = False
        hyperparameter_config = ConfigWrapper(self.get_name(), hyperparameter_config)
        training_techniques = [t() for t in self.training_techniques.values()] + training_techniques
        training_components, train_data, X_train, Y_train, X_valid, Y_valid, eval_specifics = prepare_training(
            pipeline_config=pipeline_config, hyperparameter_config=hyperparameter_config, training_techniques=training_techniques,
            batch_loss_computation_technique=self.batch_loss_computation_techniques[hyperparameter_config["batch_loss_computation_technique"]](),
            X_train=X_train, Y_train=Y_train, X_valid=X_valid, Y_valid=Y_valid, batch_size=hyperparameter_config["batch_size"],
            network=network, optimizer=optimizer, loss_function=loss_function, train_metric=train_metric,
            additional_metrics=additional_metrics, log_functions=log_functions, budget=budget, logger=self.logger, fit_start_time=fit_start_time)
        self.logger.debug("Start train. Budget: " + str(budget))

        # Training loop
        logs = network.logs
        epoch = network.epochs_trained
        run_training = True
        training_start_time = time.time()

        while run_training:

            # prepare epoch
            log = dict()
            for t in training_techniques:
                t.before_train_batches(training_components, log, epoch)

            # train and eval
            log['loss'] = _train_batches(train_data, training_components, training_techniques)
            _eval_metrics(eval_specifics=eval_specifics["after_epoch"], hyperparameter_config=hyperparameter_config,
                pipeline_config=pipeline_config, training_components=training_components,
                X_train=X_train, Y_train=Y_train, X_valid=X_valid, Y_valid=Y_valid, log=log, epoch=epoch, budget=budget)
            
            # check if finished and apply training techniques
            run_training = not any([t.after_train_batches(training_components, log, epoch) for t in training_techniques])

            # handle logs
            logs.append(log)
            # update_logs(t, budget, log, 5, epoch + 1, verbose, True)
            self.logger.debug("Epoch: " + str(epoch) + " : " + str(log))
            epoch += 1

        # wrap up
        wrap_up_start_time = time.time()
        network.epochs_trained = epoch
        network.logs = logs
        final_log, loss_value = wrap_up_training(pipeline_config=pipeline_config, hyperparameter_config=hyperparameter_config,
            eval_specifics=eval_specifics["after_training"], training_techniques=training_techniques, training_components=training_components,
            logs=logs, X_train=X_train, Y_train=Y_train, X_valid=X_valid, Y_valid=Y_valid, epoch=epoch, budget=budget)
        self.logger.debug("Finished train! Loss: " + str(loss_value) + " : " + str(final_log))
        self.logger.info("Finished train with budget " + str(budget) +
                         ": Preprocessing took " + str(int(training_start_time - fit_start_time)) +
                         "s, Training took " + str(int(wrap_up_start_time - training_start_time)) + 
                         "s, Wrap up took " + str(int(time.time() - wrap_up_start_time)) +
                         "s. Total time consumption in s: " + str(int(time.time() - fit_start_time)))
        return {'loss': loss_value, 'info': final_log}
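
Each training technique in the loop above exposes before_train_batches and after_train_batches hooks; returning True from after_train_batches ends training. A minimal sketch of one such technique (a hypothetical class, assuming the budget stored in training_components is a time budget in seconds; the real BaseTrainingTechnique interface may differ):

import time

class TimeBudgetTechnique:
    # Extra entries this technique contributes to training_components.
    training_components = {}

    def set_up(self, training_components, pipeline_config, logger):
        self.logger = logger

    def before_train_batches(self, training_components, log, epoch):
        log['epoch_start_time'] = time.time()

    def after_train_batches(self, training_components, log, epoch):
        # True => stop training; here we stop once the time budget is spent.
        elapsed = time.time() - training_components['fit_start_time']
        return elapsed >= training_components['budget']

    def needs_eval_on_train_each_epoch(self):
        return False

    def needs_eval_on_valid_each_epoch(self):
        return False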
Example #4
    def fit(self, hyperparameter_config, pipeline_config,
            train_loader, valid_loader,
            network, optimizer,
            optimize_metric, additional_metrics,
            log_functions,
            budget,
            loss_function,
            training_techniques,
            fit_start_time,
            refit,
            hyperparameter_config_id):
        """Train the network.
        
        Arguments:
            hyperparameter_config {dict} -- The sampled hyperparameter config.
            pipeline_config {dict} -- The user specified configuration of the pipeline
            train_loader {DataLoader} -- Data for training.
            valid_loader {DataLoader} -- Data for validation.
            network {BaseNet} -- The neural network to be trained.
            optimizer {AutoNetOptimizerBase} -- The selected optimizer.
            optimize_metric {AutoNetMetric} -- The selected metric to optimize.
            additional_metrics {list} -- List of metrics that should be logged.
            log_functions {list} -- List of AutoNetLogFunctions that can log additional values such as test performance.
            budget {float} -- The budget for training
            loss_function {_Loss} -- The selected PyTorch loss module
            training_techniques {list} -- List of objects inheriting from BaseTrainingTechnique.
            fit_start_time {float} -- Start time of fit
            refit {bool} -- Whether training for refit or not.
            hyperparameter_config_id {tuple} -- ID of the sampled hyperparameter configuration.
        
        Returns:
            dict -- loss and info reported to BOHB
        """
        self.hyperparameter_config_id = hyperparameter_config_id
        self.pipeline_config = pipeline_config
        self.budget = budget
        hyperparameter_config = ConfigWrapper(self.get_name(), hyperparameter_config) 
        logger = logging.getLogger('autonet')
        logger.debug("Start train. Budget: " + str(budget))

        if pipeline_config["torch_num_threads"] > 0:
            torch.set_num_threads(pipeline_config["torch_num_threads"])

        trainer = Trainer(
            model=network,
            loss_computation=self.batch_loss_computation_techniques[hyperparameter_config["batch_loss_computation_technique"]](),
            metrics=[optimize_metric] + additional_metrics,
            log_functions=log_functions,
            criterion=loss_function,
            budget=budget,
            optimizer=optimizer,
            training_techniques=training_techniques,
            device=Trainer.get_device(pipeline_config),
            logger=logger,
            full_eval_each_epoch=pipeline_config["full_eval_each_epoch"])
        trainer.prepare(pipeline_config, hyperparameter_config, fit_start_time)

        model_params = self.count_parameters(network)

        logs = trainer.model.logs
        epoch = trainer.model.epochs_trained
        training_start_time = time.time()
        while True:
            # prepare epoch
            log = dict()
            trainer.on_epoch_start(log=log, epoch=epoch)
            
            # training
            optimize_metric_results, train_loss, stop_training = trainer.train(epoch + 1, train_loader)
            if valid_loader is not None and trainer.eval_valid_each_epoch:
                valid_metric_results = trainer.evaluate(valid_loader)

            # evaluate
            log['loss'] = train_loss
            log['model_parameters'] = model_params
            for i, metric in enumerate(trainer.metrics):
                log['train_' + metric.name] = optimize_metric_results[i]

                if valid_loader is not None and trainer.eval_valid_each_epoch:
                    log['val_' + metric.name] = valid_metric_results[i]
            if trainer.eval_additional_logs_each_epoch:
                for additional_log in trainer.log_functions:
                    log[additional_log.name] = additional_log(trainer.model, epoch)

            # wrap up epoch
            stop_training = trainer.on_epoch_end(log=log, epoch=epoch) or stop_training

            # handle logs
            logs.append(log)
            log = {key: value for key, value in log.items() if not isinstance(value, np.ndarray)}
            logger.debug("Epoch: " + str(epoch) + " : " + str(log))
            if 'use_tensorboard_logger' in pipeline_config and pipeline_config['use_tensorboard_logger']:
                self.tensorboard_log(budget=budget, epoch=epoch, log=log, logdir=pipeline_config["result_logger_dir"])

            if stop_training:
                break
            
            epoch += 1
            torch.cuda.empty_cache()

        # wrap up
        loss, final_log = self.wrap_up_training(trainer=trainer, logs=logs, epoch=epoch,
            train_loader=train_loader, valid_loader=valid_loader, budget=budget, training_start_time=training_start_time, fit_start_time=fit_start_time,
            best_over_epochs=pipeline_config['best_over_epochs'], refit=refit, logger=logger)
    
        return {'loss': loss, 'info': final_log}
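
self.tensorboard_log is defined elsewhere; a plausible sketch of it, mirroring the inline tensorboard_logger usage from Example #2 (the configure-once guard and the scalar filtering are assumptions of this sketch):

import tensorboard_logger as tl

_tb_configured = False

def tensorboard_log_sketch(budget, epoch, log, logdir):
    global _tb_configured
    if not _tb_configured:
        tl.configure(logdir)  # tensorboard_logger expects a one-time configure
        _tb_configured = True
    worker_path = 'Train/'
    tl.log_value(worker_path + 'budget', float(budget), int(epoch))
    for name, value in log.items():
        try:
            tl.log_value(worker_path + name, float(value), int(epoch))
        except (TypeError, ValueError):
            pass  # skip entries that are not scalar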
Example #5
    def fit(self, pipeline_config, hyperparameter_config, X, Y, train_indices,
            valid_indices, train_transform, valid_transform, dataset_info):

        # if len(X.shape) > 1:
        #     return super(CreateImageDataLoader, self).fit(pipeline_config, hyperparameter_config, X, Y, train_indices, valid_indices)

        torch.manual_seed(pipeline_config["random_seed"])
        hyperparameter_config = ConfigWrapper(self.get_name(),
                                              hyperparameter_config)
        to_int64 = transform_int64

        if dataset_info.default_dataset:
            train_dataset = dataset_info.default_dataset(
                root=pipeline_config['default_dataset_download_dir'],
                train=True,
                download=True,
                transform=train_transform)
            if valid_indices is not None:
                valid_dataset = dataset_info.default_dataset(
                    root=pipeline_config['default_dataset_download_dir'],
                    train=True,
                    download=True,
                    transform=valid_transform)
        elif len(X.shape) > 1:
            train_dataset = XYDataset(X,
                                      Y,
                                      transform=train_transform,
                                      target_transform=to_int64)
            valid_dataset = XYDataset(X,
                                      Y,
                                      transform=valid_transform,
                                      target_transform=to_int64)
        else:
            train_dataset = ImageFilelist(
                X,
                Y,
                transform=train_transform,
                target_transform=to_int64,
                cache_size=pipeline_config['dataloader_cache_size_mb'] * 1000,
                image_size=dataset_info.x_shape[2:])
            if valid_indices is not None:
                valid_dataset = ImageFilelist(
                    X,
                    Y,
                    transform=valid_transform,
                    target_transform=to_int64,
                    cache_size=0,
                    image_size=dataset_info.x_shape[2:])
                valid_dataset.cache = train_dataset.cache

        train_loader = DataLoader(
            dataset=train_dataset,
            batch_size=int(hyperparameter_config['batch_size']),
            sampler=SubsetRandomSampler(train_indices),
            drop_last=True,
            pin_memory=True,
            num_workers=pipeline_config['dataloader_worker'])

        valid_loader = None
        if valid_indices is not None:
            valid_loader = DataLoader(
                dataset=valid_dataset,
                batch_size=int(hyperparameter_config['batch_size']),
                sampler=SubsetRandomSampler(valid_indices),
                drop_last=False,
                pin_memory=True,
                num_workers=pipeline_config['dataloader_worker'])

        return {
            'train_loader': train_loader,
            'valid_loader': valid_loader,
            'batch_size': hyperparameter_config['batch_size']
        }
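
For reference, here is a self-contained usage sketch of the SubsetRandomSampler pattern this node relies on, with a synthetic TensorDataset standing in for the image data:

import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.sampler import SubsetRandomSampler

# Synthetic stand-in for the image data: 100 RGB 32x32 samples, 10 classes.
dataset = TensorDataset(torch.randn(100, 3, 32, 32),
                        torch.randint(0, 10, (100,)))

# 80/20 split of shuffled indices, playing the role of
# train_indices / valid_indices above.
indices = np.random.permutation(len(dataset)).tolist()
train_indices, valid_indices = indices[:80], indices[80:]

train_loader = DataLoader(dataset, batch_size=16,
                          sampler=SubsetRandomSampler(train_indices),
                          drop_last=True)
valid_loader = DataLoader(dataset, batch_size=16,
                          sampler=SubsetRandomSampler(valid_indices),
                          drop_last=False)
print(len(train_loader), len(valid_loader))  # 5 train batches, 2 valid batches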
Example #6
    def fit(self, pipeline_config, hyperparameter_config, dataset_info, X, Y,
            train_indices, valid_indices):
        mean, std = self.compute_mean_std(pipeline_config, hyperparameter_config,
                                          X, Y, train_indices, dataset_info)

        hyperparameter_config = ConfigWrapper(self.get_name(),
                                              hyperparameter_config)

        transform_list = []
        image_size = min(dataset_info.x_shape[-2], dataset_info.x_shape[-1])

        if len(X.shape) > 1:
            transform_list.append(transforms.ToPILImage())

        if hyperparameter_config['augment']:
            if (hyperparameter_config['fastautoaugment']
                    and hyperparameter_config['autoaugment']):
                # fast autoaugment and autoaugment
                transform_list.extend([
                    FastAutoAugment(),
                    AutoAugment(),
                    transforms.Resize(image_size),
                    transforms.RandomCrop(image_size, padding=4),
                    transforms.RandomHorizontalFlip()
                ])
            elif hyperparameter_config['fastautoaugment']:
                # fast autoaugment
                transform_list.extend([
                    FastAutoAugment(),
                    transforms.Resize(image_size),
                    transforms.RandomCrop(image_size, padding=4),
                    transforms.RandomHorizontalFlip()
                ])
            elif hyperparameter_config['autoaugment']:
                # autoaugment
                transform_list.extend([
                    AutoAugment(),
                    transforms.Resize(image_size),
                    transforms.RandomCrop(image_size, padding=4),
                    transforms.RandomHorizontalFlip()
                ])
            else:
                # default augment color, rotation, size
                transform_list.extend([
                    transforms.ColorJitter(brightness=0.196,
                                           saturation=0.196,
                                           hue=0.141),
                    transforms.RandomAffine(degrees=10,
                                            shear=0.1,
                                            fillcolor=127),
                    transforms.RandomResizedCrop(image_size,
                                                 scale=(0.533, 1),
                                                 ratio=(0.75, 1.25)),
                    transforms.RandomHorizontalFlip()
                ])
        else:
            transform_list.extend([
                transforms.Resize(image_size),
                transforms.CenterCrop(image_size),
            ])

        # grayscale if only one channel
        if dataset_info.x_shape[1] == 1:
            transform_list.append(transforms.Grayscale(1))

        # normalize
        transform_list.append(transforms.ToTensor())
        transform_list.append(transforms.Normalize(mean, std))

        # cutout
        if hyperparameter_config['cutout']:
            n_holes = hyperparameter_config['cutout_holes']
            transform_list.append(
                Cutout(n_holes=n_holes,
                       length=hyperparameter_config['length'],
                       probability=0.5))

        train_transform = transforms.Compose(transform_list)

        transform_list = []
        if len(X.shape) > 1:
            transform_list.append(transforms.ToPILImage())

        transform_list.extend([
            transforms.Resize(image_size),
            transforms.CenterCrop(image_size),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
        ])
        if dataset_info.x_shape[1] == 1:
            transform_list = [transforms.Grayscale(1)] + transform_list
        valid_transform = transforms.Compose(transform_list)

        return {
            'train_transform': train_transform,
            'valid_transform': valid_transform,
            'mean': mean,
            'std': std
        }
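
A short usage sketch of the validation pipeline assembled above, applied to a dummy PIL image (placeholder mean/std; the real values come from compute_mean_std):

import numpy as np
from PIL import Image
from torchvision import transforms

# Dummy 48x48 RGB image in place of a real sample.
img = Image.fromarray(np.random.randint(0, 255, (48, 48, 3), dtype=np.uint8))

image_size = 32
mean, std = [0.5] * 3, [0.5] * 3  # placeholder statistics

valid_transform = transforms.Compose([
    transforms.Resize(image_size),
    transforms.CenterCrop(image_size),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

print(valid_transform(img).shape)  # torch.Size([3, 32, 32])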
Example #7
def prepare_training(pipeline_config, hyperparameter_config,
                     training_techniques, batch_loss_computation_technique,
                     X_train, Y_train, X_valid, Y_valid, batch_size, network,
                     optimizer, loss_function, train_metric,
                     additional_metrics, log_functions, budget, logger,
                     fit_start_time):
    """ Prepare the data and components for training"""

    torch.manual_seed(pipeline_config["random_seed"])
    device = torch.device('cuda:0' if pipeline_config['cuda'] else 'cpu')

    if pipeline_config['cuda']:
        logger.debug('Running on the GPU using CUDA.')
    else:
        logger.debug(
            'Not running on GPU as CUDA is either disabled or not available. Running on CPU instead.'
        )

    # initialize training techniques and training components
    batch_loss_computation_technique.set_up(
        pipeline_config,
        ConfigWrapper(
            hyperparameter_config["batch_loss_computation_technique"],
            hyperparameter_config), logger)
    training_components = {
        "network": network.to(device),
        "optimizer": optimizer,
        "loss_function": loss_function.to(device),
        "metrics": [train_metric] + additional_metrics,
        "train_metric_name": train_metric.__name__,
        "log_functions": log_functions,
        "device": device,
        "initial_budget": network.budget_trained,
        "budget": budget,
        "batch_loss_computation_technique": batch_loss_computation_technique,
        "fit_start_time": fit_start_time
    }
    for t in training_techniques:
        training_components.update(t.training_components)
    for t in training_techniques:
        t.set_up(training_components, pipeline_config, logger)

    # prepare data
    X_train, Y_train = to_dense(X_train), to_dense(Y_train)
    X_valid, Y_valid = to_dense(X_valid), to_dense(Y_valid)
    X_train = torch.from_numpy(X_train).float()
    Y_train = torch.from_numpy(Y_train)
    train_data = DataLoader(TensorDataset(X_train, Y_train), batch_size, shuffle=True)
    X_valid = torch.from_numpy(X_valid).float().to(device) if X_valid is not None else None
    Y_valid = torch.from_numpy(Y_valid).to(device) if Y_valid is not None else None

    # eval specifics. decide which datasets should be evaluated when.
    after_epoch_eval_specifics = {
        "train": any(t.needs_eval_on_train_each_epoch() for t in training_techniques)
                 or (pipeline_config["full_eval_each_epoch"]
                     and pipeline_config["eval_on_training"]),
        "valid": any(t.needs_eval_on_valid_each_epoch() for t in training_techniques)
                 or pipeline_config["full_eval_each_epoch"],
        "logs": pipeline_config["full_eval_each_epoch"]
    }
    after_training_eval_specifics = {
        "train": not after_epoch_eval_specifics["train"]
                 and (pipeline_config["eval_on_training"]
                      or X_valid is None or Y_valid is None),
        "valid": not after_epoch_eval_specifics["valid"],
        "logs": not after_epoch_eval_specifics["logs"]
    }
    eval_specifics = {
        "after_epoch": after_epoch_eval_specifics,
        "after_training": after_training_eval_specifics
    }
    return training_components, train_data, X_train, Y_train, X_valid, Y_valid, eval_specifics
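
The two eval_specifics dicts are complements: whatever is not evaluated after every epoch is evaluated once after training. A standalone rehearsal of that boolean logic with plain flags and no techniques (all names local to this sketch):

def eval_specifics_sketch(full_eval_each_epoch, eval_on_training, has_valid_data):
    # No training techniques requesting extra evaluations, for simplicity.
    after_epoch = {
        'train': full_eval_each_epoch and eval_on_training,
        'valid': full_eval_each_epoch,
        'logs': full_eval_each_epoch,
    }
    after_training = {
        'train': not after_epoch['train'] and (eval_on_training or not has_valid_data),
        'valid': not after_epoch['valid'],
        'logs': not after_epoch['logs'],
    }
    return {'after_epoch': after_epoch, 'after_training': after_training}

# Only final evaluation requested: everything happens after training.
print(eval_specifics_sketch(full_eval_each_epoch=False,
                            eval_on_training=True,
                            has_valid_data=True))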