Exemple #1
0
    def on_epoch_end(self, net, dataset_train, dataset_valid, **kwargs):
        """Score the training set at the end of an epoch and record the result.

        When no predictions are cached on this callback (``self.y_preds_``
        is empty), the training data is run through ``net`` once with
        ``training=False``. The resulting predictions and extracted targets
        are then assigned to every ``PostEpochTrainScoring`` callback
        registered on ``net`` so the forward pass is shared between them.

        Parameters
        ----------
        net
            The net whose callbacks are being notified.
        dataset_train
            Training data; handed to ``net.get_dataset`` and to the scoring
            function.
        dataset_valid
            Not used by this method.
        **kwargs
            Ignored.
        """
        if len(self.y_preds_) == 0:
            # Function-scope import so the block does not rely on a
            # module-level ``torch`` name being present.
            import torch

            dataset = net.get_dataset(dataset_train)
            # Creating and iterating a PyTorch DataLoader draws from the
            # global torch RNG. Snapshot the state and restore it afterwards
            # so that scoring does not perturb the randomness of training.
            rng_state = torch.random.get_rng_state()
            y_preds = []
            y_test = []
            try:
                iterator = net.get_iterator(dataset, training=False)
                for data in iterator:
                    batch_X, batch_y = unpack_data(data)
                    # NOTE(review): newer skorch releases expect the whole
                    # batch here instead of X only -- confirm against the
                    # skorch version this code targets.
                    yp = net.evaluation_step(batch_X, training=False)
                    yp = yp.to(device="cpu")
                    y_test.append(self.target_extractor(batch_y))
                    y_preds.append(yp)
            finally:
                torch.random.set_rng_state(rng_state)
            y_test = np.concatenate(y_test)

            # Adding the recomputed preds to all other
            # instances of PostEpochTrainScoring of this
            # Skorch-Net (NeuralNet, BraindecodeClassifier etc.)
            # (They will be reinitialized to empty lists by skorch
            # each epoch)
            # ``callbacks_`` is used instead of concatenating
            # ``_default_callbacks`` with ``callbacks``: the latter may be
            # None or hold entries that are not (name, callback) pairs.
            epoch_cbs = [
                cb for _name, cb in net.callbacks_
                if isinstance(cb, PostEpochTrainScoring)
            ]
            for cb in epoch_cbs:
                cb.y_preds_ = y_preds
                cb.y_trues_ = y_test

        # y pred should be same as self.y_preds_
        with _cache_net_forward_iter(net,
                                     use_caching=True,
                                     y_preds=self.y_preds_) as cached_net:
            current_score = self._scoring(cached_net, dataset_train,
                                          self.y_trues_)
        self._record_score(net.history, current_score)
    def evaluation_step(self, batch, training=False):
        """Run one forward pass and push the result through the likelihood.

        The module is switched to evaluation mode unless ``training=True``
        is passed, in which case train-mode features (e.g. dropout) stay
        active and gradients are enabled.

        Parameters
        ----------
        batch
          A single batch returned by the data loader.

        training : bool (default=False)
          Whether to set the module to train mode or not.

        Returns
        -------
        y_infer
          The likelihood applied to the module output. If the module
          returns a tuple, only its first element is passed through the
          likelihood; the remaining elements are appended unchanged.

        """
        self.check_is_fitted()
        inputs, _unused_target = unpack_data(batch)
        with torch.set_grad_enabled(training):
            with gpytorch.settings.fast_pred_var():
                self.module_.train(training)
                output = self.infer(inputs)
                if not isinstance(output, tuple):
                    return self.likelihood_(output)
                # multiple outputs: only the first one is a distribution
                head = self.likelihood_(output[0])
                return (head, ) + output[1:]
Exemple #3
0
    def on_batch_end(self, net, batch, training, **kwargs):
        """Score a single batch and store the score in the net's history.

        Nothing happens when ``training`` does not match ``self.on_train``.
        Otherwise the batch prediction passed as ``kwargs['y_pred']`` is
        handed to ``_cache_net_forward_iter`` and the scoring function is
        applied to the batch. The score is recorded under ``self.name_``.
        A ``KeyError`` raised while scoring or recording is ignored.
        """
        if training == self.on_train:
            inputs, target = unpack_data(batch)
            cached_preds = [kwargs['y_pred']]
            with _cache_net_forward_iter(
                    net, self.use_caching, cached_preds) as cached_net:
                # A missing target (y=None) is passed through untouched;
                # the scoring function is expected to cope with it.
                if target is not None:
                    target = self.target_extractor(target)
                try:
                    batch_score = self._scoring(cached_net, inputs, target)
                    cached_net.history.record_batch(self.name_, batch_score)
                except KeyError:
                    pass
Exemple #4
0
    def on_batch_end(
            self, net, batch, y_pred, training, **kwargs):
        """Collect the batch's prediction and target for later scoring.

        Does nothing when caching is disabled or when ``training`` does not
        match ``self.on_train``. Otherwise ``y_pred`` is appended to
        ``self.y_preds_`` and, if the batch has a target, that target is
        appended to ``self.y_trues_``.
        """
        if not self.use_caching:
            return
        if training != self.on_train:
            return

        # Only references are stored here, no copies, so every *Scoring
        # callback instance shares the same underlying data. For the same
        # reason self.target_extractor(y) is not applied at this point but
        # on epoch end: no partial copies of y are kept alive during
        # training.
        _inputs, target = unpack_data(batch)
        if target is not None:
            self.y_trues_.append(target)
        self.y_preds_.append(y_pred)
Exemple #5
0
    def on_epoch_end(self, net, dataset_train, dataset_valid, **kwargs):
        """Score the training set at the end of an epoch and record the result.

        When no predictions are cached on this callback (``self.y_preds_``
        is empty), the training data is run through ``net`` once with
        ``training=False`` while the global torch RNG state is preserved.
        The resulting predictions and extracted targets are assigned to
        every ``PostEpochTrainScoring`` callback in ``net.callbacks_`` so the
        forward pass is shared between them.

        Parameters
        ----------
        net
            The net whose callbacks are being notified.
        dataset_train
            Training data; handed to ``net.get_dataset`` and to the scoring
            function.
        dataset_valid
            Not used by this method.
        **kwargs
            Ignored.
        """
        if len(self.y_preds_) == 0:
            dataset = net.get_dataset(dataset_train)
            # TODO: remove after skorch 0.10 release
            # X, y unpacking has been pushed downstream in skorch 0.10, so
            # newer versions receive the whole batch and older ones only X.
            # The installed version cannot change mid-loop, hence the check
            # is done once up front instead of once per batch.
            pass_whole_batch = check_version('skorch', min_version='0.10.1')
            # Prevent that rng state of torch is changed by
            # creation+usage of iterator
            # Unfortunately calling __iter__() of a pytorch
            # DataLoader will change the random state
            # The ``finally`` clause below sets the rng state back, also
            # when evaluation raises.
            rng_state = torch.random.get_rng_state()
            y_preds = []
            y_test = []
            try:
                iterator = net.get_iterator(dataset, training=False)
                for batch in iterator:
                    batch_X, batch_y = unpack_data(batch)
                    eval_input = batch if pass_whole_batch else batch_X
                    yp = net.evaluation_step(eval_input, training=False)
                    yp = yp.to(device="cpu")
                    y_test.append(self.target_extractor(batch_y))
                    y_preds.append(yp)
            finally:
                torch.random.set_rng_state(rng_state)
            y_test = np.concatenate(y_test)

            # Adding the recomputed preds to all other
            # instances of PostEpochTrainScoring of this
            # Skorch-Net (NeuralNet, BraindecodeClassifier etc.)
            # (They will be reinitialized to empty lists by skorch
            # each epoch)
            epoch_cbs = [
                cb for _name, cb in net.callbacks_
                if isinstance(cb, PostEpochTrainScoring)
            ]
            for cb in epoch_cbs:
                cb.y_preds_ = y_preds
                cb.y_trues_ = y_test
        # y pred should be same as self.y_preds_
        # Unclear if this also leads to any
        # random generator call?
        with _cache_net_forward_iter(net,
                                     use_caching=True,
                                     y_preds=self.y_preds_) as cached_net:
            current_score = self._scoring(cached_net, dataset_train,
                                          self.y_trues_)
        self._record_score(net.history, current_score)
Exemple #6
0
    def run_single_epoch(self, dataset, training, prefix, step_fn,
                         **fit_params):
        """Run one pass over ``dataset`` for training or validation.

        For every batch the ``on_batch_begin`` / ``on_batch_end``
        notifications are sent, ``step_fn`` is invoked, and the step's
        ``"distance"`` value and the batch size are written to the history.
        After the loop the number of processed batches is recorded.

        Parameters
        ----------
        dataset : torch Dataset
            The initialized dataset to loop over.

        training : bool
            Whether to set the module to train mode or not.

        prefix : str
            Prefix to use when saving to the history.

        step_fn : callable
            Function to call for each batch. It additionally receives a
            ``train_generator`` flag that is true on every
            ``self.train_generator_every``-th batch, starting with the
            first one.

        **fit_params : dict
            Additional parameters passed to the ``step_fn``.
        """
        placeholder_target = uses_placeholder_y(dataset)
        batches = self.get_iterator(dataset, training=training)

        n_batches = 0
        for idx, batch in enumerate(batches):
            Xi, yi = unpack_data(batch)
            # Callbacks never see a placeholder target.
            yi_res = None if placeholder_target else yi
            self.notify("on_batch_begin", X=Xi, y=yi_res, training=training)

            update_generator = idx % self.train_generator_every == 0
            step = step_fn(Xi, yi, train_generator=update_generator,
                           **fit_params)

            distance = step["distance"].item()
            self.history.record_batch(prefix + "_distance", distance)
            self.history.record_batch(prefix + "_batch_size", get_len(Xi))
            self.notify("on_batch_end", X=Xi, y=yi_res, training=training,
                        **step)
            n_batches = idx + 1

        self.history.record(prefix + "_batch_count", n_batches)
Exemple #7
0
    def run_single_epoch(self, dataset, training, prefix, step_fn,
                         **fit_params):
        """Run one pass over ``dataset`` for training or validation.

        For every batch the ``on_batch_begin`` / ``on_batch_end``
        notifications are sent, ``step_fn`` is invoked, and the step's
        ``"loss"`` value is written to the history together with the batch
        size, which is taken from the ``"nodes"`` entry of the batch input.
        After the loop the number of processed batches is recorded.

        Parameters
        ----------
        dataset
            The dataset to loop over.

        training : bool
            Forwarded to ``get_iterator`` and to the notifications.

        prefix : str
            Prefix to use when saving to the history.

        step_fn : callable
            Function to call for each batch.

        **fit_params : dict
            Additional parameters passed to the ``step_fn``.
        """
        placeholder_target = uses_placeholder_y(dataset)
        batches = self.get_iterator(dataset, training=training)

        n_batches = 0
        for n_batches, batch in enumerate(batches, start=1):
            Xi, yi = unpack_data(batch)
            # Callbacks never see a placeholder target.
            yi_res = None if placeholder_target else yi
            self.notify("on_batch_begin", X=Xi, y=yi_res, training=training)

            step = step_fn(Xi, yi, **fit_params)

            loss_value = step["loss"].item()
            self.history.record_batch(prefix + "_loss", loss_value)
            node_count = get_len(Xi["nodes"])
            self.history.record_batch(prefix + "_batch_size", node_count)
            self.notify("on_batch_end", X=Xi, y=yi_res, training=training,
                        **step)
        self.history.record(prefix + "_batch_count", n_batches)
Exemple #8
0
def loss_scoring(net, X, y=None, sample_weight=None):
    """Score ``X``/``y`` with the net's own criterion.

    The loss is computed batch by batch through the same
    ``evaluation_step`` / ``get_loss`` calls the net itself exposes, and
    the per-batch results are combined according to the criterion's
    reduction.

    A typical use is providing a ``score`` method for a
    :class:`.NeuralNet` subclass, e.g. when the net is combined with
    sklearn objects that rely on the model's ``score`` method:

    >>> class ScoredNet(skorch.NeuralNetClassifier):
    ...     def score(self, X, y=None):
    ...         return loss_scoring(self, X, y)

    Parameters
    ----------
    net : skorch.NeuralNet
        A fitted Skorch :class:`.NeuralNet` object.

    X : input data, compatible with skorch.dataset.Dataset
        By default, you should be able to pass:

          * numpy arrays
          * torch tensors
          * pandas DataFrame or Series
          * scipy sparse CSR matrices
          * a dictionary of the former three
          * a list/tuple of the former three
          * a Dataset

        If this doesn't work with your data, you have to pass a
        ``Dataset`` that can deal with the data.

    y : target data, compatible with skorch.dataset.Dataset
        The same data types as for ``X`` are supported. If your X is a Dataset
        that contains the target, ``y`` may be set to None.

    sample_weight : array-like of shape (n_samples,)
        Sample weights. Not supported; must be left as None.

    Returns
    -------
    loss_value : float32 or np.ndarray
        Return type depends on ``net.criterion_.reduction``, and will be a float
        if reduction is ``'sum'`` or ``'mean'``. If reduction is ``'none'`` then
        this function returns a ``np.ndarray`` object.

    Raises
    ------
    NotImplementedError
        If ``sample_weight`` is not None.
    ValueError
        If ``net.criterion_.reduction`` is none of ``'mean'``, ``'sum'``
        or ``'none'``.

    """
    if sample_weight is not None:
        raise NotImplementedError(
            "sample_weight for loss_scoring is not yet supported.")

    net.check_is_fitted()

    batches = net.get_iterator(net.get_dataset(X, y), training=False)
    reduction = net.criterion_.reduction
    if reduction not in ("mean", "sum", "none"):
        raise ValueError(
            "Expected one of 'mean', 'sum' or 'none' "
            "for reduction but got {reduction}.".format(reduction=reduction))

    unreduced = reduction == "none"
    batch_losses = []
    batch_sizes = []
    for batch in batches:
        y_pred = net.evaluation_step(batch, training=False)
        target = unpack_data(batch)[1]
        loss = net.get_loss(y_pred, target)
        # Unreduced losses are arrays; reduced losses are plain scalars.
        if unreduced:
            batch_losses.append(loss.detach().cpu().numpy())
        else:
            batch_losses.append(loss.item())
        batch_sizes.append(target.size(0))

    if unreduced:
        return np.concatenate(batch_losses, 0)
    if reduction == "sum":
        return np.sum(batch_losses)
    # "mean": weight each batch mean by its size so a smaller final batch
    # does not skew the overall average.
    return np.average(batch_losses, weights=batch_sizes)