コード例 #1
0
    def apply(self, data: Data) -> NormalPredictiveDistribution:
        r"""Predicts new inputs.

        The kind of distribution returned depends on the learner's
        `_uncertainties` and `_correlations` settings:

        * both `None`: the ensemble prediction is returned as a delta
          distribution;
        * `_uncertainties == "naive"`: mean and standard deviation are taken
          over the predictions of the individual trees;
        * additionally `_correlations == "naive"`: the correlation matrix
          between predictions (over trees) is included, unless more than
          25000 samples are requested and `_force_corr` is not set, in which
          case a warning is issued and no correlations are computed.

        Parameters:
            data: finite indexed data to predict;

        Returns:
            predictive distribution (delta, normal or correlated normal,
            see above)

        Raises:
            BenchmarkError: if the stored uncertainty / correlation settings
                are not one of the combinations listed above (this includes
                correlations requested without uncertainties).
        """

        data = params.instance(
            data, Data
        )  # todo: params.data(..., is_finite=True, is_labeled=True)

        xpred = params.real_matrix(data.samples())

        # predict
        # scikit-learn's RandomForestRegressor.predict() method does not support
        # returning predictions for all trees in the ensemble. Therefore,
        # `preds = self._model.predict(xpred)` is insufficient whenever
        # per-tree statistics are needed.

        if self._uncertainties is None and self._correlations is None:
            preds = self._model.predict(xpred)
            return DeltaPredictiveDistribution(mean=preds)

        if self._uncertainties != "naive":
            raise BenchmarkError(
                "internal error, unknown parameter for uncertainties of RandomForestRegressionSklearn"
            )

        # One row per tree, one column per sample.
        # np.asfarray was removed in NumPy 2.0; np.asarray with an explicit
        # float64 dtype is the equivalent that works on old and new versions.
        preds = np.asarray(
            [tree.predict(xpred) for tree in self._model.estimators_], dtype=np.float64
        )

        if self._correlations is None:
            return NormalPredictiveDistribution(
                mean=np.mean(preds, axis=0), stddev=np.std(preds, axis=0)
            )

        if self._correlations != "naive":
            raise BenchmarkError(
                "internal error, unknown parameter for correlations of RandomForestRegressionSklearn"
            )

        mean = np.mean(preds, axis=0)
        stddev = np.std(preds, axis=0)

        if (data.num_samples > 25000) and not self._force_corr:
            warn(
                "Input correlations requested for >2.5E4 predictions."
                " Correlation matrix will not be computed, because a matrix this large may"
                " take up too much RAM. (2.5E4^2 entries * 8 bytes per entry / 1E6 bytes per MB = 5000MB)."
                " To force computation anyway, set `force_corr = True` in learner constructor.",
                UserWarning,
            )
            return NormalPredictiveDistribution(mean=mean, stddev=stddev)

        # Must handle single-prediction separately, as in this case np.corrcoef
        # will return single number rather than 1x1 array.
        if preds.shape[1] == 1:
            corr = np.array([[1]])
        else:
            corr = np.corrcoef(preds, rowvar=False)

        return CorrelatedNormalPredictiveDistribution(mean=mean, stddev=stddev, corr=corr)
コード例 #2
0
    def apply(self, data: Data) -> NormalPredictiveDistribution:
        r"""Predicts new inputs, decomposed into signal and noise parts.

        Both the noise-free predictive (posterior) distribution of a Gaussian
        process and its noise estimate are normal; the predictive
        distribution with noise is their sum.

        The noise estimate is built from every kernel hyperparameter whose
        name ends in 'noise_level' (e.g., from a WhiteKernel). These values
        are treated as variances and assumed to be independent, so they are
        simply summed. The $\alpha$ training noise given at initialization
        time is not added at prediction time and is therefore not part of
        the noise model.

        Limitations:
            It is a currently accepted shortcoming that WhiteKernels that are
            not 'first-level' sum members might yield wrong noise models.
            Examples:
            WhiteKernel(...) + other kernels will work
            kernel(...) * WhiteKernel(...) will not work as intended

            Training data noise $\alpha$ is not added

        Parameters:
            data: finite indexed data to predict;

        Returns:
            predictive normal distribution with the following decomposition:
                predicted: sum of model and noise distribution
                noise_part: zero-mean normal distribution for estimated noise
                signal_part: normal distribution for estimated model contribution;
                             the Gaussian process' "predictive variance"
        """

        # todo: params.data(..., is_finite=True, is_labeled=True)
        data = params.instance(data, Data)

        inputs = params.real_matrix(data.samples())
        num_points = data.num_samples

        # posterior mean and standard deviation of the noise-free model
        signal_mean, signal_std = self._model.predict(inputs, return_std=True)

        # collect all noise levels (variances, not standard deviations)
        # from the fitted kernel's hyperparameters
        kernel_params = self._model.kernel_.get_params()
        noise_levels = [
            value for name, value in kernel_params.items() if name.endswith("noise_level")
        ]

        # same total noise variance for every predicted sample
        noise_variance = np.ones(shape=num_points) * np.sum(noise_levels)

        noise_distribution = NormalPredictiveDistribution(
            mean=np.zeros(shape=num_points), stddev=np.sqrt(noise_variance)
        )

        # variances of independent normal contributions add up
        total_std = np.sqrt(np.square(signal_std) + noise_variance)

        signal_distribution = NormalPredictiveDistribution(
            mean=signal_mean, stddev=signal_std
        )

        return NormalPredictiveDistribution(
            mean=signal_mean,
            stddev=total_std,
            noise_part=noise_distribution,
            signal_part=signal_distribution,
        )
コード例 #3
0
    def apply(
        self, data: Data
    ) -> Union[DeltaPredictiveDistribution, NormalPredictiveDistribution]:
        r"""Predicts new inputs.

        Without uncertainties (`_uncertainties is None`) the ensemble
        prediction is returned as a delta distribution. With "naive"
        uncertainties, mean and standard deviation are computed over the
        predictions of the individual trees.

        Parameters:
            data: finite indexed data to predict;

        Returns:
            delta distribution if no uncertainties were requested,
            otherwise predictive normal distribution

        Raises:
            BenchmarkError: if the stored uncertainty setting is neither
                `None` nor "naive".
        """

        data = params.instance(data, Data)

        xpred = params.real_matrix(data.samples())

        # predict
        # scikit-learn's ExtraTreesRegressor.predict() method does not support
        # returning predictions for all trees in the ensemble. Therefore,
        # `preds = self._model.predict(xpred)` is insufficient whenever
        # per-tree statistics are needed.

        if self._uncertainties is None:
            preds = self._model.predict(xpred)
            return DeltaPredictiveDistribution(mean=preds)
        elif self._uncertainties == "naive":
            # One row per tree, one column per sample.
            # np.asfarray was removed in NumPy 2.0; np.asarray with an explicit
            # float64 dtype is the equivalent that works on old and new versions.
            preds = np.asarray(
                [tree.predict(xpred) for tree in self._model.estimators_], dtype=np.float64
            )
            return NormalPredictiveDistribution(
                mean=np.mean(preds, axis=0), stddev=np.std(preds, axis=0)
            )
        else:
            raise BenchmarkError(
                "internal error, unknown parameter for uncertainties of ExtremelyRandomizedTreesRegressionSklearn"
            )
コード例 #4
0
    def apply(self, data: Data) -> PredictiveDistribution:
        """Predicts new inputs.

        Parameters:
            data: finite indexed data to predict

        Returns:
            normal predictive distributions (mean and standard deviation)
            if the learner was set up with predictive uncertainties,
            delta distributions (mean only) otherwise

        Raises:
            BenchmarkError: if the underlying model raises a Py4JJavaError
                while predicting
        """

        # todo: params.data(..., is_labeled=True, is_finite=True)
        data = params.instance(data, Data)

        inputs = params.real_matrix(data.samples())
        with_std = bool(self._with_uncertainties)

        # a single handler translates Java-side failures for both variants
        try:
            if with_std:
                means, deviations = self._model.predict(inputs, return_std=True)
                return NormalPredictiveDistribution(mean=means, stddev=deviations)

            means = self._model.predict(inputs, return_std=False)
            return DeltaPredictiveDistribution(mean=means)
        except Py4JJavaError as java_error:
            raise BenchmarkError("applying lolo model failed") from java_error
コード例 #5
0
ファイル: identity_learner.py プロジェクト: syam-s/smlb
 def apply(self, data: Data):
     """Predicts labels by evaluating the wrapped function on the samples.

     Parameters:
         data: finite data to predict

     Returns:
         normal predictive distribution whose means are the labels given
         by the wrapped function and whose standard deviations are all zero

     Raises:
         InvalidParameterError: if `data` is not finite
     """
     if data.is_finite:
         labels = self._function.labels(data.samples())
         return NormalPredictiveDistribution(labels, np.zeros_like(labels))

     raise InvalidParameterError(
         "a finite dataset", f"an infinite dataset of type {data.__class__}"
     )
コード例 #6
0
ファイル: test_optimizers.py プロジェクト: syam-s/smlb
    def apply(self, data: Data):
        """Returns random predictions, one per sample.

        Means are drawn uniformly from [0, 10) and standard deviations
        uniformly from [0.5, 2.0), using NumPy's global random state.

        Parameters:
            data: finite data to predict

        Returns:
            normal predictive distribution with random means and
            standard deviations

        Raises:
            InvalidParameterError: if `data` is not finite
        """
        if data.is_finite:
            # draw means first, then standard deviations, so that the
            # sequence of random numbers consumed stays the same
            random_means = np.random.uniform(low=0, high=10, size=data.num_samples)
            random_stddevs = np.random.uniform(low=0.5, high=2.0, size=data.num_samples)
            return NormalPredictiveDistribution(random_means, random_stddevs)

        raise InvalidParameterError(
            "a finite dataset", f"an infinite dataset of type {data.__class__}"
        )