Exemple #1
0
    def generate_standard_deviation_data(
            self, model: RegressorMixin) -> RegressorMixin:
        """Compute per-horizon standard deviation data and attach it to ``model``.

        For every unique value of ``self.validation_data.horizon`` the rows of
        that horizon are selected, ``model.predict`` is called on all columns
        except the first and the last, and the first column together with the
        prediction is handed to ``self._calculate_standard_deviation``. Each
        result is labelled with its horizon in a ``"horizon"`` column. The
        concatenation of all results is stored both on
        ``self.standard_deviation`` and on ``model.standard_deviation``.

        A horizon for which the prediction fails is reported on stdout and
        skipped, so that neither ``None`` nor the prediction of a previous
        horizon is ever used for it.

        Parameters
        ----------
        model : RegressorMixin
            Model exposing ``predict``; it receives a ``standard_deviation``
            attribute.

        Returns
        -------
        RegressorMixin
            The same ``model`` object, with ``standard_deviation`` set.
        """
        # Reset up front so the attribute is in a defined state while looping.
        self.standard_deviation = pd.DataFrame()
        per_horizon_frames = []

        # Loop over horizons and ask prediction for each specific horizon
        for horizon in self.validation_data.horizon.unique():
            # Make subset for this specific horizon
            sub_val = self.validation_data[self.validation_data.horizon ==
                                           horizon]
            try:
                # First column is excluded (presumably the target) as is the
                # last one (presumably the horizon label) -- TODO confirm.
                predicted = model.predict(sub_val.iloc[:, 1:-1])
            except Exception as e:
                print("Could not get prediction from new model!", e)
                # Without a fresh prediction there is nothing valid to
                # compare against for this horizon.
                continue

            # Calculate confidence interval for this horizon
            confidence_interval_horizon = self._calculate_standard_deviation(
                sub_val.iloc[:, 0], predicted)
            confidence_interval_horizon[
                "horizon"] = horizon  # Label with respective horizon
            per_horizon_frames.append(confidence_interval_horizon)

        # Concatenate once instead of re-copying the growing frame each pass;
        # pd.concat rejects an empty list, so keep the empty frame then.
        if per_horizon_frames:
            self.standard_deviation = pd.concat(per_horizon_frames)

        model.standard_deviation = self.standard_deviation

        return model
Exemple #2
0
    def _fit_and_predict_oof_model(
        self,
        estimator: RegressorMixin,
        X: ArrayLike,
        y: ArrayLike,
        train_index: ArrayLike,
        val_index: ArrayLike,
        sample_weight: Optional[ArrayLike] = None,
    ) -> Tuple[RegressorMixin, NDArray, ArrayLike]:
        """
        Train one out-of-fold model on the training indices and predict
        on the validation indices.

        Parameters
        ----------
        estimator : RegressorMixin
            Estimator to fit.

        X : ArrayLike of shape (n_samples, n_features)
            Input data.

        y : ArrayLike of shape (n_samples,)
            Input labels.

        train_index : ArrayLike of shape (n_samples_train)
            Indices selecting the training rows of ``X``, ``y`` (and
            ``sample_weight`` when given).

        val_index : ArrayLike of shape (n_samples_val)
            Indices selecting the validation rows of ``X``.

        sample_weight : Optional[ArrayLike] of shape (n_samples,)
            Sample weights. When not ``None``, the training subset is
            forwarded to ``fit_estimator``; otherwise no weights are passed.
            By default ``None``.

        Returns
        -------
        Tuple[RegressorMixin, NDArray, ArrayLike]

        - [0]: RegressorMixin, the estimator returned by ``fit_estimator``
        - [1]: NDArray of shape (n_samples_val,),
          predictions on the validation fold (an empty array when the
          validation fold has no samples).
        - [2]: ArrayLike of shape (n_samples_val,),
          the ``val_index`` that was passed in.
        """
        # Positional arguments for fit_estimator, in call order.
        fit_args = [
            _safe_indexing(X, train_index),
            _safe_indexing(y, train_index),
        ]
        X_val = _safe_indexing(X, val_index)
        if sample_weight is not None:
            fit_args.append(_safe_indexing(sample_weight, train_index))
        estimator = fit_estimator(estimator, *fit_args)

        # Nothing to predict on an empty validation fold.
        if _num_samples(X_val) <= 0:
            return estimator, np.array([]), val_index
        return estimator, estimator.predict(X_val), val_index
Exemple #3
0
def produce_submission(
    model: RegressorMixin,
    output_path: str = '../data/submissions/nearest-neighbors.csv',
):
    """Predict on the test data and write an ``Id``/``SalePrice`` CSV.

    The test data comes from ``load_test_data()``; its index supplies the
    ``Id`` column and ``model.predict`` supplies ``SalePrice``, which is
    rounded to two decimals before writing.

    Parameters
    ----------
    model : RegressorMixin
        Fitted model exposing ``predict``.
    output_path : str
        Destination of the CSV file. Defaults to the path this function
        has always written to.
    """
    td = load_test_data()
    out = model.predict(td)
    # One row of ids and one row of predictions, transposed into two columns.
    submission_data = pd.DataFrame([td.index, out]).T
    submission_data.columns = ['Id', 'SalePrice']
    submission_data = submission_data.astype({'Id': int, 'SalePrice': float})
    # Series.round returns a new Series, so the result has to be stored;
    # calling it without assignment leaves the column unrounded.
    submission_data['SalePrice'] = submission_data['SalePrice'].round(decimals=2)
    submission_data.to_csv(output_path, header=['Id', 'SalePrice'], index=False)
def bootstrap_regressor(
    f: RegressorMixin,
    X,  # numpy array
    y,  # numpy array
    num_samples: int = 100,
    random_state: int = random.randint(0, 2 ** 32 - 1),
) -> List[float]:
    """
    Take the regressor `f`, and compute its bootstrapped mean squared error over the dataset `X`,`y`.
    Generate `num_samples` samples; and seed the resampler with `random_state`.

    Predictions are computed once on `X`; each trial then resamples the
    (prediction, truth) pairs and scores the resample. Returns one score per
    trial, in trial order.

    NOTE: the default for `random_state` is evaluated once, when the function
    is defined, so every call that omits it shares the same seed for the
    lifetime of the process.
    """
    # Integer seeds must stay within [0, 2**32 - 1]; wrap the per-trial seed
    # so that a `random_state` near the upper bound cannot overflow it.
    seed_modulus = 2 ** 32
    dist: List[float] = []
    y_pred = f.predict(X)  # type:ignore
    # do the bootstrap:
    for trial in range(num_samples):
        sample_pred, sample_truth = resample(
            y_pred, y, random_state=(trial + random_state) % seed_modulus
        )  # type:ignore
        score = mean_squared_error(y_true=sample_truth, y_pred=sample_pred)  # type:ignore
        dist.append(score)
    return dist
Exemple #5
0
def bootstrap_mae(
    f: RegressorMixin,
    X,  # numpy array
    y,  # numpy array
    num_samples: int = 100,
    random_state: int = random.randint(0, 2 ** 32 - 1),
) -> List[float]:
    """
    Take the regressor ``f``, and compute its bootstrapped mean absolute error over the dataset ``X``,``y``.
    Generate ``num_samples`` samples; and seed the resampler with ``random_state``.

    Predictions are computed once on ``X``; each trial then resamples the
    (prediction, truth) pairs and scores the resample. Returns one score per
    trial, in trial order.

    NOTE: the default for ``random_state`` is evaluated once, when the
    function is defined, so every call that omits it shares the same seed for
    the lifetime of the process.
    """
    # Integer seeds must stay within [0, 2**32 - 1]; wrap the per-trial seed
    # so that a ``random_state`` near the upper bound cannot overflow it.
    seed_modulus = 2 ** 32
    dist: List[float] = []
    y_pred = f.predict(X)  # type:ignore (predict is not declared on RegressorMixin)
    # do the bootstrap:
    for trial in range(num_samples):
        sample_pred, sample_truth = resample(
            y_pred, y, random_state=(trial + random_state) % seed_modulus
        )  # type:ignore
        score = mean_absolute_error(y_true=sample_truth, y_pred=sample_pred)  # type:ignore
        dist.append(score)
    return dist