Example #1
    def generate_standard_deviation_data(
            self, model: RegressorMixin) -> RegressorMixin:

        # Define some variables
        predicted = None
        self.standard_deviation = pd.DataFrame()

        # Loop over horizons and ask prediction for each specific horizon
        for horizon in self.validation_data.horizon.unique():
            # Make subset for this specific horizon
            sub_val = self.validation_data[self.validation_data.horizon ==
                                           horizon]
            try:
                predicted = model.predict(sub_val.iloc[:, 1:-1])
            except Exception as e:
                print("Could not get prediction from new model!", e)
                continue  # skip this horizon rather than reusing a stale prediction

            # Calculate confidence interval for this horizon
            confidence_interval_horizon = self._calculate_standard_deviation(
                sub_val.iloc[:, 0], predicted)
            confidence_interval_horizon[
                "horizon"] = horizon  # Label with respective horizon
            self.standard_deviation = pd.concat(
                [self.standard_deviation, confidence_interval_horizon])

        model.standard_deviation = self.standard_deviation

        return model
Example #2
    def __init__(self, binner=None, estimator=None, n_jobs=None, verbose=False):
        """
        @param      binner              transformer or predictor which creates the buckets
        @param      estimator           predictor trained on every bucket
        @param      n_jobs              number of parallel jobs (for training and predicting)
        @param      verbose             boolean or use ``'tqdm'`` to use :epkg:`tqdm`
                                        to fit the estimators

        *binner* allows the following values:

        - ``'tree'``: the model is :epkg:`sklearn:tree:DecisionTreeRegressor`
        - ``'bins'``: the model is :epkg:`sklearn:preprocessing:KBinsDiscretizer`
        - any instantiated model

        *estimator* allows the following values:

        - ``None``: the model is :epkg:`sklearn:linear_model:LinearRegression`
        - any instantiated model
        """
        if estimator is None:
            estimator = LinearRegression()
        if binner in ('tree', None):
            binner = DecisionTreeRegressor(min_samples_leaf=2)
        RegressorMixin.__init__(self)
        PiecewiseEstimator.__init__(self, binner=binner, estimator=estimator,
                                    n_jobs=n_jobs, verbose=verbose)
Example #3
 def __init__(self, num_inputs, mxseed=0, epochs=5000, net_type=1):
     BaseEstimator.__init__(self)
     RegressorMixin.__init__(self)
     self.net = None
     self.num_inputs = num_inputs
     self.mxseed = mxseed
     self.epochs = epochs
     self.net_type = net_type
     return
Example #4
File: prm.py Project: amarvin/sprm
 def valscore(self, Xn, yn, scoring):
     if scoring == 'weighted':
         return (RegressorMixin.score(self,
                                      Xn,
                                      yn,
                                      sample_weight=self.caseweights_))
     elif scoring == 'normal':
         return (RegressorMixin.score(self, Xn, yn))
     else:
         raise ValueError('Scoring flag must be set to "weighted" or "normal".')
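For reference, `RegressorMixin.score` is the coefficient of determination R², equivalent to `r2_score(y, self.predict(X), sample_weight=...)`, so the 'weighted' branch above only changes the sample weights. A minimal self-contained sketch (not from this project):

import numpy as np
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.metrics import r2_score

class MeanRegressor(BaseEstimator, RegressorMixin):
    """Toy regressor that always predicts the training mean."""
    def fit(self, X, y):
        self.mean_ = float(np.mean(y))
        return self
    def predict(self, X):
        return np.full(len(X), self.mean_)

X = np.arange(10, dtype=float).reshape(-1, 1)
y = np.arange(10, dtype=float)
reg = MeanRegressor().fit(X, y)
# RegressorMixin.score is R^2 on the regressor's own predictions
assert np.isclose(reg.score(X, y), r2_score(y, reg.predict(X)))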
Example #5
 def valscore(self, Xn, yn, scoring):
     n, p, Xn = _predict_check_input(Xn)
     (n, p) = Xn.shape
     if p != self.X.shape[1]:
         raise ValueError('New data must have the same number of columns as the ones the model has been trained with')
     if scoring == 'weighted':
         return RegressorMixin.score(self, Xn, yn, sample_weight=self.caseweights_)
     elif scoring == 'normal':
         return RegressorMixin.score(self, Xn, yn)
     else:
         raise ValueError('Scoring flag must be set to "weighted" or "normal".')
Example #6
 def __init__(self, model='SIR', t=0, max_iter=100,
              learning_rate_init=0.1, lr_schedule='constant',
              momentum=0.9, power_t=0.5, early_th=None,
              min_threshold='auto', max_threshold='auto',
              verbose=False, init=None):
     if init is not None:
         if isinstance(init, EpidemicRegressor):
             if hasattr(init, 'coef_'):
                 init = init.coef_.copy()
             else:
                 init = None  # pragma: no cover
         elif not isinstance(init, dict):
             raise TypeError(
                 f"init must be a dictionary not {type(init)}.")
     BaseEstimator.__init__(self)
     RegressorMixin.__init__(self)
     self.t = t
     self.model = model
     self.max_iter = max_iter
     self.learning_rate_init = learning_rate_init
     self.lr_schedule = lr_schedule
     self.momentum = momentum
     self.power_t = power_t
     self.early_th = early_th
     self.verbose = verbose
     if min_threshold == 'auto':
         if model.upper() in ('SIR', 'SIRD'):
             min_threshold = 0.0001
         elif model.upper() in ('SIRC', ):
             pmin = dict(beta=0.001, nu=0.0001, mu=0.0001,
                         a=-1., b=0., c=0.)
             min_threshold = numpy.array(
                 [pmin[k[0]] for k in CovidSIRDc.P0])
          elif model.upper() in ('SIRDC', ):
             pmin = dict(beta=0.001, nu=0.001, mu=0.001,
                         a=-1., b=0., c=0.)
             min_threshold = numpy.array(
                 [pmin[k[0]] for k in CovidSIRDc.P0])
     if max_threshold == 'auto':
         if model.upper() in ('SIR', 'SIRD'):
             max_threshold = 1.
         elif model.upper() in ('SIRC', 'SIRDC'):
             pmax = dict(beta=1., nu=0.5, mu=0.5,
                         a=0., b=4., c=2.)
             max_threshold = numpy.array(
                 [pmax[k[0]] for k in CovidSIRDc.P0])
     self.min_threshold = min_threshold
     self.max_threshold = max_threshold
     self._get_model()
     self.init = init
     if init is not None:
         self.coef_ = init
Example #7
 def __init__(self, force_positive=False, **kwargs):
     """
     *kwargs* should contains parameters
     for :epkg:`sklearn:decomposition:NMF`.
     The parameter *force_positive* removes all
     negative predictions and replaces by zero.
     """
     BaseEstimator.__init__(self)
     RegressorMixin.__init__(self)
     MultiOutputMixin.__init__(self)
     for k, v in kwargs.items():
         setattr(self, k, v)
     self.force_positive = force_positive
Example #8
 def valscore(self, Xn, yn, scoring):
     if type(Xn) == ps.core.frame.DataFrame:
         Xn = Xn.to_numpy()
     if type(yn) in [ps.core.frame.DataFrame, ps.core.series.Series]:
         yn = yn.to_numpy().T.astype('float64')
     (n, p) = Xn.shape
     if p != self.X.shape[1]:
         raise ValueError('New data must have the same number of columns as the ones the model has been trained with')
     if scoring == 'weighted':
         return RegressorMixin.score(self, Xn, yn, sample_weight=self.caseweights_)
     elif scoring == 'normal':
         return RegressorMixin.score(self, Xn, yn)
     else:
         raise ValueError('Scoring flag must be set to "weighted" or "normal".')
Example #9
    def _fit_and_predict_oof_model(
        self,
        estimator: RegressorMixin,
        X: ArrayLike,
        y: ArrayLike,
        train_index: ArrayLike,
        val_index: ArrayLike,
        sample_weight: Optional[ArrayLike] = None,
    ) -> Tuple[RegressorMixin, NDArray, ArrayLike]:
        """
        Fit a single out-of-fold model on a given training set and
        perform predictions on a test set.

        Parameters
        ----------
        estimator : RegressorMixin
            Estimator to train.

        X : ArrayLike of shape (n_samples, n_features)
            Input data.

        y : ArrayLike of shape (n_samples,)
            Input labels.

        train_index : ArrayLike of shape (n_samples_train)
            Training data indices.

        val_index : ArrayLike of shape (n_samples_val)
            Validation data indices.

        sample_weight : Optional[ArrayLike] of shape (n_samples,)
            Sample weights. If None, then samples are equally weighted.
            By default ``None``.

        Returns
        -------
        Tuple[RegressorMixin, NDArray, ArrayLike]

        - [0]: RegressorMixin, fitted estimator
        - [1]: NDArray of shape (n_samples_val,),
          estimator predictions on the validation fold.
        - [2]: ArrayLike of shape (n_samples_val,),
          validation data indices.
        """
        X_train = _safe_indexing(X, train_index)
        y_train = _safe_indexing(y, train_index)
        X_val = _safe_indexing(X, val_index)
        if sample_weight is None:
            estimator = fit_estimator(estimator, X_train, y_train)
        else:
            sample_weight_train = _safe_indexing(sample_weight, train_index)
            estimator = fit_estimator(
                estimator, X_train, y_train, sample_weight_train
            )
        if _num_samples(X_val) > 0:
            y_pred = estimator.predict(X_val)
        else:
            y_pred = np.array([])
        return estimator, y_pred, val_index
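The helpers used above (`fit_estimator`, `_safe_indexing`, `_num_samples`) come from the surrounding project and scikit-learn's internal utilities. A self-contained sketch of the same out-of-fold pattern using only public scikit-learn APIs:

# Out-of-fold predictions with plain scikit-learn (sketch, not the project's helpers).
import numpy as np
from sklearn.base import clone
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold

rng = np.random.RandomState(0)
X = rng.normal(size=(100, 3))
y = X @ np.array([1.0, -2.0, 0.5]) + rng.normal(scale=0.1, size=100)

oof_pred = np.empty_like(y)
fold_models = []
for train_idx, val_idx in KFold(n_splits=5).split(X):
    est = clone(LinearRegression()).fit(X[train_idx], y[train_idx])
    oof_pred[val_idx] = est.predict(X[val_idx])  # predictions on the held-out fold
    fold_models.append(est)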
Example #10
 def __init__(self, rf_estimator=None, lasso_estimator=None):
     """
     @param  rf_estimator    random forest estimator,
                             :epkg:`sklearn:ensemble:RandomForestRegressor`
                             by default
      @param  lasso_estimator Lasso estimator,
                              :epkg:`sklearn:linear_model:Lasso`
                             by default
     """
     BaseEstimator.__init__(self)
     RegressorMixin.__init__(self)
     if rf_estimator is None:
         rf_estimator = RandomForestRegressor()
     if lasso_estimator is None:
         lasso_estimator = Lasso()
     self.rf_estimator = rf_estimator
     self.lasso_estimator = lasso_estimator
Example #11
def produce_submission(model: RegressorMixin):
    td = load_test_data()
    out: pd.DataFrame = model.predict(td)
    submission_data = pd.DataFrame([td.index, out]).T
    submission_data.columns = ['Id', 'SalePrice']
    submission_data = submission_data.astype({'Id': int, 'SalePrice': float})
    submission_data['SalePrice'] = submission_data['SalePrice'].round(2)
    submission_data.to_csv('../data/submissions/nearest-neighbors.csv', header=['Id', 'SalePrice'], index=False)
Example #12
def _train(train_data: DataFrame,
           regressor: RegressorMixin,
           clusterer: Clustering,
           do_cv=False) -> dict:
    models = dict()

    train_data = clusterer.cluster_data(train_data)

    for cluster in range(clusterer.n_clusters):

        cluster_train_df = train_data[cluster]
        if not cluster_train_df.empty:
            cluster_targets_df = cluster_train_df['label']

            if do_cv:
                cross_validation_result = cross_validate(
                    regressor,
                    cluster_train_df.drop('label', axis=1),
                    cluster_targets_df.values.ravel(),
                    return_estimator=True,
                    cv=10  # TODO for Chiara: check whether you want 10-fold CV
                )

                validation_scores = cross_validation_result['test_score']
                regressors = cross_validation_result['estimator']
                # Keep the estimator from the fold with the best validation score.
                # TODO for Chiara: check whether you want the max, the min, or the middle one
                regressor = regressors[dict(
                    zip(validation_scores, range(len(validation_scores)))
                )[max(validation_scores)]]
            else:
                regressor.fit(cluster_train_df.drop('label', axis=1),
                              cluster_targets_df.values.ravel())

            models[cluster] = regressor
            try:
                regressor = clone(regressor)
            except TypeError:
                regressor = clone(regressor, safe=False)

    return {
        ModelType.CLUSTERER.value: clusterer,
        ModelType.REGRESSOR.value: models
    }
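`Clustering` and `ModelType` are project-specific; a rough self-contained sketch of the same "one regressor per cluster" idea, with KMeans standing in for the project's clusterer:

# Sketch only: train one regressor per KMeans cluster (not the project's Clustering class).
import numpy as np
from sklearn.base import clone
from sklearn.cluster import KMeans
from sklearn.linear_model import LinearRegression

rng = np.random.RandomState(0)
X = rng.normal(size=(300, 2))
y = 2 * X[:, 0] - X[:, 1]

clusterer = KMeans(n_clusters=3, n_init=10, random_state=0).fit(X)
labels = clusterer.labels_

models = {}
for cluster in range(clusterer.n_clusters):
    mask = labels == cluster
    if mask.any():
        models[cluster] = clone(LinearRegression()).fit(X[mask], y[mask])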
Example #13
 def __init__(self, estimator=None, n_estimators=10, n_jobs=None,
              alpha=1., verbose=False):
     """
     @param      estimator           predictor trained on every bucket
     @param      n_estimators        number of estimators to train
     @param      n_jobs              number of parallel jobs (for training and predicting)
     @param      alpha               proportion of samples resampled for each training
     @param      verbose             boolean or use ``'tqdm'`` to use :epkg:`tqdm`
                                     to fit the estimators
     """
     BaseEstimator.__init__(self)
     RegressorMixin.__init__(self)
     if estimator is None:
         raise ValueError("estimator cannot be null.")
     self.estimator = estimator
     self.n_jobs = n_jobs
     self.alpha = alpha
     self.verbose = verbose
     self.n_estimators = n_estimators
Example #14
def _train(train_data: DataFrame, regressor: RegressorMixin, clusterer: Clustering) -> dict:
    models = dict()

    train_data = clusterer.cluster_data(train_data)

    for cluster in range(clusterer.n_clusters):

        cluster_train_df = train_data[cluster]
        if not cluster_train_df.empty:
            cluster_targets_df = cluster_train_df['label']
            regressor.fit(cluster_train_df.drop('label', axis=1), cluster_targets_df.values.ravel())

            models[cluster] = regressor
            try:
                regressor = clone(regressor)
            except TypeError:
                regressor = clone(regressor, safe=False)

    return {'clusterer': clusterer, PredictiveModels.REGRESSION.value: models}
Example #15
def _cv_estimate(model: RegressorMixin,
                 train_data: pd.DataFrame,
                 features: List[str],
                 y: str,
                 n_splits: int) -> Tuple[pd.Series, List[RegressorMixin]]:

    cv = KFold(n_splits=n_splits)
    models = []
    cv_pred = pd.Series(np.nan, index=train_data.index)

    for train, test in cv.split(train_data):
        # clone so that each fold keeps its own fitted model
        m = clone(model).fit(train_data[features].iloc[train], train_data[y].iloc[train])
        cv_pred.iloc[test] = m.predict(train_data[features].iloc[test])
        models += [m]

    return cv_pred, models
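For comparison, scikit-learn's `cross_val_predict` produces the same kind of out-of-fold prediction series (although it does not return the per-fold models):

# Minimal sketch: out-of-fold predictions with cross_val_predict.
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold, cross_val_predict

rng = np.random.RandomState(0)
train_data = pd.DataFrame({'x1': rng.normal(size=100), 'x2': rng.normal(size=100)})
train_data['y'] = 3 * train_data['x1'] - train_data['x2'] + rng.normal(scale=0.1, size=100)

cv_pred = pd.Series(
    cross_val_predict(LinearRegression(), train_data[['x1', 'x2']], train_data['y'],
                      cv=KFold(n_splits=5)),
    index=train_data.index,
)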
Example #16
def bootstrap_mae(
    f: RegressorMixin,
    X,  # numpy array
    y,  # numpy array
    num_samples: int = 100,
    random_state: int = random.randint(0, 2 ** 32 - 1),
) -> List[float]:
    """
    Take the regressor ``f``, and compute it's bootstrapped mse over the dataset ``X``,``y``.
    Generate ``num_samples`` samples; and seed the resampler with ``random_state``.
    """
    dist: List[float] = []
    y_pred = f.predict(X)  # type:ignore (predict is not declared on RegressorMixin)
    # do the bootstrap:
    for trial in range(num_samples):
        sample_pred, sample_truth = resample(
            y_pred, y, random_state=trial + random_state
        )  # type:ignore
        score = mean_absolute_error(y_true=sample_truth, y_pred=sample_pred)  # type:ignore
        dist.append(score)
    return dist
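A hypothetical call, assuming `bootstrap_mae` above is in scope and that a regressor has already been fitted (the names `X`, `y`, `f` below are made up for illustration):

# Hypothetical usage of bootstrap_mae (defined above).
import numpy as np
from sklearn.linear_model import LinearRegression

rng = np.random.RandomState(0)
X = rng.normal(size=(200, 2))
y = X @ np.array([2.0, -1.0]) + rng.normal(scale=0.1, size=200)
f = LinearRegression().fit(X, y)

dist = bootstrap_mae(f, X, y, num_samples=200, random_state=0)
print(np.percentile(dist, [2.5, 50, 97.5]))  # rough 95% interval for the MAE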
Example #17
def bootstrap_regressor(
    f: RegressorMixin,
    X,  # numpy array
    y,  # numpy array
    num_samples: int = 100,
    random_state: int = random.randint(0, 2 ** 32 - 1),
) -> List[float]:
    """
    Take the regressor f, and compute it's bootstrapped accuracy over the dataset `X`,`y`.
    Generate `num_samples` samples; and seed the resampler with `random_state`.
    """
    dist: List[float] = []
    y_pred = f.predict(X)  # type:ignore
    # do the bootstrap:
    for trial in range(num_samples):
        sample_pred, sample_truth = resample(
            y_pred, y, random_state=trial + random_state
        )  # type:ignore
        score = mean_squared_error(y_true=sample_truth, y_pred=sample_pred)  # type:ignore
        dist.append(score)
    return dist
Example #18
 def __init__(self):
     RegressorMixin.__init__(self)
     BaseEstimator.__init__(self)
Example #19
 def _reg():
     return RegressorMixin()
Example #20
    def score(self, X, y, sample_weight=None):
        if len(X) == 1:
            # With a single sample, fall back to the MSE (times the prediction
            # length) instead of R^2, which is not defined for one observation.
            output = self.predict(X)
            return output.shape[0] * mean_squared_error(y, output)

        return RegressorMixin.score(self, X, y, sample_weight=sample_weight)
Example #21
 def score(
     self, X: np.ndarray, y: np.ndarray, sample_weight: Optional[np.ndarray] = None
 ) -> float:
     return RegressorMixin.score(self, X, y, sample_weight)
Example #22
 def score(self, X, y, sample_weight=None):
     return RegressorMixin.score(self, X, y, sample_weight=sample_weight)
 def __init__(self, base_estimator):
     RegressorMixin.__init__(self)
     BaseEstimator.__init__(self)
     self.base_estimator = base_estimator
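A minimal sketch (not from any of the projects above) of how a wrapper like the last one is usually completed with `fit` and `predict`, so that the R² `score` inherited from `RegressorMixin` works out of the box:

# Completing a delegating wrapper so RegressorMixin.score works (illustrative sketch).
import numpy as np
from sklearn.base import BaseEstimator, RegressorMixin, clone
from sklearn.linear_model import LinearRegression

class WrappedRegressor(BaseEstimator, RegressorMixin):
    """Delegates fitting and prediction to base_estimator (default: LinearRegression)."""
    def __init__(self, base_estimator=None):
        self.base_estimator = base_estimator

    def fit(self, X, y):
        est = self.base_estimator if self.base_estimator is not None else LinearRegression()
        self.estimator_ = clone(est).fit(X, y)
        return self

    def predict(self, X):
        return self.estimator_.predict(X)

X = np.random.RandomState(0).normal(size=(50, 2))
y = X[:, 0] - 3 * X[:, 1]
print(WrappedRegressor().fit(X, y).score(X, y))  # R^2 via the inherited RegressorMixin.score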