Exemple #1
0
    def predict_proba(self, X):
        """Compute class probabilities for every row of an in-memory dataset.

        Parameters
        ----------
        X
            array-like of shape (n_samples, n_features).

        Returns
        -------
        An array of shape (n_samples, n_classes). Column `j` holds the probability
        assigned to `self.classes_[j]`; classes missing from a prediction get 0.

        Raises
        ------
        ValueError
            If `X` does not have `self.n_features_in_` columns.

        """

        # Predicting only makes sense once the wrapped model has been fitted
        utils.validation.check_is_fitted(self, attributes='instance_')

        # Validate the input and make sure its width matches what was seen during fit
        X = utils.check_array(X, **SKLEARN_INPUT_X_PARAMS)
        if X.shape[1] != self.n_features_in_:
            raise ValueError(
                f'Expected {self.n_features_in_} features, got {X.shape[1]}')

        classes = self.classes_
        probas = np.empty(shape=(len(X), len(classes)))

        # river returns a {label: probability} dict for each observation, whereas
        # scikit-learn expects one row per observation ordered like `classes_`
        for row, (features, _) in enumerate(stream.iter_array(X)):
            distribution = self.instance_.predict_proba_one(features)
            probas[row] = [distribution.get(label, 0) for label in classes]

        return probas
Exemple #2
0
    def predict(self, X):
        """Predict a label for every row of an in-memory dataset.

        Parameters
        ----------
        X
            array-like of shape (n_samples, n_features).

        Returns
        -------
        An array holding one predicted label per row of `X`. When a
        `label_encoder_` attribute is present, the predictions are cast to `int`
        and mapped back to the original labels through it.

        Raises
        ------
        ValueError
            If `X` does not have `self.n_features_in_` columns.

        """

        # Predicting only makes sense once the wrapped model has been fitted
        utils.validation.check_is_fitted(self, attributes='instance_')

        # Validate the input and make sure its width matches what was seen during fit
        X = utils.check_array(X, **SKLEARN_INPUT_X_PARAMS)
        if X.shape[1] != self.n_features_in_:
            raise ValueError(
                f'Expected {self.n_features_in_} features, got {X.shape[1]}')

        # Query the underlying model one observation at a time
        labels = np.asarray([
            self.instance_.predict_one(features)
            for features, _ in stream.iter_array(X)
        ])

        # An encoder is only present when the labels had to be encoded for binary
        # classification; in that case decode back to the labels the user provided
        if hasattr(self, 'label_encoder_'):
            return self.label_encoder_.inverse_transform(labels.astype(int))

        return labels
Exemple #3
0
    def predict(self, X) -> np.ndarray:
        """Predict a numeric target for every row of an in-memory dataset.

        Parameters
        ----------
        X
            array-like of shape (n_samples, n_features).

        Returns
        -------
        A one-dimensional array holding one prediction per row of `X`.

        Raises
        ------
        ValueError
            If `X` does not have `self.n_features_in_` columns.

        """

        # Predicting only makes sense once the wrapped model has been fitted
        utils.validation.check_is_fitted(self, attributes='instance_')

        # Validate the input and make sure its width matches what was seen during fit
        X = utils.check_array(X, **SKLEARN_INPUT_X_PARAMS)
        if X.shape[1] != self.n_features_in_:
            raise ValueError(
                f'Expected {self.n_features_in_} features, got {X.shape[1]}')

        # Allocate the output once, then fill it one observation at a time
        n_rows = len(X)
        predictions = np.empty(shape=n_rows)
        predict_one = self.instance_.predict_one
        for row, (features, _) in enumerate(stream.iter_array(X)):
            predictions[row] = predict_one(features)

        return predictions
Exemple #4
0
def iter_sklearn_dataset(dataset: "sklearn.utils.Bunch",
                         **kwargs) -> base.typing.Stream:
    """Iterates rows from one of the datasets provided by scikit-learn.

    This allows you to use any dataset from [scikit-learn's `datasets` module](https://scikit-learn.org/stable/modules/classes.html#module-sklearn.datasets). For instance, you can use the `fetch_openml` function to get access to all of the
    datasets from the OpenML website.

    Parameters
    ----------
    dataset
        A scikit-learn dataset. Its `data` and `target` attributes are used as the
        features and the targets; `feature_names` is used too when it is available.
    kwargs
        Extra keyword arguments are passed to the underlying call to
        `stream.iter_pandas` (when `dataset.data` is a `pandas.DataFrame`) or
        `stream.iter_array` (otherwise). The `X`, `y` and, when the dataset provides
        them, `feature_names` entries are overwritten with the dataset's own values.

    Examples
    --------

    >>> import pprint
    >>> from sklearn import datasets
    >>> from river import stream

    >>> dataset = datasets.load_diabetes()

    >>> for xi, yi in stream.iter_sklearn_dataset(dataset):
    ...     pprint.pprint(xi)
    ...     print(yi)
    ...     break
    {'age': 0.0380759064334241,
     'bmi': 0.0616962065186885,
     'bp': 0.0218723549949558,
     's1': -0.0442234984244464,
     's2': -0.0348207628376986,
     's3': -0.0434008456520269,
     's4': -0.00259226199818282,
     's5': 0.0199084208763183,
     's6': -0.0176461251598052,
     'sex': 0.0506801187398187}
    151.0

    """
    kwargs["X"] = dataset.data
    kwargs["y"] = dataset.target

    # Not every dataset exposes feature names; fall back to the iterator's
    # default naming when they are missing.
    try:
        kwargs["feature_names"] = dataset.feature_names
    except AttributeError:
        pass

    # Some loaders hand back a DataFrame instead of an array, in which case the
    # pandas-aware iterator is used.
    if isinstance(kwargs["X"], pd.DataFrame):
        yield from stream.iter_pandas(**kwargs)
    else:
        yield from stream.iter_array(**kwargs)
Exemple #5
0
def iter_sklearn_dataset(dataset: "sklearn.utils.Bunch",
                         **kwargs) -> base.typing.Stream:
    """Stream the rows of a scikit-learn dataset one at a time.

    Any dataset from [scikit-learn's `datasets` module](https://scikit-learn.org/stable/modules/classes.html#module-sklearn.datasets) can be used. For instance, the `fetch_openml` function gives access to all of the
    datasets from the OpenML website.

    Parameters
    ----------
    dataset
        A scikit-learn dataset. Its `data` and `target` attributes provide the
        features and the targets; `feature_names` is used as well when present.
    kwargs
        Extra keyword arguments forwarded to `stream.iter_pandas` when the data is
        a `pandas.DataFrame`, and to `stream.iter_array` otherwise.

    Examples
    --------

    >>> import pprint
    >>> from sklearn import datasets
    >>> from river import stream

    >>> dataset = datasets.load_diabetes()

    >>> for xi, yi in stream.iter_sklearn_dataset(dataset):
    ...     pprint.pprint(xi)
    ...     print(yi)
    ...     break
    {'age': 0.0380759064334241,
     'bmi': 0.0616962065186885,
     'bp': 0.0218723549949558,
     's1': -0.0442234984244464,
     's2': -0.0348207628376986,
     's3': -0.0434008456520269,
     's4': -0.00259226199818282,
     's5': 0.0199084208763183,
     's6': -0.0176461251598052,
     'sex': 0.0506801187398187}
    151.0

    """
    kwargs.update(X=dataset.data, y=dataset.target)

    # Feature names are optional on a dataset, so only forward them when present
    if hasattr(dataset, "feature_names"):
        kwargs["feature_names"] = dataset.feature_names

    # Dispatch on the container type of the features
    features = kwargs["X"]
    iterate = stream.iter_pandas if isinstance(features, pd.DataFrame) else stream.iter_array
    yield from iterate(**kwargs)
Exemple #6
0
def iter_pandas(X: pd.DataFrame,
                y: typing.Union[pd.Series, pd.DataFrame] = None,
                **kwargs) -> base.typing.Stream:
    """Stream the rows of a `pandas.DataFrame` one at a time.

    The dataframe's column names are used as feature names. When `y` is a
    dataframe, its column names are used as target names.

    Parameters
    ----------
    X
        A dataframe of features.
    y
        A series or a dataframe with one column per target. May be omitted.
    kwargs
        Extra keyword arguments are passed to the underlying call to `stream.iter_array`.

    Examples
    --------

    >>> import pandas as pd
    >>> from river import stream

    >>> X = pd.DataFrame({
    ...     'x1': [1, 2, 3, 4],
    ...     'x2': ['blue', 'yellow', 'yellow', 'blue'],
    ...     'y': [True, False, False, True]
    ... })
    >>> y = X.pop('y')

    >>> for xi, yi in stream.iter_pandas(X, y):
    ...     print(xi, yi)
    {'x1': 1, 'x2': 'blue'} True
    {'x1': 2, 'x2': 'yellow'} False
    {'x1': 3, 'x2': 'yellow'} False
    {'x1': 4, 'x2': 'blue'} True

    """

    # Column labels always take precedence over names supplied by the caller
    kwargs['feature_names'] = X.columns
    multi_target = isinstance(y, pd.DataFrame)
    if multi_target:
        kwargs['target_names'] = y.columns

    # Hand plain arrays to the array iterator; a missing target stays None
    features = X.to_numpy()
    targets = None if y is None else y.to_numpy()
    yield from stream.iter_array(X=features, y=targets, **kwargs)
Exemple #7
0
from river import compose
from river import linear_model
from river import metrics
from river import evaluate
from river import preprocessing
from river import optim
from river import stream
import river



# NOTE(review): `Xa`, `Xc`, `a0`, `contexts`, `plt` and `reshape_vals` are not
# defined in this excerpt; they are presumably created earlier in the script.

# Chain a standard scaler with a linear regression; `|=` appends the regressor
# to the scaler. The SGD optimizer is built with 0.001 (presumably the learning
# rate — confirm against river's `optim.SGD` signature).
model = preprocessing.StandardScaler()
model |= linear_model.LinearRegression(optimizer=optim.SGD(0.001))

# Predictions of the model over `Xa` before any call to `learn_one`.
# Each item produced by `iter_array` is a pair, so `i[0]` selects its first
# element (the features) and ignores the second.
s2 = stream.iter_array(Xa)
audit = [model.predict_one(i[0]) for i in s2]
plt.imshow(reshape_vals(audit))

# Train the model one observation at a time on `Xc`, using the values returned
# by `a0.ev(contexts)` as targets.
s1 = stream.iter_array(Xc, a0.ev(contexts))
for c, v in s1:
    model.learn_one(c, v)

# Same audit as above, now with the trained model. A fresh stream is created
# because the previous one has already been consumed.
s2 = stream.iter_array(Xa)
audit = [model.predict_one(i[0]) for i in s2]
plt.imshow(reshape_vals(audit))

# Score the model with RMSE over the same data it was just trained on, so this
# is not a score on unseen data. `print_every` is one twentieth of the number of
# rows of `Xc`, i.e. roughly 20 progress reports.
metric = metrics.RMSE()
evaluate.progressive_val_score(stream.iter_array(Xc, a0.ev(contexts)), model, metric, print_every=int(Xc.shape[0]/20))

# New stream over `Xa`; its use is outside this excerpt.
s2 = stream.iter_array(Xa)