def predict_proba(self, X):
    """Predicts the target probabilities of an entire dataset contained in memory.

    Parameters
    ----------
    X
        array-like of shape (n_samples, n_features).

    Returns
    -------
    Predicted target probabilities for each row of `X`.

    """

    # Check the fit method has been called
    utils.validation.check_is_fitted(self, attributes='instance_')

    # Check the input
    X = utils.check_array(X, **SKLEARN_INPUT_X_PARAMS)

    if X.shape[1] != self.n_features_in_:
        raise ValueError(
            f'Expected {self.n_features_in_} features, got {X.shape[1]}')

    # river's predictions have to be converted to follow the scikit-learn conventions
    def reshape_probas(y_pred):
        return [y_pred.get(c, 0) for c in self.classes_]

    # Make a prediction for each observation
    y_pred = np.empty(shape=(len(X), len(self.classes_)))
    for i, (x, _) in enumerate(stream.iter_array(X)):
        y_pred[i] = reshape_probas(self.instance_.predict_proba_one(x))

    return y_pred
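# --- Usage sketch (illustrative, not part of the wrapper itself) ---
# A minimal example of how `predict_proba` is reached in practice: wrap a river
# classifier so that it follows the scikit-learn estimator API, fit it on an
# in-memory dataset, then ask for class probabilities. This assumes the wrapper is
# exposed through `river.compat.convert_river_to_sklearn`; adapt the import to
# whatever your river version provides.
from river import compat, linear_model
from sklearn import datasets

X, y = datasets.load_breast_cancer(return_X_y=True)
clf = compat.convert_river_to_sklearn(linear_model.LogisticRegression())
clf.fit(X, y)
probas = clf.predict_proba(X)  # shape (n_samples, n_classes), columns ordered as clf.classes_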
def predict(self, X):
    """Predicts the target of an entire dataset contained in memory.

    Parameters
    ----------
    X
        array-like of shape (n_samples, n_features).

    Returns
    -------
    Predicted target values for each row of `X`.

    """

    # Check the fit method has been called
    utils.validation.check_is_fitted(self, attributes='instance_')

    # Check the input
    X = utils.check_array(X, **SKLEARN_INPUT_X_PARAMS)

    if X.shape[1] != self.n_features_in_:
        raise ValueError(
            f'Expected {self.n_features_in_} features, got {X.shape[1]}')

    # Make a prediction for each observation
    y_pred = [None] * len(X)
    for i, (x, _) in enumerate(stream.iter_array(X)):
        y_pred[i] = self.instance_.predict_one(x)

    # Convert back to the expected labels if an encoder was necessary for binary classification
    y_pred = np.asarray(y_pred)
    if hasattr(self, 'label_encoder_'):
        y_pred = self.label_encoder_.inverse_transform(y_pred.astype(int))

    return y_pred
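# --- Illustration of the label-encoder round trip (standalone sketch) ---
# The `label_encoder_` attribute used above is assumed to be a fitted
# sklearn.preprocessing.LabelEncoder. This snippet only shows the encode/decode
# round trip that `predict` relies on to return the original labels.
import numpy as np
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
encoded = encoder.fit_transform(np.array(['spam', 'ham', 'ham', 'spam']))  # array([1, 0, 0, 1])
decoded = encoder.inverse_transform(encoded)                               # back to the string labels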
def predict(self, X) -> np.ndarray:
    """Predicts the target of an entire dataset contained in memory.

    Parameters
    ----------
    X
        array-like of shape (n_samples, n_features).

    Returns
    -------
    Predicted target values for each row of `X`.

    """

    # Check the fit method has been called
    utils.validation.check_is_fitted(self, attributes='instance_')

    # Check the input
    X = utils.check_array(X, **SKLEARN_INPUT_X_PARAMS)

    if X.shape[1] != self.n_features_in_:
        raise ValueError(
            f'Expected {self.n_features_in_} features, got {X.shape[1]}')

    # Make a prediction for each observation
    y_pred = np.empty(shape=len(X))
    for i, (x, _) in enumerate(stream.iter_array(X)):
        y_pred[i] = self.instance_.predict_one(x)

    return y_pred
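# --- Standalone sketch of the per-row prediction pattern used above ---
# `stream.iter_array` turns a NumPy array into (dict, target) pairs, and the river
# model is queried one observation at a time with `predict_one`. The feature names
# below are made up for illustration.
import numpy as np
from river import linear_model, stream

X = np.array([[1.0, 2.0], [3.0, 4.0]])
reg = linear_model.LinearRegression()

preds = np.empty(shape=len(X))
for i, (x, _) in enumerate(stream.iter_array(X, feature_names=['x1', 'x2'])):
    preds[i] = reg.predict_one(x)  # an untrained model predicts 0.0 for every row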
def iter_sklearn_dataset(dataset: "sklearn.utils.Bunch", **kwargs) -> base.typing.Stream:
    """Iterates rows from one of the datasets provided by scikit-learn.

    This allows you to use any dataset from [scikit-learn's `datasets`
    module](https://scikit-learn.org/stable/modules/classes.html#module-sklearn.datasets). For
    instance, you can use the `fetch_openml` function to get access to all of the datasets from
    the OpenML website.

    Parameters
    ----------
    dataset
        A scikit-learn dataset.
    kwargs
        Extra keyword arguments are passed to the underlying call to `stream.iter_array`.

    Examples
    --------

    >>> import pprint
    >>> from sklearn import datasets
    >>> from river import stream

    >>> dataset = datasets.load_diabetes()

    >>> for xi, yi in stream.iter_sklearn_dataset(dataset):
    ...     pprint.pprint(xi)
    ...     print(yi)
    ...     break
    {'age': 0.0380759064334241,
     'bmi': 0.0616962065186885,
     'bp': 0.0218723549949558,
     's1': -0.0442234984244464,
     's2': -0.0348207628376986,
     's3': -0.0434008456520269,
     's4': -0.00259226199818282,
     's5': 0.0199084208763183,
     's6': -0.0176461251598052,
     'sex': 0.0506801187398187}
    151.0

    """
    kwargs["X"] = dataset.data
    kwargs["y"] = dataset.target
    try:
        kwargs["feature_names"] = dataset.feature_names
    except AttributeError:
        pass

    if isinstance(kwargs["X"], pd.DataFrame):
        yield from stream.iter_pandas(**kwargs)
    else:
        yield from stream.iter_array(**kwargs)
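# --- Hedged sketch: consuming `iter_sklearn_dataset` in an online-learning loop ---
# This assumes river's usual `learn_one`/`predict_one` API on a scaler + linear
# regression pipeline; swap in whichever model and metric you actually use.
from river import linear_model, metrics, preprocessing, stream
from sklearn import datasets

model = preprocessing.StandardScaler() | linear_model.LinearRegression()
metric = metrics.MAE()

for xi, yi in stream.iter_sklearn_dataset(datasets.load_diabetes()):
    metric.update(yi, model.predict_one(xi))  # predict before learning (progressive validation)
    model.learn_one(xi, yi)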
def iter_pandas(
    X: pd.DataFrame, y: typing.Union[pd.Series, pd.DataFrame] = None, **kwargs
) -> base.typing.Stream:
    """Iterates over the rows of a `pandas.DataFrame`.

    Parameters
    ----------
    X
        A dataframe of features.
    y
        A series or a dataframe with one column per target.
    kwargs
        Extra keyword arguments are passed to the underlying call to `stream.iter_array`.

    Examples
    --------

    >>> import pandas as pd
    >>> from river import stream

    >>> X = pd.DataFrame({
    ...     'x1': [1, 2, 3, 4],
    ...     'x2': ['blue', 'yellow', 'yellow', 'blue'],
    ...     'y': [True, False, False, True]
    ... })
    >>> y = X.pop('y')

    >>> for xi, yi in stream.iter_pandas(X, y):
    ...     print(xi, yi)
    {'x1': 1, 'x2': 'blue'} True
    {'x1': 2, 'x2': 'yellow'} False
    {'x1': 3, 'x2': 'yellow'} False
    {'x1': 4, 'x2': 'blue'} True

    """
    kwargs['feature_names'] = X.columns
    if isinstance(y, pd.DataFrame):
        kwargs['target_names'] = y.columns

    yield from stream.iter_array(X=X.to_numpy(), y=y if y is None else y.to_numpy(), **kwargs)
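# --- Hedged sketch of the multi-output branch above ---
# When `y` is a DataFrame, `target_names` is taken from its columns and each row's
# target is assumed to be yielded as a dict with one entry per column.
import pandas as pd
from river import stream

X = pd.DataFrame({'x1': [1, 2], 'x2': [3, 4]})
Y = pd.DataFrame({'y1': [0.1, 0.2], 'y2': [1.0, 2.0]})

for xi, yi in stream.iter_pandas(X, Y):
    print(xi, yi)
# e.g. {'x1': 1, 'x2': 3} {'y1': 0.1, 'y2': 1.0}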
import matplotlib.pyplot as plt

from river import compose
from river import evaluate
from river import linear_model
from river import metrics
from river import optim
from river import preprocessing
from river import stream
import river

# Scale the features, then fit a linear regression with plain SGD
model = preprocessing.StandardScaler()
model |= linear_model.LinearRegression(optimizer=optim.SGD(0.001))

# `Xa`, `Xc`, `contexts`, `a0` and `reshape_vals` are defined earlier in the notebook.
# Audit the untrained model over the action grid `Xa`
s2 = stream.iter_array(Xa)
audit = [model.predict_one(x) for x, _ in s2]
plt.imshow(reshape_vals(audit))

# Train on the context features `Xc` against the targets `a0.ev(contexts)`
s1 = stream.iter_array(Xc, a0.ev(contexts))
for x, y in s1:
    model.learn_one(x, y)

# Audit the model again after training
s2 = stream.iter_array(Xa)
audit = [model.predict_one(x) for x, _ in s2]
plt.imshow(reshape_vals(audit))

# Progressive validation on the training stream
metric = metrics.RMSE()
evaluate.progressive_val_score(
    stream.iter_array(Xc, a0.ev(contexts)),
    model,
    metric,
    print_every=int(Xc.shape[0] / 20),
)

s2 = stream.iter_array(Xa)
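# --- Self-contained, hedged variant of the progressive validation call above ---
# It uses one of river's built-in datasets instead of the notebook-local `Xc` /
# `contexts` arrays, so it can be run on its own.
from river import datasets, evaluate, linear_model, metrics, preprocessing

pipeline = preprocessing.StandardScaler() | linear_model.LinearRegression()
evaluate.progressive_val_score(
    dataset=datasets.TrumpApproval(),
    model=pipeline,
    metric=metrics.RMSE(),
    print_every=200,
)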