def predict(self, X):
    """Predict a target for every row of a dataset held in memory.

    Parameters:
        X: array-like of shape (n_samples, n_features).

    Returns:
        array of shape (n_samples,): Predicted target values for each row of `X`.

    Raises:
        ValueError: If `X` does not have `n_features_in_` columns. Errors raised by
            the fitted-state and input checks propagate unchanged.

    """

    # Predicting only makes sense once `instance_` has been set by fitting
    utils.validation.check_is_fitted(self, attributes='instance_')

    # Validate the input and make sure its width matches what was seen during fitting
    X = utils.check_array(X, **SKLEARN_INPUT_X_PARAMS)
    if X.shape[1] != self.n_features_in_:
        raise ValueError(f'Expected {self.n_features_in_} features, got {X.shape[1]}')

    # One prediction per observation, in row order
    labels = np.asarray([
        self.instance_.predict_one(x)
        for x, _ in stream.iter_array(X)
    ])

    # Without a label encoder the raw predictions are already the expected labels
    if not hasattr(self, 'label_encoder_'):
        return labels

    # Otherwise map the integer-encoded predictions back to the original labels
    return self.label_encoder_.inverse_transform(labels.astype(int))
def predict_proba(self, X):
    """Predict class probabilities for every row of a dataset held in memory.

    Parameters:
        X: array-like of shape (n_samples, n_features).

    Returns:
        array of shape (n_samples, n_classes): One row per observation of `X` and
            one column per entry of `classes_`, in that order. A class that is
            absent from the underlying model's output gets a probability of 0.

    Raises:
        ValueError: If `X` does not have `n_features_in_` columns. Errors raised by
            the fitted-state and input checks propagate unchanged.

    """

    # Predicting only makes sense once `instance_` has been set by fitting
    utils.validation.check_is_fitted(self, attributes='instance_')

    # Validate the input and make sure its width matches what was seen during fitting
    X = utils.check_array(X, **SKLEARN_INPUT_X_PARAMS)
    if X.shape[1] != self.n_features_in_:
        raise ValueError(f'Expected {self.n_features_in_} features, got {X.shape[1]}')

    classes = self.classes_
    probas = np.empty(shape=(len(X), len(classes)))

    # The per-observation output is a mapping from class to probability; it has to
    # be converted into a row ordered like `classes_` to follow the scikit-learn
    # conventions
    for row, (x, _) in enumerate(stream.iter_array(X)):
        dist = self.instance_.predict_proba_one(x)
        probas[row] = [dist.get(label, 0) for label in classes]

    return probas
def predict(self, X):
    """Predict a target value for every row of a dataset held in memory.

    Parameters:
        X: array-like of shape (n_samples, n_features).

    Returns:
        array of shape (n_samples,): Predicted target values for each row of `X`.

    Raises:
        ValueError: If `X` does not have `n_features_in_` columns. Errors raised by
            the fitted-state and input checks propagate unchanged.

    """

    # Predicting only makes sense once `instance_` has been set by fitting
    utils.validation.check_is_fitted(self, attributes='instance_')

    # Validate the input and make sure its width matches what was seen during fitting
    X = utils.check_array(X, **SKLEARN_INPUT_X_PARAMS)
    if X.shape[1] != self.n_features_in_:
        raise ValueError(f'Expected {self.n_features_in_} features, got {X.shape[1]}')

    # Fill a preallocated buffer with one prediction per observation
    n_samples = len(X)
    predictions = np.empty(shape=n_samples)
    for row, (x, _) in enumerate(stream.iter_array(X)):
        predictions[row] = self.instance_.predict_one(x)

    return predictions
def iter_pandas(X: pd.DataFrame, y: typing.Union[pd.Series, pd.DataFrame] = None, **kwargs) -> base.typing.Stream:
    """Streams the rows of a `pandas.DataFrame` one at a time.

    The frames are converted to numpy arrays and handed over to `stream.iter_array`,
    along with any extra keyword arguments.

    Parameters:
        X: A dataframe of features.
        y: A series or a dataframe with one column per target. May be omitted.

    Example:

        >>> import pandas as pd
        >>> from creme import stream

        >>> X = pd.DataFrame({
        ...     'x1': [1, 2, 3, 4],
        ...     'x2': ['blue', 'yellow', 'yellow', 'blue'],
        ...     'y': [True, False, False, True]
        ... })
        >>> y = X.pop('y')

        >>> for xi, yi in stream.iter_pandas(X, y):
        ...     print(xi, yi)
        {'x1': 1, 'x2': 'blue'} True
        {'x1': 2, 'x2': 'yellow'} False
        {'x1': 3, 'x2': 'yellow'} False
        {'x1': 4, 'x2': 'blue'} True

    """

    # The column labels of `X` are always used as feature names
    kwargs['feature_names'] = X.columns

    # Only a dataframe of targets carries one name per target
    if isinstance(y, pd.DataFrame):
        kwargs['target_names'] = y.columns

    features = X.to_numpy()
    targets = None if y is None else y.to_numpy()

    yield from stream.iter_array(X=features, y=targets, **kwargs)
def iter_sklearn_dataset(dataset: 'sklearn.utils.Bunch', **kwargs) -> base.typing.Stream:
    """Streams the rows of one of the datasets provided by scikit-learn.

    Any dataset from [scikit-learn's `datasets` module](https://scikit-learn.org/stable/modules/classes.html#module-sklearn.datasets)
    can be used. For instance, the `fetch_openml` function gives access to all of the
    datasets from the OpenML website.

    Parameters:
        dataset: A scikit-learn dataset.

    Example:

        >>> import pprint
        >>> from sklearn import datasets
        >>> from creme import stream

        >>> dataset = datasets.load_boston()

        >>> for xi, yi in stream.iter_sklearn_dataset(dataset):
        ...     pprint.pprint(xi)
        ...     print(yi)
        ...     break
        {'AGE': 65.2,
         'B': 396.9,
         'CHAS': 0.0,
         'CRIM': 0.00632,
         'DIS': 4.09,
         'INDUS': 2.31,
         'LSTAT': 4.98,
         'NOX': 0.538,
         'PTRATIO': 15.3,
         'RAD': 1.0,
         'RM': 6.575,
         'TAX': 296.0,
         'ZN': 18.0}
        24.0

    """

    # The dataset's features and targets take precedence over anything passed in
    kwargs.update(X=dataset.data, y=dataset.target)

    # Not every dataset exposes feature names; forward them only when present
    if hasattr(dataset, 'feature_names'):
        kwargs['feature_names'] = dataset.feature_names

    yield from stream.iter_array(**kwargs)
def iter_sklearn_dataset(dataset: 'sklearn.utils.Bunch', **kwargs) -> base.typing.Stream:
    """Streams the rows of one of the datasets provided by scikit-learn.

    The dataset's `data` and `target` attributes are handed over to
    `stream.iter_array`, along with any extra keyword arguments.

    Parameters:
        dataset: A scikit-learn dataset.

    Example:

        >>> import pprint
        >>> from sklearn import datasets
        >>> from creme import stream

        >>> dataset = datasets.load_boston()

        >>> for xi, yi in stream.iter_sklearn_dataset(dataset):
        ...     pprint.pprint(xi)
        ...     print(yi)
        ...     break
        {'AGE': 65.2,
         'B': 396.9,
         'CHAS': 0.0,
         'CRIM': 0.00632,
         'DIS': 4.09,
         'INDUS': 2.31,
         'LSTAT': 4.98,
         'NOX': 0.538,
         'PTRATIO': 15.3,
         'RAD': 1.0,
         'RM': 6.575,
         'TAX': 296.0,
         'ZN': 18.0}
        24.0

    """

    kwargs['X'] = dataset.data
    kwargs['y'] = dataset.target

    # A private sentinel tells "attribute is missing" apart from any real value,
    # including None, so feature names are forwarded whenever the attribute exists
    missing = object()
    names = getattr(dataset, 'feature_names', missing)
    if names is not missing:
        kwargs['feature_names'] = names

    yield from stream.iter_array(**kwargs)