def test_online_batch_consistent():
    """Check that mini-batch and online learning produce the same ``MacroF1``.

    Two identically configured pipelines (standard scaling followed by a
    one-vs-rest logistic regression) are run over at most the first 31 rows of
    ``datasets.ImageSegments``. One is driven through ``predict_many`` /
    ``fit_many`` on single-row chunks, the other through ``predict_one`` /
    ``fit_one``. Each pipeline predicts on a row before learning from it, and
    the two resulting metric values must be strictly equal.
    """
    last_row = 30  # Index of the final row each pipeline gets to see
    dataset = datasets.ImageSegments()

    # Mini-batch pipeline, fed with one-row data frames
    batch = (
        preprocessing.StandardScaler() |
        multiclass.OneVsRestClassifier(linear_model.LogisticRegression())
    )
    batch_metric = metrics.MacroF1()

    for row_idx, chunk in enumerate(pd.read_csv(dataset.path, chunksize=1)):
        targets = chunk.pop('category')
        predictions = batch.predict_many(chunk)
        batch.fit_many(chunk, targets)

        for truth, guess in zip(targets, predictions):
            # A missing prediction is not scored
            if guess is None:
                continue
            batch_metric.update(truth, guess)

        if row_idx == last_row:
            break

    # Online pipeline, fed with one observation at a time
    online = (
        preprocessing.StandardScaler() |
        multiclass.OneVsRestClassifier(linear_model.LogisticRegression())
    )
    online_metric = metrics.MacroF1()

    features = pd.read_csv(dataset.path)
    labels = features.pop('category')

    for row_idx, (x, y) in enumerate(stream.iter_pandas(features, labels)):
        guess = online.predict_one(x)
        online.fit_one(x, y)

        if guess is not None:
            online_metric.update(y, guess)

        if row_idx == last_row:
            break

    assert online_metric.get() == batch_metric.get()
def yield_datasets(model):
    """Yield the datasets that are suitable for checking ``model``.

    ``model`` is first passed through ``guess_model``. A dataset is then
    produced for every estimator base class the result is an instance of, so a
    single model may receive several datasets.

    Parameters:
        model: The estimator to pick datasets for.

    Yields:
        In this order: ``Phishing`` for binary or multi-class classifiers, the
        first 500 samples of ``ImageSegments`` for multi-class classifiers,
        ``TrumpApproval`` for regressors, scikit-learn's Linnerud data for
        multi-output regressors and ``Music`` for multi-output classifiers.

    """
    from creme import base, datasets, stream
    from sklearn import datasets as sk_datasets

    estimator = guess_model(model)

    def is_a(*kinds):
        """Tell whether the estimator is an instance of any of ``kinds``."""
        return isinstance(estimator, kinds)

    # Any kind of classifier
    if is_a(base.BinaryClassifier, base.MultiClassifier):
        yield datasets.Phishing()

    # Classifiers that handle more than two classes
    if is_a(base.MultiClassifier):
        yield datasets.ImageSegments().take(500)

    # Single-output regressors
    if is_a(base.Regressor):
        yield datasets.TrumpApproval()

    # Multi-output regressors
    if is_a(base.MultiOutputRegressor):
        linnerud = sk_datasets.load_linnerud()
        yield stream.iter_sklearn_dataset(linnerud)

    # Multi-output classifiers
    if is_a(base.MultiOutputClassifier):
        yield datasets.Music()
def yield_datasets(model):
    """Yield the datasets that are suitable for checking ``model``.

    ``model`` is first passed through ``guess_model``. A dataset is then
    produced for every estimator base class the result is an instance of, so a
    single model may receive several datasets.

    Parameters:
        model: The estimator to pick datasets for.

    Yields:
        In this order: ``Phishing`` for binary or multi-class classifiers, the
        first 500 samples of ``ImageSegments`` for multi-class classifiers,
        ``TrumpApproval`` for regressors, scikit-learn's Linnerud data followed
        by a one-hot encoded ``SolarFlare`` for multi-output regressors, and
        ``Music`` for multi-output classifiers.

    """
    from creme import base, compose, datasets, preprocessing, stream
    from sklearn import datasets as sk_datasets

    class SolarFlare:
        """One-hot encoded version of `datasets.SolarFlare`."""

        def __iter__(self):
            # `str` features go through the one-hot encoder, `int` features
            # are selected as they are, and both parts are combined with `+`.
            categorical = compose.SelectType(str) | preprocessing.OneHotEncoder()
            numeric = compose.SelectType(int)
            encoder = categorical + numeric

            yield from (
                (encoder.transform_one(features), targets)
                for features, targets in datasets.SolarFlare()
            )

    estimator = guess_model(model)

    # Any kind of classifier
    classifier_kinds = (base.BinaryClassifier, base.MultiClassifier)
    if isinstance(estimator, classifier_kinds):
        yield datasets.Phishing()

    # Classifiers that handle more than two classes
    if isinstance(estimator, base.MultiClassifier):
        yield datasets.ImageSegments().take(500)

    # Single-output regressors
    if isinstance(estimator, base.Regressor):
        yield datasets.TrumpApproval()

    # Multi-output regressors get two datasets
    if isinstance(estimator, base.MultiOutputRegressor):
        linnerud = sk_datasets.load_linnerud()
        yield stream.iter_sklearn_dataset(linnerud)
        yield SolarFlare()

    # Multi-output classifiers
    if isinstance(estimator, base.MultiOutputClassifier):
        yield datasets.Music()
def yield_datasets(model):
    """Yield the datasets that are suitable for checking ``model``.

    ``model`` is first passed through ``guess_model``. A dataset is then
    produced for every estimator base class the result is an instance of, so a
    single model may receive several datasets.

    Parameters:
        model: The estimator to pick datasets for.

    Yields:
        In this order: ``Phishing`` for binary or multi-class classifiers, the
        first 500 samples of ``ImageSegments`` for multi-class classifiers,
        ``TrumpApproval`` for regressors, scikit-learn's Linnerud data for
        multi-output regressors, and the first 100 samples of the OpenML
        ``yeast`` dataset (version 4) for multi-output classifiers.

    """
    # Imported here like every other dependency of this function, so that the
    # multi-output classification branch does not rely on a module-level import.
    import itertools

    from creme import base
    from creme import datasets
    from creme import stream
    from sklearn import datasets as sk_datasets

    model = guess_model(model)

    # Classification
    if isinstance(model, (base.BinaryClassifier, base.MultiClassifier)):
        yield datasets.Phishing()

    # Multi-class classification
    if isinstance(model, base.MultiClassifier):
        yield datasets.ImageSegments().take(500)

    # Regression
    if isinstance(model, base.Regressor):
        yield datasets.TrumpApproval()

    # Multi-output regression
    if isinstance(model, base.MultiOutputRegressor):
        yield stream.iter_sklearn_dataset(sk_datasets.load_linnerud())

    # Multi-output classification
    if isinstance(model, base.MultiOutputClassifier):
        # NOTE(review): `fetch_openml` presumably needs network access or a
        # local scikit-learn cache -- confirm this is acceptable in CI.
        yeast = stream.iter_sklearn_dataset(sk_datasets.fetch_openml('yeast', version=4))
        # Only the first 100 samples are used
        yield itertools.islice(yeast, 100)
def yield_datasets(model):
    """Yield the datasets that are suitable for checking ``model``.

    The kind of estimator is determined with the predicates from
    ``creme.utils.inspect``. A dataset is produced for every predicate that
    holds, so a single model may receive several datasets.

    Parameters:
        model: The estimator to pick datasets for.

    Yields:
        In this order: ``Phishing`` for classifiers, followed by the first 500
        samples of ``ImageSegments`` when the classifier's ``_multiclass``
        attribute is truthy; ``TrumpApproval`` for regressors; scikit-learn's
        Linnerud data followed by a one-hot encoded ``SolarFlare`` for
        multi-output regressors; and ``Music`` for multi-output classifiers.

    """
    from creme import compose, datasets, preprocessing, stream, utils
    from sklearn import datasets as sk_datasets

    checks = utils.inspect

    if checks.isclassifier(model):
        # Binary data suits every classifier
        yield datasets.Phishing()

        # `_multiclass` is only read once the model is known to be a classifier
        if model._multiclass:
            yield datasets.ImageSegments().take(500)

    if checks.isregressor(model):
        yield datasets.TrumpApproval()

    if checks.ismoregressor(model):
        # First multi-output regression dataset: Linnerud from scikit-learn
        linnerud = sk_datasets.load_linnerud()
        yield stream.iter_sklearn_dataset(linnerud)

        # Second multi-output regression dataset: encoded solar flares
        class SolarFlare:
            """One-hot encoded version of `datasets.SolarFlare`."""

            def __iter__(self):
                # `str` features go through the one-hot encoder, `int`
                # features are selected as they are, and both parts are
                # combined with `+`.
                encoded_strings = compose.SelectType(str) | preprocessing.OneHotEncoder()
                encoder = encoded_strings + compose.SelectType(int)

                for features, targets in datasets.SolarFlare():
                    encoded = encoder.transform_one(features)
                    yield encoded, targets

        yield SolarFlare()

    if checks.ismoclassifier(model):
        yield datasets.Music()