def test_online_batch_consistent():
    """Check that mini-batch and one-at-a-time learning give the same metric.

    The same pipeline (a standard scaler piped into a one-vs-rest logistic
    regression) is run twice over the first 31 rows of the dataset: once with
    ``predict_many``/``fit_many`` on single-row chunks, and once with
    ``predict_one``/``fit_one``. Each run predicts before it fits, and the two
    resulting macro-F1 values must be equal.
    """
    last_step = 30  # both loops stop after processing the row at this index

    # Batch
    batch = (
        preprocessing.StandardScaler() |
        multiclass.OneVsRestClassifier(
            linear_model.LogisticRegression()
        )
    )

    dataset = datasets.ImageSegments()

    batch_metric = metrics.MacroF1()

    # The loop exits early, so the chunked reader never reaches end of file;
    # close it explicitly instead of leaking the underlying file handle.
    chunks = pd.read_csv(dataset.path, chunksize=1)
    try:
        for i, x in enumerate(chunks):
            y = x.pop('category')
            y_pred = batch.predict_many(x)
            batch.fit_many(x, y)

            for yt, yp in zip(y, y_pred):
                # Predictions that are None are not scored.
                if yp is not None:
                    batch_metric.update(yt, yp)

            if i == last_step:
                break
    finally:
        chunks.close()

    # Online
    online = (
        preprocessing.StandardScaler() |
        multiclass.OneVsRestClassifier(
            linear_model.LogisticRegression()
        )
    )

    online_metric = metrics.MacroF1()

    X = pd.read_csv(dataset.path)
    Y = X.pop('category')

    for i, (x, y) in enumerate(stream.iter_pandas(X, Y)):
        y_pred = online.predict_one(x)
        online.fit_one(x, y)

        # Predictions that are None are not scored.
        if y_pred is not None:
            online_metric.update(y, y_pred)

        if i == last_step:
            break

    assert online_metric.get() == batch_metric.get()
def test_one_many_consistent():
    """Checks that using fit_one or fit_many produces the same result.

    One linear regression is trained row by row with ``fit_one``; a second is
    trained with ``fit_many`` on one-row batches of the same data. The learned
    weight of every feature column must match (``math.isclose``).
    """

    X = pd.read_csv(datasets.TrumpApproval().path)
    Y = X.pop('five_thirty_eight')

    one = lm.LinearRegression()
    for x, y in stream.iter_pandas(X, Y):
        one.fit_one(x, y)

    many = lm.LinearRegression()
    # Build the one-row batches with positional indexing. Passing a DataFrame
    # straight to np.array_split goes through DataFrame.swapaxes, which pandas
    # has deprecated; .iloc[[row]] yields the same one-row frame / one-element
    # series without that path.
    for row in range(len(X)):
        many.fit_many(X.iloc[[row]], Y.iloc[[row]])

    # Iterating a DataFrame yields its column labels.
    for i in X:
        assert math.isclose(one.weights[i], many.weights[i])
def test_standard_scaler_one_many_consistent():
    """Checks that using fit_one or fit_many produces the same result.

    One scaler is fitted row by row with ``fit_one``; a second is fitted with
    ``fit_many`` on the same data split into 10 batches. The per-column
    ``counts``, ``means`` and ``vars`` of both scalers must match
    (``math.isclose``).
    """

    X = pd.read_csv(datasets.TrumpApproval().path)

    one = preprocessing.StandardScaler()
    for x, _ in stream.iter_pandas(X):
        one.fit_one(x)

    many = preprocessing.StandardScaler()
    # Split the row positions rather than the frame itself: the 10 sections
    # have the same boundaries as np.array_split(X, 10), but this avoids the
    # deprecated DataFrame.swapaxes path that np.array_split uses on a frame.
    for rows in np.array_split(np.arange(len(X)), 10):
        many.fit_many(X.iloc[rows])

    # Iterating a DataFrame yields its column labels.
    for i in X:
        assert math.isclose(one.counts[i], many.counts[i])
        assert math.isclose(one.means[i], many.means[i])
        assert math.isclose(one.vars[i], many.vars[i])