def test_value_counts(stream):
    """Check streaming ``value_counts`` against pandas on the combined data.

    The same pandas series is emitted twice; the last collected result
    must equal ``value_counts`` of the two copies concatenated.
    """
    sample = pd.Series(['a', 'b', 'a'])
    sseries = Series(example=sample, stream=stream)

    # Collect every update produced by the streaming value_counts.
    counted = sseries.value_counts()
    collected = counted.stream.gather().sink_to_list()

    for _ in range(2):
        sseries.emit(sample)

    expected = pd.concat([sample, sample], axis=0).value_counts()
    assert_eq(collected[-1], expected)
def test_value_counts(stream):
    """Check streaming ``value_counts`` on cudf data.

    Asserts that the result is marked as an ``'updating'`` stream and
    that, after emitting the same cudf series twice, the last collected
    value equals ``value_counts`` of the two copies concatenated.
    """
    sample = cudf.Series([1, 2, 1])
    sseries = Series(example=sample, stream=stream)

    counted = sseries.value_counts()
    assert counted._stream_type == 'updating'

    # Collect every update produced by the streaming value_counts.
    collected = counted.stream.gather().sink_to_list()

    for _ in range(2):
        sseries.emit(sample)

    expected = cudf.concat([sample, sample]).value_counts()
    assert_eq(collected[-1], expected)
def stream_predict(self, X, *args, **kwargs):
    """predict enabled on streams

    Parameters
    ----------
    X : streamz.dataframe.core.DataFrame
        input data for predictions
    *args, **kwargs
        Forwarded to ``self.predict`` for every element of ``X.stream``.
        The keyword ``y_example`` is consumed here and not forwarded; it
        supplies the example used to build the returned container and
        takes precedence over ``self._y_example``.

    Returns
    -------
    streamz.dataframe.core.Series or streamz.dataframe.core.DataFrame
        A streaming container holding the predictions; a ``Series`` when
        the example is a ``pandas.Series`` and a ``DataFrame`` when it is
        a ``pandas.DataFrame``.

    Raises
    ------
    AttributeError
        If no example is available, neither through ``y_example`` nor
        through ``self._y_example``.
    TypeError
        If the example is neither a ``pandas.Series`` nor a
        ``pandas.DataFrame``.
    """
    self._check_stream_inputs(X)

    y_example = kwargs.pop('y_example', None)
    if y_example is None:
        y_example = getattr(self, '_y_example', None)
    if y_example is None:
        raise AttributeError(
            " No example provided for y, make sure you called "
            "stream_partial_fit before, or provide a y_example keyword "
            "argument when calling this function "
        )

    # Pick the container before touching the stream, so an unsupported
    # example fails loudly instead of returning None and leaving a dangling
    # map node attached to X.stream.
    if isinstance(y_example, pd.Series):
        container = Series
    elif isinstance(y_example, pd.DataFrame):
        container = DataFrame
    else:
        raise TypeError(
            "y example must be a pandas Series or DataFrame, got "
            + type(y_example).__name__
        )

    predictions = X.stream.map(self.predict, *args, **kwargs)
    return container(predictions, example=y_example)
def test_score_stream():
    """Wire a constant-score estimator through ``stream_score``.

    Builds streaming X/y inputs, sinks the resulting score stream into a
    list and hands a predicate on that list to ``await_for``.
    """
    class MyEstimator(StreamEstimator):
        # Minimal estimator: only ``score`` returns a value.
        def partial_fit(self, X, y):
            pass

        def predict(self, X):
            pass

        def score(self, X, y):
            return 1

    n_rows = 20
    X_example = pd.DataFrame({
        'name': [None] * n_rows,
        'amount': [None] * n_rows,
    })
    y_example = pd.Series([])

    X_stream = Stream()
    y_stream = Stream()
    X = DataFrame(X_stream, example=X_example)
    y = Series(y_stream, example=y_example)

    estimator = MyEstimator()
    scores = estimator.stream_score(X, y)

    received = []
    scores.stream.sink(received.append)

    def all_scores_received():
        return received == [1] * n_rows

    # NOTE(review): nothing is emitted into X_stream / y_stream in this
    # test, and the value returned by await_for is not used -- confirm the
    # predicate is actually being checked.
    await_for(all_scores_received, 0.1)
def test_stream_partial_fit():
    """Check that ``stream_partial_fit`` yields one result per emitted pair.

    Emits ``n_fits`` X/y pairs and waits until the ``fit_ctr`` values
    read from the results are exactly ``1 .. n_fits`` in order.
    """
    X_example = pd.DataFrame({'name': [], 'amount': []})
    y_example = pd.Series([])

    X_stream = Stream()
    y_stream = Stream()
    X = DataFrame(X_stream, example=X_example)
    y = Series(y_stream, example=y_example)

    estimator = MyStreamingEstimator()
    fit_results = estimator.stream_partial_fit(X, y)

    # Record the fit counter exposed by each result coming out of the stream.
    fit_ctr_list = []
    fit_results.map(lambda fitted: fitted.fit_ctr).sink(fit_ctr_list.append)

    n_fits = 10
    for _ in range(n_fits):
        X_stream.emit(X_example)
        y_stream.emit(y_example)

    def counters_complete():
        return list(range(1, n_fits + 1)) == fit_ctr_list

    wait_for(counters_complete, 0.2)
def stream_score(self, X, y, *args, **kwargs):
    """score enabled on streams

    Each element of ``X.stream`` is paired with the corresponding element
    of ``y.stream`` and the pair is passed to ``self.score``.

    Parameters
    ----------
    X : streamz.dataframe.core.DataFrame
        streaming input data to score on
    y : streamz.dataframe.core.Series
        streaming target values, paired element-by-element with ``X``
    *args, **kwargs
        Forwarded to ``self.score`` after the ``(X, y)`` pair.

    Returns
    -------
    streamz.dataframe.core.Series
        A streaming series carrying one score per ``(X, y)`` pair.
    """
    self._check_method('score')
    self._check_stream_inputs(X, y)

    def _score_pair(pair):
        features, target = pair
        return self.score(features, target, *args, **kwargs)

    # Mapping ``self.score`` over X.stream alone would call it without
    # ``y``; zip the two streams so every score sees its matching target.
    paired = X.stream.zip(y.stream)
    return Series(paired.map(_score_pair), example=pd.Series([]))