Esempio n. 1
0
def test_value_counts(stream):
    """Check streaming ``value_counts`` against pandas on the emitted data."""
    batch = pd.Series(['a', 'b', 'a'])
    streaming = Series(example=batch, stream=stream)

    # Attach the list sink before emitting so the updates are collected.
    counts = streaming.value_counts()
    collected = counts.stream.gather().sink_to_list()

    # The same batch is pushed twice.
    for _ in range(2):
        streaming.emit(batch)

    # The last collected result must equal the counts over both batches.
    expected = pd.concat([batch, batch], axis=0).value_counts()
    assert_eq(collected[-1], expected)
Esempio n. 2
0
def test_value_counts(stream):
    """Check streaming ``value_counts`` on a cudf-backed Series."""
    batch = cudf.Series([1, 2, 1])
    streaming = Series(example=batch, stream=stream)

    counts = streaming.value_counts()
    # The result of value_counts is expected to be flagged as 'updating'.
    assert counts._stream_type == 'updating'

    # Attach the list sink before emitting so the updates are collected.
    collected = counts.stream.gather().sink_to_list()

    # The same batch is pushed twice.
    for _ in range(2):
        streaming.emit(batch)

    # The last collected result must equal the counts over both batches.
    expected = cudf.concat([batch, batch]).value_counts()
    assert_eq(collected[-1], expected)
Esempio n. 3
0
 def stream_predict(self, X, *args, **kwargs):
     """Apply ``self.predict`` to every element of a streaming dataframe.

     Parameters
     ----------
     X : streamz.dataframe.core.DataFrame
         input data for predictions
     *args, **kwargs
         Forwarded, together with ``self.predict``, to ``X.stream.map``.
         The keyword ``y_example`` is consumed here and not forwarded; when
         it is absent or None, the estimator's ``_y_example`` attribute is
         used instead.

     Returns
     -------
     streaming Series or DataFrame
         A streaming container wrapping the mapped stream: ``Series`` when
         the example is a ``pd.Series``, ``DataFrame`` when it is a
         ``pd.DataFrame``.

     Raises
     ------
     AttributeError
         If no example for y is available (neither the ``y_example``
         keyword nor a ``_y_example`` attribute on the estimator).
     TypeError
         If the example is neither a ``pd.Series`` nor a ``pd.DataFrame``.
     """
     self._check_stream_inputs(X)
     _y_example = kwargs.pop('y_example', None)
     if _y_example is None:
         _y_example = getattr(self, '_y_example', None)
     if _y_example is None:
         raise AttributeError("""
             No example provided for y, make sure you called stream_partial_fit before,
             or provide a y_example keyword argument when calling this function
         """)

     # Pick the output container before touching X.stream, so an unsupported
     # example fails loudly instead of silently returning None and leaving a
     # dangling map node attached to the input stream.
     if isinstance(_y_example, pd.Series):
         container = Series
     elif isinstance(_y_example, pd.DataFrame):
         container = DataFrame
     else:
         raise TypeError(
             "y_example must be a pandas Series or DataFrame, got {}".format(
                 type(_y_example).__name__))

     stream = X.stream.map(self.predict, *args, **kwargs)
     return container(stream, example=_y_example)
def test_score_stream():
    """Wire ``stream_score`` into a list sink and poll for the scores."""
    # Minimal estimator stub: only ``score`` returns a value (always 1).
    class MyEstimator(StreamEstimator):
        def partial_fit(self, X, y):
            pass

        def predict(self, X):
            pass

        def score(self, X, y):
            return 1

    n_rows = 20
    X_example = pd.DataFrame({
        'name': [None] * n_rows,
        'amount': [None] * n_rows,
    })
    y_example = pd.Series([])

    X_stream = Stream()
    y_stream = Stream()
    X = DataFrame(X_stream, example=X_example)
    y = Series(y_stream, example=y_example)

    estimator = MyEstimator()
    scored = estimator.stream_score(X, y)

    scores = []
    scored.stream.sink(scores.append)

    def all_scores_received():
        return scores == [1] * n_rows

    # NOTE(review): nothing is emitted on X_stream / y_stream in this test and
    # the return value of await_for is discarded -- confirm whether this check
    # is ever actually evaluated to completion.
    await_for(all_scores_received, 0.1)
def test_stream_partial_fit():
    """Emit ten (X, y) pairs and expect ``fit_ctr`` to run from 1 to 10."""
    X_example = pd.DataFrame({
        'name': [],
        'amount': [],
    })
    y_example = pd.Series([])

    X_stream = Stream()
    y_stream = Stream()
    X = DataFrame(X_stream, example=X_example)
    y = Series(y_stream, example=y_example)

    estimator = MyStreamingEstimator()
    fitted = estimator.stream_partial_fit(X, y)

    # Record the fit counter of each model emitted by the fit stream.
    fit_ctr_list = []
    fitted.map(lambda est: est.fit_ctr).sink(fit_ctr_list.append)

    n_fits = 10
    for _ in range(n_fits):
        X_stream.emit(X_example)
        y_stream.emit(y_example)

    def counters_match():
        return list(range(1, n_fits + 1)) == fit_ctr_list

    wait_for(counters_match, 0.2)
Esempio n. 6
0
 def stream_score(self, X, y, *args, **kwargs):
     """Apply ``self.score`` to paired elements of two streams.

     Parameters
     ----------
     X : streaming dataframe
         Input data; its ``stream`` attribute supplies the first argument
         of ``score``.
     y : streaming series or None
         Targets; when not None, its ``stream`` attribute is zipped with
         ``X.stream`` so each ``score`` call receives a matching
         ``(X, y)`` pair. When None, ``score`` is mapped over ``X.stream``
         alone. NOTE(review): whether ``_check_stream_inputs`` accepts a
         None ``y`` is not visible here -- confirm.
     *args, **kwargs
         Forwarded to the ``map`` call and from there to ``score``.

     Returns
     -------
     Series
         Streaming series wrapping the stream of scores, built with an
         empty ``pd.Series`` as example.
     """
     self._check_method('score')
     self._check_stream_inputs(X, y)
     if y is None:
         stream = X.stream.map(self.score, *args, **kwargs)
     else:
         # Mapping over X.stream alone would call score without y; zip the
         # two streams so score receives both elements of each pair.
         def _score_pair(pair, *extra, **options):
             features, targets = pair
             return self.score(features, targets, *extra, **options)

         stream = X.stream.zip(y.stream).map(_score_pair, *args, **kwargs)
     return Series(stream, example=pd.Series([]))