def test_rolling_mean(min_periods, window_length, gap, rolling_series_pd):
    """Compare the RollingMean primitive with a direct rolling ``mean()``.

    The reference values come from ``_roll_series_with_gap`` called with the
    numeric equivalents of ``window_length`` and ``gap``; the primitive itself
    receives the parameters unchanged.
    """
    gap_size = get_number_from_offset(gap)
    window_size = get_number_from_offset(window_length)

    # The fixture is expected to be a uniformly spaced series, which is what
    # lets the numeric parameters stand in for the offsets here.
    reference_roll = _roll_series_with_gap(
        rolling_series_pd,
        window_size,
        gap=gap_size,
        min_periods=min_periods,
    )
    expected = pd.Series(reference_roll.mean().values)

    rolling_mean = RollingMean(
        window_length=window_length,
        gap=gap,
        min_periods=min_periods,
    ).get_function()
    actual = pd.Series(
        rolling_mean(rolling_series_pd.index, pd.Series(rolling_series_pd.values))
    )

    # A falsy min_periods (e.g. 0) is counted the same as a min_periods of 1.
    effective_min_periods = min_periods if min_periods else 1
    expected_nan_count = gap_size + effective_min_periods - 1

    assert actual.isna().sum() == expected_nan_count
    pd.testing.assert_series_equal(expected, actual)
def test_rolling_std(min_periods, window_length, gap, rolling_series_pd):
    """Compare the RollingSTD primitive with a direct rolling ``std()``.

    The reference values come from ``_roll_series_with_gap`` called with the
    numeric equivalents of ``window_length`` and ``gap``; the primitive itself
    receives the parameters unchanged.
    """
    gap_size = get_number_from_offset(gap)
    window_size = get_number_from_offset(window_length)

    # The fixture is expected to be a uniformly spaced series, which is what
    # lets the numeric parameters stand in for the offsets here.
    reference_roll = _roll_series_with_gap(
        rolling_series_pd,
        window_size,
        gap=gap_size,
        min_periods=min_periods,
    )
    expected = pd.Series(reference_roll.std().values)

    rolling_std = RollingSTD(
        window_length=window_length,
        gap=gap,
        min_periods=min_periods,
    ).get_function()
    actual = pd.Series(
        rolling_std(rolling_series_pd.index, pd.Series(rolling_series_pd.values))
    )

    if min_periods in (0, 1):
        # The pandas std function returns NaN for a window holding a single
        # value, so one extra NaN follows the gap rows.
        expected_nan_count = gap_size + 1
    else:
        # A falsy min_periods falls back to 2 here, the smallest window size
        # for which std produces a value.
        expected_nan_count = gap_size + (min_periods or 2) - 1

    assert actual.isna().sum() == expected_nan_count
    pd.testing.assert_series_equal(expected, actual)
Example #3
0
def test_roll_series_with_gap(window_length, gap, rolling_series_pd):
    """Verify the rolling max and min produced by ``_roll_series_with_gap``.

    The expectations rely on each fixture value equalling its positional
    index, so a window's min is its first position and its max is its last.
    """
    rolling_max = _roll_series_with_gap(rolling_series_pd, window_length, gap=gap).max()
    rolling_min = _roll_series_with_gap(rolling_series_pd, window_length, gap=gap).min()

    row_count = len(rolling_series_pd)
    assert len(rolling_max) == row_count
    assert len(rolling_min) == row_count

    gap_size = get_number_from_offset(gap)
    window_size = get_number_from_offset(window_length)

    # With a string (offset) gap, no gap handling happens at this level: the
    # window ends on the row itself, as if the gap were 0.
    end_shift = 0 if isinstance(gap, str) else gap_size

    for row in range(row_count):
        first_pos = row - gap_size - window_size + 1
        last_pos = row - end_shift

        # A window lying entirely before the start of the series has no values.
        if first_pos < 0 and last_pos < 0:
            assert pd.isnull(rolling_max.iloc[row])
            assert pd.isnull(rolling_min.iloc[row])
            continue

        # Clamp windows that only partially precede the series.
        first_pos = max(first_pos, 0)

        assert rolling_min.iloc[row] == first_pos
        assert rolling_max.iloc[row] == last_pos
Example #4
0
def test_roll_series_with_gap_early_values(window_length, gap, rolling_series_pd):
    """Check how early (incomplete) windows are counted by ``_roll_series_with_gap``.

    Two configurations are exercised: the default ``min_periods`` and a
    ``min_periods`` equal to the full window size.
    """
    gap_size = get_number_from_offset(gap)
    window_size = get_number_from_offset(window_length)
    gap_is_offset = isinstance(gap, str)

    # Default min_periods (1): partially filled windows still yield a count.
    default_counts = _roll_series_with_gap(
        rolling_series_pd,
        window_length,
        gap=gap,
    ).count()
    empty_windows = default_counts.loc[default_counts == 0]
    nonempty_windows = default_counts.loc[default_counts != 0]
    partial_windows = nonempty_windows.loc[default_counts < window_size]

    assert len(partial_windows) == window_size - 1
    # An offset gap isn't handled at this level, so every window contains at
    # least the row itself; a numeric gap leaves ``gap_size`` empty windows.
    expected_empty = 0 if gap_is_offset else gap_size
    assert len(empty_windows) == expected_empty

    # min_periods equal to the window size: incomplete windows become null.
    strict_counts = _roll_series_with_gap(
        rolling_series_pd,
        window_length,
        gap=gap,
        min_periods=window_size,
    ).count()
    null_windows = strict_counts.loc[pd.isna(strict_counts)]
    short_windows = strict_counts.loc[strict_counts < window_size]

    # The gap is applied by shifting, which puts NaN rows into the series.
    # For count, those NaN rows still count towards "min periods" even though
    # they are not counted as values (unlike max, for example, where NaNs do
    # not count towards "min periods").
    assert len(null_windows) == window_size - 1
    # An offset gap isn't handled at this level, so no window comes up short.
    expected_short = 0 if gap_is_offset else gap_size
    assert len(short_windows) == expected_short
def test_rolling_count(window_length, gap, rolling_series_pd):
    """Compare the RollingCount primitive with a direct rolling ``count()``.

    Only the values after the leading NaN rows are compared against the
    reference; the number of leading NaNs is asserted separately.
    """
    gap_size = get_number_from_offset(gap)
    window_size = get_number_from_offset(window_length)

    reference_roll = _roll_series_with_gap(
        rolling_series_pd,
        window_size,
        gap=gap_size,
        min_periods=window_size,
    )
    expected = pd.Series(reference_roll.count().values)

    rolling_count = RollingCount(
        window_length=window_length,
        gap=gap,
        min_periods=window_size,
    ).get_function()
    actual = pd.Series(rolling_count(rolling_series_pd.index))

    leading_nans = gap_size + window_size - 1
    assert actual.isna().sum() == leading_nans

    # RollingCount handles the min_periods difference inside the primitive, so
    # it does not match the raw _roll_series_with_gap call on the leading rows.
    pd.testing.assert_series_equal(
        expected.iloc[leading_nans:],
        actual.iloc[leading_nans:],
    )
Example #6
0
def test_apply_roll_with_offset_gap(window_length, gap, rolling_series_pd):
    """Verify rolling max/min when ``_apply_roll_with_offset_gap`` handles the gap.

    The expectations rely on each fixture value equalling its positional
    index, so a window's min is its first position and its max is its last.
    """

    def roll_and_apply(aggregate):
        # Build a fresh rolling object and apply ``aggregate`` to each window
        # through _apply_roll_with_offset_gap.
        def apply_with_gap(sub_s):
            return _apply_roll_with_offset_gap(sub_s, gap, aggregate, min_periods=1)

        roller = _roll_series_with_gap(rolling_series_pd, window_length, gap=gap)
        return roller.apply(apply_with_gap)

    rolling_max_series = roll_and_apply(max)
    rolling_min_series = roll_and_apply(min)

    row_count = len(rolling_series_pd)
    assert len(rolling_max_series) == row_count
    assert len(rolling_min_series) == row_count

    gap_size = get_number_from_offset(gap)
    window_size = get_number_from_offset(window_length)

    for row in range(row_count):
        first_pos = row - gap_size - window_size + 1
        # Going through the _apply call, the gap is always reflected in the
        # window end, whatever form the gap was given in.
        last_pos = row - gap_size

        # A window lying entirely before the start of the series has no values.
        if first_pos < 0 and last_pos < 0:
            assert pd.isnull(rolling_max_series.iloc[row])
            assert pd.isnull(rolling_min_series.iloc[row])
            continue

        # Clamp windows that only partially precede the series.
        first_pos = max(first_pos, 0)

        assert rolling_min_series.iloc[row] == first_pos
        assert rolling_max_series.iloc[row] == last_pos