Example #1
0
 def transform(self, X, **transform_params):
     """Smooth ``X`` with a rolling mean of ``self.window`` samples.

     ``X`` has to be one-dimensional; it is wrapped in a ``Series``. When
     ``self.window`` is not positive the wrapped values are returned as-is.
     Wherever the rolling mean is NaN the original value is kept instead.
     """
     values = Series(X)
     if not self.window > 0:
         # No smoothing requested.
         return values
     smoothed = values.rolling(window=self.window).mean()
     # Fall back to the raw value wherever the rolling mean is undefined.
     return smoothed.fillna(values)
Example #2
0
    def calculate_metrics(self):
        """Compute the risk metrics for the period and store them on ``self``.

        Reads ``benchmark_returns``, ``algorithm_returns``,
        ``treasury_curves``, ``start_date``, ``end_date`` and ``env`` and
        writes period returns, volatilities, sharpe, sortino, information,
        beta/alpha, excess return, max drawdown and max leverage.

        Raises:
            Exception: if the algorithm and benchmark return series do not
                share the same index.
        """
        self.benchmark_period_returns = \
            self.calculate_period_returns(self.benchmark_returns)

        self.algorithm_period_returns = \
            self.calculate_period_returns(self.algorithm_returns)

        if not self.algorithm_returns.index.equals(
            self.benchmark_returns.index
        ):
            # Implicit concatenation keeps the message on one line; a
            # backslash continuation inside the literal would embed the
            # source indentation in the text.
            message = (
                "Mismatch between benchmark_returns ({bm_count}) and "
                "algorithm_returns ({algo_count}) in range {start} : {end}"
            )
            message = message.format(
                bm_count=len(self.benchmark_returns),
                algo_count=len(self.algorithm_returns),
                start=self.start_date,
                end=self.end_date
            )
            raise Exception(message)

        self.num_trading_days = len(self.benchmark_returns)
        # min_periods=0 is passed explicitly: the implicit default of
        # Rolling.count() changed from 0 to the window size in newer pandas,
        # which would turn every count but the last into NaN.
        self.trading_day_counts = self.algorithm_returns.rolling(
            self.num_trading_days, min_periods=0).count()

        self.mean_algorithm_returns = \
            self.algorithm_returns.cumsum() / self.trading_day_counts

        self.benchmark_volatility = self.calculate_volatility(
            self.benchmark_returns)
        self.algorithm_volatility = self.calculate_volatility(
            self.algorithm_returns)
        self.treasury_period_return = choose_treasury(
            self.treasury_curves,
            self.start_date,
            self.end_date,
            self.env,
        )
        self.sharpe = self.calculate_sharpe()
        # The consumer currently expects a 0.0 value for sharpe in period,
        # this differs from cumulative which was np.nan.
        # When factoring out the sharpe_ratio, the different return types
        # were collapsed into `np.nan`.
        # TODO: Either fix consumer to accept `np.nan` or make the
        # `sharpe_ratio` return type configurable.
        # In the meantime, convert nan values to 0.0
        if pd.isnull(self.sharpe):
            self.sharpe = 0.0
        self.sortino = self.calculate_sortino()
        self.information = self.calculate_information()
        self.beta, self.algorithm_covariance, self.benchmark_variance, \
            self.condition_number, self.eigen_values = self.calculate_beta()
        self.alpha = self.calculate_alpha()
        self.excess_return = self.algorithm_period_returns - \
            self.treasury_period_return
        self.max_drawdown = self.calculate_max_drawdown()
        self.max_leverage = self.calculate_max_leverage()
Example #3
0
def test_closed_min_max_datetime(input_dtype, func, closed, expected):
    # see gh-21704
    ser = Series(
        data=np.arange(10).astype(input_dtype),
        index=date_range("2000", periods=10),
    )

    result = getattr(ser.rolling("3D", closed=closed), func)()
    expected = Series(expected, index=ser.index)
    tm.assert_series_equal(result, expected)
Example #4
0
def test_iter_rolling_datetime(expected, expected_index, window):
    # GH 11704
    ser = Series(range(5), index=date_range(start="2020-01-01", periods=5, freq="D"))

    expected = [
        Series(values, index=idx) for (values, idx) in zip(expected, expected_index)
    ]

    for (expected, actual) in zip(expected, ser.rolling(window)):
        tm.assert_series_equal(actual, expected)
Example #5
0
def throughput(s: pd.Series,
               window_size_ms: float,
               trim: bool = False) -> pd.Series:
    """
    Consider a series of timestamps:

        timestamp
      0 11:00:01 am
      1 11:00:03 am
      2 11:00:54 am
      3 11:01:34 am
      4 11:02:16 am

    Imagine we divide the data into 1 minute rolling windows with every window
    having its right edge be an entry in the series. We'd get the following
    windows and entries:

                                  timestamps
      10:59:01 am - 11:00:01 am | [0]       |
      10:59:03 am - 11:00:03 am | [0, 1]    |
      10:59:54 am - 11:00:54 am | [0, 1, 2] |
      11:00:34 am - 11:01:34 am | [2, 3]    |
      11:01:16 am - 11:02:16 am | [2, 3, 4] |

    If we count the number of entries in each window and divide by the window
    size, we get the throughput of each window measured in events per second.

                                  throughput
      10:59:01 am - 11:00:01 am | 1 / 60     |
      10:59:03 am - 11:00:03 am | 2 / 60     |
      10:59:54 am - 11:00:54 am | 3 / 60     |
      11:00:34 am - 11:01:34 am | 2 / 60     |
      11:01:16 am - 11:02:16 am | 3 / 60     |

    This is what `throughput` computes. If `trim` is true, the first
    window_size_ms of throughput data is trimmed.

    If `trim` is false, the first 100 entries are dropped instead, and every
    later entry that still falls inside the first window is replaced by the
    cumulative rate (entries seen so far divided by seconds elapsed since
    the first entry).
    """
    # Number of leading entries always discarded on the non-trim path.
    warmup = 100

    s = pd.Series(0, index=s.sort_values())
    rates = (s.rolling(f'{window_size_ms}ms').count() /
             (window_size_ms / 1000))
    start_time = rates.index[0]
    offset = pd.DateOffset(microseconds=window_size_ms * 1000)

    if trim:
        return rates[rates.index >= start_time + offset]

    # TODO(mwhittaker): Fix up. It's a little jank.
    # Iterating the index directly avoids Series.iteritems(), which no
    # longer exists in pandas 2.x; only the timestamps are needed here.
    for i, index in enumerate(s.index, start=1):
        if i < warmup:
            continue
        if index > start_time + offset:
            # Past the first window: the rolling values are already valid.
            break
        rates[index] = i / (index - start_time).total_seconds()
    return rates[warmup:]
Example #6
0
def test_missing_minp_zero_variable():
    """All-NaN variable window with ``min_periods=0`` sums to 0.0.

    https://github.com/pandas-dev/pandas/pull/18921
    """
    dates = ["2017-01-01", "2017-01-04", "2017-01-06", "2017-01-07"]
    all_nan = Series([np.nan] * 4, index=pd.DatetimeIndex(dates))

    two_days = pd.Timedelta("2d")
    summed = all_nan.rolling(two_days, min_periods=0).sum()

    tm.assert_series_equal(summed, Series(0.0, index=all_nan.index))
Example #7
0
    def get_mdd(balance: Series) -> float:
        """
        Compute the MDD (Max Draw Down) of a balance series.

        :param balance: series of balance values
        :return: minimum over time of the running-minimum drawdown divided
            by the running peak
        """
        span = len(balance)
        # Highest balance seen so far (window covers the whole series).
        running_peak = balance.rolling(span, min_periods=1).max()
        drawdown = balance - running_peak
        worst_so_far = drawdown.rolling(len(drawdown), min_periods=1).min()
        relative = worst_so_far / running_peak
        return relative.min()
 def define_short_candlestick(self, short_cs_period=15):
     """Flag short candlesticks in ``self.issuer_list``.

     Adds (or resets) a ``Short_CS`` column holding ``'SCS'`` on every row
     whose ``Size`` is below 0.51 times the rolling mean of ``Size`` over
     ``short_cs_period`` rows, and ``''`` everywhere else. Rows that do not
     yet have a full window are never flagged.
     """
     sizes = self.issuer_list.Size
     average_size = sizes.rolling(window=short_cs_period,
                                  min_periods=short_cs_period).mean()
     # Comparisons against the NaN averages of incomplete windows are False.
     is_short = sizes < 0.51 * average_size

     self.issuer_list['Short_CS'] = ''
     # Boolean-mask assignment replaces the per-row DataFrame.set_value
     # calls; set_value is no longer available in current pandas.
     self.issuer_list.loc[is_short, 'Short_CS'] = 'SCS'
 def define_long_candlestick(self, long_cs_period=5):
     """Flag long candlesticks in ``self.issuer_list``.

     Adds (or resets) a ``Long_CS`` column holding ``'LCS'`` on every row
     whose ``Size`` is above 1.3 times the rolling mean of ``Size`` over
     ``long_cs_period`` rows, and ``''`` everywhere else. Rows that do not
     yet have a full window are never flagged.
     """
     sizes = self.issuer_list.Size
     average_size = sizes.rolling(window=long_cs_period,
                                  min_periods=long_cs_period).mean()
     # Comparisons against the NaN averages of incomplete windows are False.
     is_long = sizes > 1.3 * average_size

     self.issuer_list['Long_CS'] = ''
     # Boolean-mask assignment replaces the per-row DataFrame.set_value
     # calls; set_value is no longer available in current pandas.
     self.issuer_list.loc[is_long, 'Long_CS'] = 'LCS'
Example #10
0
    def test_minutes_freq_max(self):
        # GH 21096
        n = 10
        index = date_range(start="2018-1-1 01:00:00", freq="1min", periods=n)
        s = Series(data=0, index=index)
        s.iloc[1] = np.nan
        s.iloc[-1] = 2
        result = s.rolling(window=f"{n}min").max()
        expected = Series(data=[0] * (n - 1) + [2.0], index=index)

        tm.assert_series_equal(result, expected)
Example #11
0
    def rolling(x: pd.Series,
                window: int,
                func: Callable,
                groupfreq: AnyStr = '') -> np.ndarray:
        """
        Apply ``func`` over a rolling window of ``window`` observations.

        When ``groupfreq`` is non-empty, ``x`` is first grouped with
        ``pd.Grouper(freq=groupfreq)`` and the window is rolled per group.
        The values of the result are returned as an array.
        """
        source = x.groupby(pd.Grouper(freq=groupfreq)) if groupfreq else x
        windowed = source.rolling(window=window)
        return windowed.apply(func).values
Example #12
0
def test_rolling_numerical_too_large_numbers():
    """A huge outlier must not corrupt later rolling means (GH: 11645)."""
    dates = date_range("2015-01-01", periods=10, freq="D")
    ds = Series(data=range(10), index=dates, dtype=np.float64)
    # Positional write: an integer key on a DatetimeIndex relies on the
    # deprecated positional fallback of Series.__setitem__.
    ds.iloc[2] = -9e33
    result = ds.rolling(5).mean()
    expected = Series(
        [np.nan, np.nan, np.nan, np.nan, -1.8e33, -1.8e33, -1.8e33, 5.0, 6.0, 7.0],
        index=dates,
    )
    tm.assert_series_equal(result, expected)
def test_stationarity(timeseries: Series):
    """Plot rolling statistics of ``timeseries`` and print a Dickey-Fuller test.

    Draws the series with its 50-sample rolling mean and standard deviation,
    shows the figure without blocking, saves it to
    ``../graph/test_stationarity`` and prints the ``adfuller`` results.
    """
    roller = timeseries.rolling(window=50)
    rolling_mean = roller.mean()
    rolling_std = roller.std()

    plt.figure(figsize=(15, 10))
    plt.plot(timeseries, color='c', label='Original')
    plt.plot(rolling_mean, color='red', label='Rolling Mean')
    plt.plot(rolling_std, color='black', label='Rolling Std')
    plt.legend(loc='best')
    plt.title("Rolling Mean & Standard Deviation")
    plt.show(block=False)
    plt.savefig("../graph/test_stationarity")

    print("Results of Dickey-Fuller Test:")
    adf_result = adfuller(timeseries, autolag="AIC")
    labels = ['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used']
    report = pd.Series(adf_result[:4], index=labels)
    # The fifth element maps each significance level to its critical value.
    for level, critical_value in adf_result[4].items():
        report['Critical Value(%s)' % level] = critical_value

    print(report)
Example #14
0
def test_closed_median_quantile(closed, expected):
    # GH 26005
    ser = Series(data=np.arange(10), index=pd.date_range("2000", periods=10))
    roll = ser.rolling("3D", closed=closed)
    expected = Series(expected, index=ser.index)

    result = roll.median()
    tm.assert_series_equal(result, expected)

    result = roll.quantile(0.5)
    tm.assert_series_equal(result, expected)
Example #15
0
def test_center(roll_func, kwargs, minp):
    obj = Series(np.random.randn(50))
    obj[:10] = np.NaN
    obj[-10:] = np.NaN

    result = getattr(obj.rolling(20, min_periods=minp, center=True),
                     roll_func)(**kwargs)
    expected = (getattr(
        concat([obj, Series([np.NaN] * 9)]).rolling(20, min_periods=minp),
        roll_func)(**kwargs).iloc[9:].reset_index(drop=True))
    tm.assert_series_equal(result, expected)
Example #16
0
def ta_multi_bbands(s: _pd.Series,
                    period=12,
                    stddevs=(0.5, 1.0, 1.5, 2.0),
                    ddof=1,
                    include_mean=True) -> _PANDAS:
    """Build several Bollinger-style bands around a rolling mean.

    For every multiplier in ``stddevs`` a ``lower-<m>`` and ``upper-<m>``
    column is produced as rolling mean -/+ ``m`` times the rolling standard
    deviation over ``period`` observations. Columns are ordered from the
    widest lower band, through the optional ``mean`` column, to the widest
    upper band.

    :param s: input series; must pass ``not has_indexed_columns(s)``
    :param period: rolling window length
    :param stddevs: iterable of standard-deviation multipliers (the default
        is an immutable tuple so it cannot be shared and mutated across calls)
    :param ddof: delta degrees of freedom for the rolling std
    :param include_mean: whether to include the ``mean`` column
    :return: data frame indexed like the rolling mean
    """
    assert not has_indexed_columns(s)
    # Materialise once so any iterable (not only sequences) can be reversed.
    multipliers = list(stddevs)

    mean = s.rolling(period).mean().rename("mean")
    std = s.rolling(period).std(ddof=ddof)
    df = _pd.DataFrame({}, index=mean.index)

    for stddev in reversed(multipliers):
        df[f'lower-{stddev}'] = mean - (std * stddev)

    if include_mean:
        df["mean"] = mean

    for stddev in multipliers:
        df[f'upper-{stddev}'] = mean + (std * stddev)

    return df
Example #17
0
def test_rolling_kurt_edge_cases(step):

    expected = Series([np.NaN] * 4 + [-3.0])[::step]

    # yields all NaN (0 variance)
    d = Series([1] * 5)
    x = d.rolling(window=5, step=step).kurt()
    tm.assert_series_equal(expected, x)

    # yields all NaN (window too small)
    expected = Series([np.NaN] * 5)[::step]
    d = Series(np.random.randn(5))
    x = d.rolling(window=3, step=step).kurt()
    tm.assert_series_equal(expected, x)

    # yields [NaN, NaN, NaN, 1.224307, 2.671499]
    d = Series([-1.50837035, -0.1297039, 0.19501095, 1.73508164, 0.41941401])
    expected = Series([np.NaN, np.NaN, np.NaN, 1.224307, 2.671499])[::step]
    x = d.rolling(window=4, step=step).kurt()
    tm.assert_series_equal(expected, x)
Example #18
0
def test_rolling_skew_edge_cases(step):

    expected = Series([np.NaN] * 4 + [0.0])[::step]
    # yields all NaN (0 variance)
    d = Series([1] * 5)
    x = d.rolling(window=5, step=step).skew()
    # index 4 should be 0 as it contains 5 same obs
    tm.assert_series_equal(expected, x)

    expected = Series([np.NaN] * 5)[::step]
    # yields all NaN (window too small)
    d = Series(np.random.randn(5))
    x = d.rolling(window=2, step=step).skew()
    tm.assert_series_equal(expected, x)

    # yields [NaN, NaN, NaN, 0.177994, 1.548824]
    d = Series([-1.50837035, -0.1297039, 0.19501095, 1.73508164, 0.41941401])
    expected = Series([np.NaN, np.NaN, np.NaN, 0.177994, 1.548824])[::step]
    x = d.rolling(window=4, step=step).skew()
    tm.assert_series_equal(expected, x)
Example #19
0
def test_even_number_window_alignment():
    """Centered "2D" window alignment; see discussion in GH 38780."""
    daily_index = date_range(start="2020-01-01", freq="D", periods=3)
    ser = Series(range(3), index=daily_index)

    # behavior of index- and datetime-based windows differs here!
    # ser.rolling(window=2, min_periods=1, center=True).mean()
    centered = ser.rolling(window="2D", min_periods=1, center=True)

    tm.assert_series_equal(centered.mean(), Series([0.5, 1.5, 2], index=ser.index))
Example #20
0
def test_rolling_std_small_values():
    """Rolling std keeps precision for very small inputs (GH 37051)."""
    tiny = Series([0.00000054, 0.00000053, 0.00000054])
    pair_std = tiny.rolling(2).std()
    wanted = Series([np.nan, 7.071068e-9, 7.071068e-9])
    tm.assert_series_equal(pair_std, wanted, atol=1.0e-15, rtol=1.0e-15)
Example #21
0
    def test_numba_vs_cython(self, jit, nogil, parallel, nopython):
        """Rolling ``apply`` gives the same result with the numba and cython engines."""
        def f(x, *args):
            total = 0
            for extra in args:
                total += extra
            return np.mean(x) + total

        if jit:
            import numba

            # Also cover the case where the user passes a pre-jitted function.
            f = numba.jit(f)

        extra_args = (2,)
        numba_options = {"nogil": nogil, "parallel": parallel, "nopython": nopython}

        ser = Series(range(10))
        via_numba = ser.rolling(2).apply(
            f, args=extra_args, engine="numba", engine_kwargs=numba_options, raw=True
        )
        via_cython = ser.rolling(2).apply(f, engine="cython", args=extra_args, raw=True)
        tm.assert_series_equal(via_numba, via_cython)
Example #22
0
def test_series_dtypes(method, data, expected_data, coerce_int, dtypes, min_periods):
    """Rolling aggregations return float64, or raise for datetime-like dtypes."""
    dtype = get_dtype(dtypes, coerce_int=coerce_int)
    rolled = Series(data, dtype=dtype).rolling(2, min_periods=min_periods)

    datetimelike = dtypes in ("m8[ns]", "M8[ns]", "datetime64[ns, UTC]")
    if not datetimelike or method == "count":
        # Numeric input, or ``count`` which works for every dtype.
        result = getattr(rolled, method)()
        tm.assert_almost_equal(result, Series(expected_data, dtype="float64"))
        return

    with pytest.raises(DataError, match="No numeric types to aggregate"):
        getattr(rolled, method)()
Example #23
0
def find_parameters(segment_data: pd.Series, segment_from_index: int,
                    pat_type: str) -> tuple:
    """Estimate the height and length of a pattern inside a segment.

    Segments longer than ``SMOOTHING_FACTOR * 3`` are smoothed with a rolling
    mean of ``SMOOTHING_FACTOR`` samples (NaN head dropped) before the
    min/max lines are taken from ``utils.get_distribution_density``.

    :param segment_data: raw segment values
    :param segment_from_index: currently unused by this function
    :param pat_type: passed through to ``utils.get_pattern_length``
    :return: ``(height, length)`` where ``height`` is 95% of the distance
        between the max and min lines and ``length`` is the result of
        ``utils.get_pattern_length`` on the unsmoothed segment
    """
    segment = segment_data
    if len(segment_data) > SMOOTHING_FACTOR * 3:
        segment = segment_data.rolling(window=SMOOTHING_FACTOR).mean().dropna()

    # The first returned value (the median) is not needed here.
    _, segment_max_line, segment_min_line = utils.get_distribution_density(
        segment)
    height = 0.95 * (segment_max_line - segment_min_line)
    length = utils.get_pattern_length(segment_data, segment_min_line,
                                      segment_max_line, pat_type)
    return height, length
Example #24
0
def fill_ci(series: pd.Series, window: Union[int, str]) -> "matplotlib.axes.Axes":
    """Plot the rolling mean of `series` with a shaded band of +/- 2 rolling std.

    Window can be interval or offset, eg, '30s'. The series index must be
    datetime or timedelta.

    Returns the Axes the plot was drawn on (not the Figure).
    """
    assert is_datetime_or_timedelta_dtype(
        series.index
    ), f"Series index must be datetime but is {type(series.index)}"
    rolled = series.rolling(window)
    smooth_path = rolled.mean()
    path_deviation = rolled.std()

    fig, ax = plt.subplots()

    # Draw on the freshly created axes explicitly instead of relying on
    # pyplot's "current axes" state.
    ax.plot(smooth_path.index, smooth_path, "b")
    ax.fill_between(
        path_deviation.index,
        (smooth_path - 2 * path_deviation),
        (smooth_path + 2 * path_deviation),
        color="b",
        alpha=0.2,
    )

    fig.autofmt_xdate()
    return ax
    def test_freqs_ops(self, freq, op, result_data):
        # GH 21096
        index = date_range(start="2018-1-1 01:00:00",
                           freq=f"1{freq}",
                           periods=10)
        s = Series(data=0, index=index)
        s.iloc[1] = np.nan
        s.iloc[-1] = 2
        result = getattr(s.rolling(window=f"10{freq}"), op)()
        expected = Series(data=result_data, index=index)

        tm.assert_series_equal(result, expected)
Example #26
0
def trailing_omega_ratio(
        returns: pd.Series,
        *,
        rf: float = 0.0,
        window: Optional[int] = None,
) -> pd.Series:
    """Apply ``omega_ratio`` over a rolling window of ``returns``.

    ``rf`` is forwarded to ``omega_ratio`` as a keyword argument. The first
    ``window`` rows of the rolled result are dropped.
    """
    omega_kwargs = {"rf": rf}
    rolled = returns.rolling(window).apply(omega_ratio, kwargs=omega_kwargs)
    return rolled.iloc[window:]
Example #27
0
def test_rolling_apply(engine_and_raw, step):
    engine, raw = engine_and_raw

    expected = Series([], dtype="float64")
    result = expected.rolling(10, step=step).apply(lambda x: x.mean(),
                                                   engine=engine,
                                                   raw=raw)
    tm.assert_series_equal(result, expected)

    # gh-8080
    s = Series([None, None, None])
    result = s.rolling(2, min_periods=0, step=step).apply(lambda x: len(x),
                                                          engine=engine,
                                                          raw=raw)
    expected = Series([1.0, 2.0, 2.0])[::step]
    tm.assert_series_equal(result, expected)

    result = s.rolling(2, min_periods=0, step=step).apply(len,
                                                          engine=engine,
                                                          raw=raw)
    tm.assert_series_equal(result, expected)
Example #28
0
def trailing_cagr(
        returns: pd.Series,
        *,
        annualizer: Optional[float] = None,
        window: Optional[int] = None
) -> pd.Series:
    """Apply ``total_return`` over a rolling window of ``returns``.

    ``annualizer`` is forwarded to ``total_return`` as a keyword argument.
    The first ``window`` rows of the rolled result are dropped.
    """
    return_kwargs = {"annualizer": annualizer}
    rolled = returns.rolling(window).apply(total_return, kwargs=return_kwargs)
    return rolled.iloc[window:]
Example #29
0
def smooth_with_rolling_average(
    series: pd.Series,
    window: int = 7,
    include_trailing_zeros: bool = True,
    exclude_negatives: bool = True,
):
    """Smooth a series with a rolling mean that needs only one observation.

    Series must have a datetime index.

    https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.rolling.html

    Port of Projections.ts:
    https://github.com/covid-projections/covid-projections/blob/master/src/common/models/Projection.ts#L715

    Args:
        series: Series with datetime index to smooth.
        window: Sliding window to average.
        include_trailing_zeros: When False, every smoothed value later than
            the last non-zero observation is set to NaN.
        exclude_negatives: Exclude negative values from rolling averages.

    Returns:
        Smoothed series.
    """
    # Cut off trailing NaNs so days without data yet are not smoothed.
    trimmed = series.loc[:series.last_valid_index()]

    if exclude_negatives:
        # Work on a copy so the caller's series is left untouched.
        trimmed = trimmed.copy()
        trimmed.loc[trimmed < 0] = None

    def _window_mean(values):
        """Mean of the window, or NaN when its newest value is NaN."""
        return np.nan if np.isnan(values.iloc[-1]) else values.mean()

    # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.core.window.rolling.Rolling.apply.html
    smoothed = trimmed.rolling(window, min_periods=1).apply(_window_mean)
    if include_trailing_zeros:
        return smoothed

    last_nonzero = trimmed.replace(0, np.nan).last_valid_index()
    if not last_nonzero:
        # entirely empty series
        return trimmed

    smoothed[last_nonzero + timedelta(days=1):] = np.nan
    return smoothed
Example #30
0
def test_center(q):
    obj = Series(np.random.randn(50))
    obj[:10] = np.NaN
    obj[-10:] = np.NaN

    result = obj.rolling(20, center=True).quantile(q)
    expected = (
        concat([obj, Series([np.NaN] * 9)])
        .rolling(20)
        .quantile(q)[9:]
        .reset_index(drop=True)
    )
    tm.assert_series_equal(result, expected)
Example #31
0
def ema(price: pd.Series, periods: int) -> pd.Series:
    """
    Exponential moving average of ``price`` over a window of ``periods``.

    Uses the common smoothing factor alpha = 2 / (N + 1) and the weights
    [a(1-a)^(N-1), ..., a(1-a)^2, a(1-a), a], normalized to sum to 1.
    """
    alpha = 2 / (periods + 1)

    # Decay factors 1, (1-a), (1-a)^2, ... listed oldest-first.
    decay = [1, *np.cumprod([1 - alpha] * (periods - 1))]
    decay.reverse()
    scaled = alpha * np.array(decay)
    weights = scaled / scaled.sum()

    def _weighted(window_values):
        return np.dot(window_values, weights)

    return price.rolling(window=periods).apply(_weighted)
Example #32
0
def stochastic(x: pd.Series,
               x_low: pd.Series = None,
               x_high: pd.Series = None,
               period: int = 14):
    """
    Compute the stochastic indicator and a smoothed version of it (``ema``
    with span ``int(period / 3)``). The indicator is defined as

    .. math::
        y[t] = 100.0 \\frac{x[t] - \\min(x_{low}[t-period+1:])}{\\max(x_{high}[t-period+1:]) - \\min(x_{low}[t-period+1:])}

    When no separate low/high series are supplied, a copy of ``x`` itself is
    used for them. For financial data, where highs and lows are reached
    within a trading period, this might not be optimal.

    :param  x: Target data to compute the stochastic indicator.
    :type   x: pd.Series
    :param  x_low: Minimum value of target data to compute the stochastic indicator.
    :type   x_low: pd.Series
    :param  x_high: Maximum value of target data to compute the stochastic indicator.
    :type   x_high: pd.Series
    :param  period: Period over which the stochastic indicator is computed. Default=14.
    :type   period: int
    :return: A tuple (stochastic, stochastic_smoothed).
    """
    low_source = x.copy(deep=True) if x_low is None else x_low
    high_source = x.copy(deep=True) if x_high is None else x_high

    rolling_low = low_source.rolling(period).min()
    rolling_high = high_source.rolling(period).max()
    band = rolling_high - rolling_low
    y = 100.0 * (x - rolling_low) / band

    return y, ema(y, span=int(period / 3.0))
Example #33
0

def trend_signal(rets, lookback, lag):
    """Rolling standard deviation of ``rets``, delayed by ``lag`` periods.

    The window spans ``lookback`` observations and needs at least
    ``lookback - 5`` of them to produce a value.
    """
    required = lookback - 5
    rolling_std = Series.rolling(rets, lookback, min_periods=required).std()
    return rolling_std.shift(lag)


signal = trend_signal(returns, 100, 3)

# Weekly (Friday-anchored) mean of the signal, carried forward onto business
# days. A bare ``.resample('B')`` only yields a Resampler object, which
# cannot be shifted or multiplied below.
# NOTE(review): forward-fill is assumed to be the intended fill - confirm.
trade_friday = signal.resample('W-FRI').mean().resample('B').ffill()

trade_rets = trade_friday.shift(1) * returns

to_index(trade_rets).plot()

print('block')

# 250-observation rolling std of returns, scaled by sqrt(250).
vol = Series.rolling(returns, 250, min_periods=200).std() * np.sqrt(250)

vol = vol.reindex(trade_rets.index)
def shape(rets, ann=250):
    """Return mean over standard deviation of ``rets``, scaled by sqrt(``ann``)."""
    average = rets.mean()
    spread = rets.std()
    return average / spread * np.sqrt(ann)
print(trade_rets)
print(len(data))
print(len(trade_rets))

# Quartile bucket of every volatility observation, computed once and reused
# for both the length check and the grouping.
_vol_quartiles = pd.qcut(vol, 4)
print(len(_vol_quartiles))

print(trade_rets.groupby(_vol_quartiles).agg(shape))
Example #34
0
def trend_signal(rets, lookback, lag):
    """Rolling standard deviation of ``rets``, delayed by ``lag`` periods.

    The window spans ``lookback`` observations and needs at least
    ``lookback - 5`` of them to produce a value.
    """
    required = lookback - 5
    rolling_std = Series.rolling(rets, lookback, min_periods=required).std()
    return rolling_std.shift(lag)