def transform(self, X, **transform_params):
    """Smooth ``X`` with a rolling mean of ``self.window`` samples.

    X has to be a 1-dimensional array. Wherever the rolling mean is NA the
    non-rolled value is used instead. When ``self.window`` is not positive
    the data is returned unsmoothed, wrapped in a ``Series``.
    ``transform_params`` is accepted but not used.
    """
    raw = Series(X)
    if not self.window > 0:
        return raw
    smoothed = raw.rolling(window=self.window).mean()
    # Replacing NA with non rolled values
    return smoothed.fillna(raw)
def calculate_metrics(self):
    """Compute every risk metric for the period and store it on ``self``.

    Reads ``benchmark_returns``, ``algorithm_returns``, ``treasury_curves``,
    ``start_date``, ``end_date`` and ``env`` and delegates the individual
    computations to this object's ``calculate_*`` helpers and to
    ``choose_treasury``.

    Raises:
        Exception: if the algorithm and benchmark return series do not
            share the same index.
    """
    self.benchmark_period_returns = self.calculate_period_returns(
        self.benchmark_returns)
    self.algorithm_period_returns = self.calculate_period_returns(
        self.algorithm_returns)

    if not self.algorithm_returns.index.equals(self.benchmark_returns.index):
        # Adjacent literals keep source indentation out of the message.
        message = (
            "Mismatch between benchmark_returns ({bm_count}) and "
            "algorithm_returns ({algo_count}) in range {start} : {end}"
        )
        message = message.format(
            bm_count=len(self.benchmark_returns),
            algo_count=len(self.algorithm_returns),
            start=self.start_date,
            end=self.end_date,
        )
        raise Exception(message)

    self.num_trading_days = len(self.benchmark_returns)
    # Replacement for pd.stats.moments.rolling_count. min_periods=0 is
    # required: with the default, current pandas only emits a count once the
    # window is full, which would leave every row but the last as NaN.
    self.trading_day_counts = self.algorithm_returns.rolling(
        self.num_trading_days, min_periods=0).count()

    self.mean_algorithm_returns = (
        self.algorithm_returns.cumsum() / self.trading_day_counts
    )

    self.benchmark_volatility = self.calculate_volatility(
        self.benchmark_returns)
    self.algorithm_volatility = self.calculate_volatility(
        self.algorithm_returns)
    self.treasury_period_return = choose_treasury(
        self.treasury_curves,
        self.start_date,
        self.end_date,
        self.env,
    )
    self.sharpe = self.calculate_sharpe()
    # The consumer currently expects a 0.0 value for sharpe in period,
    # this differs from cumulative which was np.nan.
    # When factoring out the sharpe_ratio, the different return types
    # were collapsed into `np.nan`.
    # TODO: Either fix consumer to accept `np.nan` or make the
    # `sharpe_ratio` return type configurable.
    # In the meantime, convert nan values to 0.0
    if pd.isnull(self.sharpe):
        self.sharpe = 0.0
    self.sortino = self.calculate_sortino()
    self.information = self.calculate_information()
    (self.beta,
     self.algorithm_covariance,
     self.benchmark_variance,
     self.condition_number,
     self.eigen_values) = self.calculate_beta()
    self.alpha = self.calculate_alpha()
    self.excess_return = (
        self.algorithm_period_returns - self.treasury_period_return
    )
    self.max_drawdown = self.calculate_max_drawdown()
    self.max_leverage = self.calculate_max_leverage()
def test_closed_min_max_datetime(input_dtype, func, closed, expected): # see gh-21704 ser = Series( data=np.arange(10).astype(input_dtype), index=date_range("2000", periods=10), ) result = getattr(ser.rolling("3D", closed=closed), func)() expected = Series(expected, index=ser.index) tm.assert_series_equal(result, expected)
def test_iter_rolling_datetime(expected, expected_index, window): # GH 11704 ser = Series(range(5), index=date_range(start="2020-01-01", periods=5, freq="D")) expected = [ Series(values, index=idx) for (values, idx) in zip(expected, expected_index) ] for (expected, actual) in zip(expected, ser.rolling(window)): tm.assert_series_equal(actual, expected)
def throughput(s: pd.Series, window_size_ms: float, trim: bool = False) -> pd.Series:
    """
    Compute rolling throughput, in events per second, from event timestamps.

    Consider a series of timestamps:

            timestamp
        0   11:00:01 am
        1   11:00:03 am
        2   11:00:54 am
        3   11:01:34 am
        4   11:02:16 am

    Imagine we divide the data into 1 minute rolling windows with every
    window having its right edge be an entry in the series. We'd get the
    following windows:

                                    timestamps
        10:59:01 am - 11:00:01 am | [0]       |
        10:59:03 am - 11:00:03 am | [0, 1]    |
        10:59:54 am - 11:00:54 am | [0, 1, 2] |
        11:00:34 am - 11:01:34 am | [2, 3]    |
        11:01:16 am - 11:02:16 am | [2, 3, 4] |

    If we count the number of entries in each window and divide by the
    window size, we get the throughput of each window measured in events
    per second.

                                    throughput
        10:59:01 am - 11:00:01 am | 1 / 60 |
        10:59:03 am - 11:00:03 am | 2 / 60 |
        10:59:54 am - 11:00:54 am | 3 / 60 |
        11:00:34 am - 11:01:34 am | 2 / 60 |
        11:01:16 am - 11:02:16 am | 3 / 60 |

    This is what `throughput` computes.

    If `trim` is true, the first window_size_ms of throughput data is
    trimmed. Otherwise the first 100 entries are dropped and, for the
    remaining entries that still fall inside the first window, the rate is
    replaced by "events so far / time since the first event".
    """
    events = pd.Series(0, index=s.sort_values())
    window_seconds = window_size_ms / 1000
    rates = events.rolling(f'{window_size_ms}ms').count() / window_seconds

    start_time = rates.index[0]
    offset = pd.DateOffset(microseconds=window_size_ms * 1000)

    if trim:
        return rates[rates.index >= start_time + offset]

    # TODO(mwhittaker): Fix up. It's a little jank.
    # Iterate the index directly: only the timestamps are needed, and
    # Series.iteritems() no longer exists in pandas 2.
    for i, timestamp in enumerate(events.index, start=1):
        if i < 100:
            continue
        if timestamp > start_time + offset:
            break
        rates[timestamp] = i / (timestamp - start_time).total_seconds()
    return rates.iloc[100:]
def test_missing_minp_zero_variable():
    """All-NaN data summed over a 2-day window with min_periods=0 gives 0."""
    # https://github.com/pandas-dev/pandas/pull/18921
    stamps = pd.DatetimeIndex(
        ["2017-01-01", "2017-01-04", "2017-01-06", "2017-01-07"]
    )
    x = Series([np.nan] * 4, index=stamps)

    two_days = pd.Timedelta("2d")
    result = x.rolling(two_days, min_periods=0).sum()

    expected = Series(0.0, index=x.index)
    tm.assert_series_equal(result, expected)
def get_mdd(balance: Series) -> float:
    """
    Compute the MDD (Max Draw Down) of a balance series.

    :param balance: balance values in chronological order
    :return: the minimum of (running lowest drawdown / running peak)
    """
    span = len(balance)
    # Running peak: a window as long as the series with min_periods=1.
    running_peak = balance.rolling(span, min_periods=1).max()
    drawdown = balance - running_peak
    lowest_drawdown = drawdown.rolling(len(drawdown), min_periods=1).min()
    ratio = lowest_drawdown / running_peak
    return ratio.min()
def define_short_candlestick(self, short_cs_period=15):
    """Flag short candlesticks in ``self.issuer_list``.

    Creates (or resets) the ``Short_CS`` column and writes ``'SCS'`` on
    every row whose ``Size`` is below 51% of the rolling mean of ``Size``
    over ``short_cs_period`` rows. Rows without a full window stay blank.

    :param short_cs_period: number of rows in the rolling average window.
    """
    self.issuer_list['Short_CS'] = ''
    sizes = self.issuer_list['Size']
    # count average SIZE for period
    average_size = sizes.rolling(
        window=short_cs_period, min_periods=short_cs_period).mean()
    # A NaN average compares False, so incomplete windows are never flagged.
    # The mask is label-aligned, so this works for any index and does not
    # need the removed DataFrame.set_value.
    is_short = sizes < 0.51 * average_size
    self.issuer_list.loc[is_short, 'Short_CS'] = 'SCS'
def define_long_candlestick(self, long_cs_period=5):
    """Flag long candlesticks in ``self.issuer_list``.

    Creates (or resets) the ``Long_CS`` column and writes ``'LCS'`` on every
    row whose ``Size`` exceeds 1.3 times the rolling mean of ``Size`` over
    ``long_cs_period`` rows. Rows without a full window stay blank.

    :param long_cs_period: number of rows in the rolling average window.
    """
    self.issuer_list['Long_CS'] = ''
    sizes = self.issuer_list['Size']
    # count average SIZE for period
    average_size = sizes.rolling(
        window=long_cs_period, min_periods=long_cs_period).mean()
    # A NaN average compares False, so incomplete windows are never flagged.
    # The mask is label-aligned, so this works for any index and does not
    # need the removed DataFrame.set_value.
    is_long = sizes > 1.3 * average_size
    self.issuer_list.loc[is_long, 'Long_CS'] = 'LCS'
def test_minutes_freq_max(self): # GH 21096 n = 10 index = date_range(start="2018-1-1 01:00:00", freq="1min", periods=n) s = Series(data=0, index=index) s.iloc[1] = np.nan s.iloc[-1] = 2 result = s.rolling(window=f"{n}min").max() expected = Series(data=[0] * (n - 1) + [2.0], index=index) tm.assert_series_equal(result, expected)
def rolling(x: pd.Series, window: int, func: Callable, groupfreq: AnyStr = '') -> np.ndarray:
    """
    Apply ``func`` over a rolling window of ``window`` observations.

    When ``groupfreq`` is non-empty the series is first grouped with
    ``pd.Grouper(freq=groupfreq)`` and the window is rolled within each
    group. The raw values of the result are returned.
    """
    target = x.groupby(pd.Grouper(freq=groupfreq)) if groupfreq else x
    windowed = target.rolling(window=window)
    return windowed.apply(func).values
def test_rolling_numerical_too_large_numbers():
    """A huge value must not corrupt later windows of the rolling mean."""
    # GH: 11645
    dates = date_range("2015-01-01", periods=10, freq="D")
    ds = Series(data=range(10), index=dates, dtype=np.float64)
    # Positional write: an integer key on a DatetimeIndex-ed Series would
    # otherwise rely on the deprecated positional fallback of __setitem__.
    ds.iloc[2] = -9e33
    result = ds.rolling(5).mean()
    expected = Series(
        [np.nan, np.nan, np.nan, np.nan, -1.8e33, -1.8e33, -1.8e33, 5.0, 6.0, 7.0],
        index=dates,
    )
    tm.assert_series_equal(result, expected)
def test_stationarity(timeseries: Series):
    """Plot rolling statistics and print a Dickey-Fuller test report.

    Draws the series together with its 50-sample rolling mean and standard
    deviation, saves the figure to ``../graph/test_stationarity`` and prints
    the values returned by ``adfuller`` as a labelled Series.
    """
    window = 50
    moving_average = timeseries.rolling(window=window).mean()
    moving_std = timeseries.rolling(window=window).std()

    plt.figure(figsize=(15, 10))
    plt.plot(timeseries, color='c', label='Original')
    plt.plot(moving_average, color='red', label='Rolling Mean')
    plt.plot(moving_std, color='black', label='Rolling Std')
    plt.legend(loc='best')
    plt.title("Rolling Mean & Standard Deviation")
    plt.show(block=False)
    plt.savefig("../graph/test_stationarity")

    print("Results of Dickey-Fuller Test:")
    dftest = adfuller(timeseries, autolag="AIC")
    labels = ['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used']
    dfoutput = pd.Series(dftest[0:4], index=labels)
    # dftest[4] maps a key to its critical value.
    critical_values = dftest[4]
    for key in critical_values:
        dfoutput['Critical Value(%s)' % key] = critical_values[key]
    print(dfoutput)
def test_closed_median_quantile(closed, expected): # GH 26005 ser = Series(data=np.arange(10), index=pd.date_range("2000", periods=10)) roll = ser.rolling("3D", closed=closed) expected = Series(expected, index=ser.index) result = roll.median() tm.assert_series_equal(result, expected) result = roll.quantile(0.5) tm.assert_series_equal(result, expected)
def test_center(roll_func, kwargs, minp): obj = Series(np.random.randn(50)) obj[:10] = np.NaN obj[-10:] = np.NaN result = getattr(obj.rolling(20, min_periods=minp, center=True), roll_func)(**kwargs) expected = (getattr( concat([obj, Series([np.NaN] * 9)]).rolling(20, min_periods=minp), roll_func)(**kwargs).iloc[9:].reset_index(drop=True)) tm.assert_series_equal(result, expected)
def ta_multi_bbands(s: _pd.Series, period=12, stddevs=(0.5, 1.0, 1.5, 2.0), ddof=1, include_mean=True) -> _PANDAS:
    """Build several Bollinger-style bands around a rolling mean.

    :param s: input series; must satisfy ``not has_indexed_columns(s)``
    :param period: rolling window length for the mean and the std
    :param stddevs: band widths as multiples of the rolling std (an
        immutable tuple by default, so the default cannot be shared and
        mutated across calls; any iterable that supports ``reversed`` works)
    :param ddof: delta degrees of freedom passed to the rolling std
    :param include_mean: whether to add the ``mean`` column
    :return: a frame indexed like the rolling mean, with columns
        ``lower-<k>`` (widest band first), optionally ``mean``, then
        ``upper-<k>`` (narrowest band first)
    """
    assert not has_indexed_columns(s)

    windowed = s.rolling(period)
    mean = windowed.mean().rename("mean")
    std = windowed.std(ddof=ddof)

    df = _pd.DataFrame({}, index=mean.index)

    for stddev in reversed(stddevs):
        df[f'lower-{stddev}'] = mean - (std * stddev)

    if include_mean:
        df["mean"] = mean

    for stddev in stddevs:
        df[f'upper-{stddev}'] = mean + (std * stddev)

    return df
def test_rolling_kurt_edge_cases(step): expected = Series([np.NaN] * 4 + [-3.0])[::step] # yields all NaN (0 variance) d = Series([1] * 5) x = d.rolling(window=5, step=step).kurt() tm.assert_series_equal(expected, x) # yields all NaN (window too small) expected = Series([np.NaN] * 5)[::step] d = Series(np.random.randn(5)) x = d.rolling(window=3, step=step).kurt() tm.assert_series_equal(expected, x) # yields [NaN, NaN, NaN, 1.224307, 2.671499] d = Series([-1.50837035, -0.1297039, 0.19501095, 1.73508164, 0.41941401]) expected = Series([np.NaN, np.NaN, np.NaN, 1.224307, 2.671499])[::step] x = d.rolling(window=4, step=step).kurt() tm.assert_series_equal(expected, x)
def test_rolling_skew_edge_cases(step): expected = Series([np.NaN] * 4 + [0.0])[::step] # yields all NaN (0 variance) d = Series([1] * 5) x = d.rolling(window=5, step=step).skew() # index 4 should be 0 as it contains 5 same obs tm.assert_series_equal(expected, x) expected = Series([np.NaN] * 5)[::step] # yields all NaN (window too small) d = Series(np.random.randn(5)) x = d.rolling(window=2, step=step).skew() tm.assert_series_equal(expected, x) # yields [NaN, NaN, NaN, 0.177994, 1.548824] d = Series([-1.50837035, -0.1297039, 0.19501095, 1.73508164, 0.41941401]) expected = Series([np.NaN, np.NaN, np.NaN, 0.177994, 1.548824])[::step] x = d.rolling(window=4, step=step).skew() tm.assert_series_equal(expected, x)
def test_even_number_window_alignment():
    """Centered "2D" window with min_periods=1 on three daily points."""
    # see discussion in GH 38780
    index = date_range(start="2020-01-01", freq="D", periods=3)
    s = Series(range(3), index=index)

    # behavior of index- and datetime-based windows differs here!
    # s.rolling(window=2, min_periods=1, center=True).mean()

    centered = s.rolling(window="2D", min_periods=1, center=True)
    result = centered.mean()

    expected = Series([0.5, 1.5, 2], index=s.index)
    tm.assert_series_equal(result, expected)
def test_rolling_std_small_values():
    """Rolling std of values around 5e-7 is compared at 1e-15 tolerances."""
    # GH 37051
    tiny_values = [0.00000054, 0.00000053, 0.00000054]
    s = Series(tiny_values)

    pairwise = s.rolling(2)
    result = pairwise.std()

    expected = Series([np.nan, 7.071068e-9, 7.071068e-9])
    tm.assert_series_equal(result, expected, atol=1.0e-15, rtol=1.0e-15)
def test_numba_vs_cython(self, jit, nogil, parallel, nopython):
    """The numba and cython engines give the same rolling ``apply`` result.

    ``jit`` decides whether the applied function is wrapped with
    ``numba.jit`` first; the other flags are passed as ``engine_kwargs``.
    """
    def f(x, *args):
        # Kept as an explicit loop because this function may be jitted.
        total = 0
        for extra in args:
            total += extra
        return np.mean(x) + total

    if jit:
        import numba

        f = numba.jit(f)

    s = Series(range(10))
    extra_args = (2,)
    numba_options = {"nogil": nogil, "parallel": parallel, "nopython": nopython}

    result = s.rolling(2).apply(
        f, args=extra_args, engine="numba", engine_kwargs=numba_options, raw=True
    )
    expected = s.rolling(2).apply(f, engine="cython", args=extra_args, raw=True)
    tm.assert_series_equal(result, expected)
def test_series_dtypes(method, data, expected_data, coerce_int, dtypes, min_periods):
    """Rolling ``method`` over a window of 2 for a Series of each dtype.

    Datetime-like dtypes must raise ``DataError`` for every method except
    ``count``; every other combination is compared with ``expected_data``
    as float64.
    """
    dtype = get_dtype(dtypes, coerce_int=coerce_int)
    ser = Series(data, dtype=dtype)
    rolled = ser.rolling(2, min_periods=min_periods)

    datetime_like = dtypes in ("m8[ns]", "M8[ns]", "datetime64[ns, UTC]")
    if not (datetime_like and method != "count"):
        result = getattr(rolled, method)()
        expected = Series(expected_data, dtype="float64")
        tm.assert_almost_equal(result, expected)
        return

    msg = "No numeric types to aggregate"
    with pytest.raises(DataError, match=msg):
        getattr(rolled, method)()
def find_parameters(segment_data: pd.Series, segment_from_index: int, pat_type: str) -> tuple:
    """Estimate the height and length of a pattern inside a segment.

    Segments longer than ``SMOOTHING_FACTOR * 3`` are smoothed with a
    rolling mean of ``SMOOTHING_FACTOR`` samples (NaN rows dropped) before
    the distribution density is taken. The length is always measured on
    the unsmoothed ``segment_data``.

    :param segment_data: values of the segment
    :param segment_from_index: not used by this function
    :param pat_type: passed through to ``utils.get_pattern_length``
    :return: ``(height, length)``, where height is 95% of the distance
        between the max and min lines from
        ``utils.get_distribution_density``
    """
    segment = segment_data
    if len(segment_data) > SMOOTHING_FACTOR * 3:
        segment = segment_data.rolling(window=SMOOTHING_FACTOR).mean().dropna()

    # The first returned value (the median) is not needed here.
    _, segment_max_line, segment_min_line = utils.get_distribution_density(segment)

    height = 0.95 * (segment_max_line - segment_min_line)
    length = utils.get_pattern_length(
        segment_data, segment_min_line, segment_max_line, pat_type)
    return height, length
def fill_ci(series: pd.Series, window: Union[int, str]) -> Figure:
    """Plot the rolling mean of `series` with a shaded band around it.

    The band spans two rolling standard deviations either side of the
    rolling mean over `window`. Window can be interval or offset, eg, '30s'.

    NOTE(review): the value returned is the axes object created by
    `plt.subplots()`, although the annotation says `Figure` - confirm which
    one callers expect.
    """
    index_is_temporal = is_datetime_or_timedelta_dtype(series.index)
    assert index_is_temporal, f"Series index must be datetime but is {type(series.index)}"

    smooth_path = series.rolling(window).mean()
    path_deviation = series.rolling(window).std()

    _, ax = plt.subplots()
    plt.plot(smooth_path.index, smooth_path, "b")

    lower_bound = smooth_path - 2 * path_deviation
    upper_bound = smooth_path + 2 * path_deviation
    plt.fill_between(
        path_deviation.index,
        lower_bound,
        upper_bound,
        color="b",
        alpha=0.2,
    )
    plt.gcf().autofmt_xdate()
    return ax
def test_freqs_ops(self, freq, op, result_data): # GH 21096 index = date_range(start="2018-1-1 01:00:00", freq=f"1{freq}", periods=10) s = Series(data=0, index=index) s.iloc[1] = np.nan s.iloc[-1] = 2 result = getattr(s.rolling(window=f"10{freq}"), op)() expected = Series(data=result_data, index=index) tm.assert_series_equal(result, expected)
def trailing_omega_ratio(
    returns: pd.Series,
    *,
    rf: float = 0.0,
    window: Optional[int] = None,
) -> pd.Series:
    """Apply ``omega_ratio`` over a rolling window of ``returns``.

    Args:
        returns: series the window is rolled over.
        rf: forwarded to ``omega_ratio`` as the ``rf`` keyword.
        window: number of observations per window. Required, despite the
            ``None`` default.

    Returns:
        The rolling result with its first ``window`` rows removed.

    Raises:
        ValueError: if ``window`` is not given. pandas would raise the same
            exception type, but with a message that does not name this
            argument.
    """
    if window is None:
        raise ValueError("window must be given as a number of observations")

    rolled = returns.rolling(window).apply(omega_ratio, kwargs={"rf": rf})
    return rolled.iloc[window:]
def test_rolling_apply(engine_and_raw, step): engine, raw = engine_and_raw expected = Series([], dtype="float64") result = expected.rolling(10, step=step).apply(lambda x: x.mean(), engine=engine, raw=raw) tm.assert_series_equal(result, expected) # gh-8080 s = Series([None, None, None]) result = s.rolling(2, min_periods=0, step=step).apply(lambda x: len(x), engine=engine, raw=raw) expected = Series([1.0, 2.0, 2.0])[::step] tm.assert_series_equal(result, expected) result = s.rolling(2, min_periods=0, step=step).apply(len, engine=engine, raw=raw) tm.assert_series_equal(result, expected)
def trailing_cagr(
    returns: pd.Series,
    *,
    annualizer: Optional[float] = None,
    window: Optional[int] = None
) -> pd.Series:
    """Apply ``total_return`` over a rolling window of ``returns``.

    Args:
        returns: series the window is rolled over.
        annualizer: forwarded to ``total_return`` as the ``annualizer``
            keyword.
        window: number of observations per window. Required, despite the
            ``None`` default.

    Returns:
        The rolling result with its first ``window`` rows removed.

    Raises:
        ValueError: if ``window`` is not given. pandas would raise the same
            exception type, but with a message that does not name this
            argument.
    """
    if window is None:
        raise ValueError("window must be given as a number of observations")

    rolled = returns.rolling(window).apply(
        total_return, kwargs={"annualizer": annualizer})
    return rolled.iloc[window:]
def smooth_with_rolling_average(
    series: pd.Series,
    window: int = 7,
    include_trailing_zeros: bool = True,
    exclude_negatives: bool = True,
):
    """Smoothes series with a min period of 1.

    Series must have a datetime index.

    https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.rolling.html

    Port of Projections.ts:
    https://github.com/covid-projections/covid-projections/blob/master/src/common/models/Projection.ts#L715

    Args:
        series: Series with datetime index to smooth.
        window: Sliding window to average.
        include_trailing_zeros: When False, everything from one day after
            the last non-zero value onwards is set to NaN.
        exclude_negatives: Exclude negative values from rolling averages.

    Returns:
        Smoothed series.
    """

    def _window_mean(values):
        """Mean of the window, or NaN when its newest value is NaN."""
        return np.nan if np.isnan(values.iloc[-1]) else values.mean()

    # Trailing NAs are cut off so days without data yet are not smoothed.
    trimmed = series.loc[:series.last_valid_index()]

    if exclude_negatives:
        # Work on a copy so the caller's data is not modified.
        trimmed = trimmed.copy()
        trimmed.loc[trimmed < 0] = None

    # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.core.window.rolling.Rolling.apply.html
    smoothed = trimmed.rolling(window, min_periods=1).apply(_window_mean)

    if include_trailing_zeros:
        return smoothed

    last_nonzero = trimmed.replace(0, np.nan).last_valid_index()
    if not last_nonzero:
        # entirely empty series:
        return trimmed

    smoothed[last_nonzero + timedelta(days=1):] = np.nan
    return smoothed
def test_center(q): obj = Series(np.random.randn(50)) obj[:10] = np.NaN obj[-10:] = np.NaN result = obj.rolling(20, center=True).quantile(q) expected = ( concat([obj, Series([np.NaN] * 9)]) .rolling(20) .quantile(q)[9:] .reset_index(drop=True) ) tm.assert_series_equal(result, expected)
def ema(price: pd.Series, periods: int) -> pd.Series:
    """
    Given a series of price data, calculates the exponential moving average
    series as a weighted rolling mean over ``periods`` observations.
    """
    # Set alpha to 2 / (N + 1), a commonly used value
    alpha = 2 / (periods + 1)

    # Powers (1-a)^1 .. (1-a)^(N-1) via a running product, with 1 in front
    decay = np.cumprod([1 - alpha] * (periods - 1))
    oldest_first = np.array([1, *decay])[::-1]

    # Weights [a(1-a)^(N-1), ..., a(1-a), a], normalized so their sum is 1
    scaled = alpha * oldest_first
    weights = scaled / scaled.sum()

    def weighted_mean(window_values):
        return np.dot(window_values, weights)

    return price.rolling(window=periods).apply(weighted_mean)
def stochastic(x: pd.Series, x_low: pd.Series = None, x_high: pd.Series = None, period: int = 14):
    """
    Compute the stochastic indicator and a smoothed version of it.

    The smoothed version is obtained by calling ``ema`` with
    ``span=int(period / 3.0)``. The stochastic indicator is defined as

    .. math::
        y[t] = 100.0 \\frac{x[t] - \\min(x_{low}[t-period+1:])}{\\max(x_{high}[t-period+1:]) - \\min(x_{low}[t-period+1:])}

    When no separate low/high records are supplied, a copy of ``x`` itself
    is used for them. For financial data, where high/low values are reached
    within a trading period, this might not be optimal.

    :param x: Target data to compute the stochastic indicator.
    :type x: pd.Series
    :param x_low: Minimum value of target data to compute the stochastic indicator.
    :type x_low: pd.Series
    :param x_high: Maximum value of target data to compute the stochastic indicator.
    :type x_high: pd.Series
    :param period: Period over which the stochastic indicator is computed. Default=14.
    :type period: int
    :return: A tuple (stochastic, stochastic_smoothed).
    """
    lows = x.copy(deep=True) if x_low is None else x_low
    highs = x.copy(deep=True) if x_high is None else x_high

    floor = lows.rolling(period).min()
    ceiling = highs.rolling(period).max()

    y = 100.0 * (x - floor) / (ceiling - floor)
    y_smoothed = ema(y, span=int(period / 3.0))
    return y, y_smoothed
def trend_signal(rets, lookback, lag):
    """Rolling standard deviation of ``rets``, shifted by ``lag`` periods.

    The window spans ``lookback`` observations and needs at least
    ``lookback - 5`` of them to produce a value.
    """
    signal = rets.rolling(lookback, min_periods=lookback - 5).std()
    return signal.shift(lag)


signal = trend_signal(returns, 100, 3)
# Weekly (Friday) mean of the signal, carried forward onto business days.
# The forward fill is required: a bare `.resample('B')` is only a Resampler
# object, which can be neither shifted nor multiplied with the returns.
trade_friday = signal.resample('W-FRI').mean().resample('B').ffill()
trade_rets = trade_friday.shift(1) * returns
to_index(trade_rets).plot()
print('block')

# Rolling volatility scaled by sqrt(250), aligned with the trade returns.
vol = returns.rolling(250, min_periods=200).std() * np.sqrt(250)
vol = vol.reindex(trade_rets.index)


def shape(rets, ann=250):
    """Return mean / std of ``rets`` scaled by ``sqrt(ann)``."""
    return rets.mean() / rets.std() * np.sqrt(ann)


print(trade_rets)
print(len(data))
print(len(trade_rets))
print(len(pd.qcut(vol, 4)))
print(trade_rets.groupby(pd.qcut(vol, 4)).agg(shape))
def trend_signal(rets, lookback, lag):
    """Rolling standard deviation of ``rets``, shifted by ``lag`` periods.

    The window spans ``lookback`` observations and needs at least
    ``lookback - 5`` of them to produce a value.
    """
    minimum_obs = lookback - 5
    windowed = rets.rolling(lookback, min_periods=minimum_obs)
    return windowed.std().shift(lag)