def kama(x, n=10, pow1=2, pow2=30):
    """KAMA: Kaufman's Adaptive Moving Average.

    Missing values are dropped before the computation and the result is
    left-padded with as many NaNs as were dropped, so the output always has
    the same length and index as ``x``.

    Params:
        x (Series): Time series data such as close prices.
        n (int): number of periods for the Efficiency Ratio (ER).
        pow1 (int): number of periods for the fastest EMA constant.
        pow2 (int): number of periods for the slowest EMA constant.

    Returns:
        Series: Kaufman's adaptive moving average of x, named
        ``"kama(n,pow1,pow2)"`` and indexed like ``x``.
    """
    # Compute on a gap-free copy with a plain 0..k-1 index; the caller's
    # index is only re-attached at the very end. Rebuilding the dropped
    # values on the original index fails as soon as one value is missing.
    values = np.asarray(x.dropna().values, dtype=float)
    nan_count = len(x) - len(values)
    clean = Series(values)

    change = (clean - clean.shift(n)).abs()
    volatility = (clean - clean.shift(1)).abs().rolling(window=n).sum()
    er = change / volatility

    fastest = 2.0 / (pow1 + 1.0)
    slowest = 2.0 / (pow2 + 1.0)
    # Plain array so that the loop below is purely positional.
    sc = ((er * (fastest - slowest) + slowest) ** 2.0).values

    smoothed = [np.nan] * len(values)
    first_value = True
    for i, weight in enumerate(sc):
        if np.isnan(weight):
            continue
        if first_value:
            # Seed the recursion with the first price that has a valid weight.
            smoothed[i] = values[i]
            first_value = False
        else:
            smoothed[i] = smoothed[i - 1] + weight * (values[i] - smoothed[i - 1])

    return Series(data=[np.nan] * nan_count + smoothed,
                  name="kama(%d,%d,%d)" % (n, pow1, pow2),
                  index=x.index)
def test_shift_dst(self):
    """Shifting a tz-aware datetime Series must keep its tz-aware dtype.

    Regression test for GH 13926.
    """
    tz_dtype = 'datetime64[ns, US/Eastern]'
    dates = date_range('2016-11-06', freq='H', periods=10, tz='US/Eastern')
    s = Series(dates)

    # A zero shift is the identity.
    unshifted = s.shift(0)
    tm.assert_series_equal(unshifted, s)
    self.assertEqual(unshifted.dtype, tz_dtype)

    # Forward by one: a leading NaT followed by the first nine stamps.
    forward = s.shift(1)
    forward_vals = [NaT] + dates.asobject.values.tolist()[:9]
    tm.assert_series_equal(forward, Series(forward_vals))
    self.assertEqual(forward.dtype, tz_dtype)

    # Backward by two: the last eight stamps followed by two NaT.
    backward = s.shift(-2)
    backward_vals = dates.asobject.values.tolist()[2:] + [NaT, NaT]
    tm.assert_series_equal(backward, Series(backward_vals))
    self.assertEqual(backward.dtype, tz_dtype)

    # Shifting by at least the full length leaves only NaT.
    for periods in [10, -10, 20, -20]:
        emptied = s.shift(periods)
        all_nat = Series([NaT] * 10, dtype=tz_dtype)
        tm.assert_series_equal(emptied, all_nat)
        self.assertEqual(emptied.dtype, tz_dtype)
def test_operators_na_handling(self): from decimal import Decimal from datetime import date s = Series([Decimal('1.3'), Decimal('2.3')], index=[date(2012, 1, 1), date(2012, 1, 2)]) result = s + s.shift(1) result2 = s.shift(1) + s assert isna(result[0]) assert isna(result2[0])
def test_shift2(self):
    """Shift by a frequency and check the resulting index; an index without freq must refuse to shift."""
    ts = Series(np.random.randn(5),
                index=date_range('1/1/2000', periods=5, freq='H'))

    # Shifting the Series by a frequency must match shifting its index.
    shifted = ts.shift(1, freq='5T')
    tm.assert_index_equal(shifted.index, ts.index.shift(1, freq='5T'))

    # GH #1063, multiple of same base
    shifted = ts.shift(1, freq='4H')
    tm.assert_index_equal(shifted.index, ts.index + offsets.Hour(4))

    irregular = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04'])
    self.assertRaises(ValueError, irregular.shift, 1)
class Timeseries(object):
    """Benchmarks for comparisons and differences on datetime Series.

    Each benchmark is parameterised over ``tz`` (naive and 'US/Eastern').
    """

    goal_time = 0.2

    param_names = ['tz']
    params = [None, 'US/Eastern']

    def setup(self, tz):
        """Build a minute-spaced and a second-spaced Series plus one scalar taken from the first."""
        size = 10**6
        self.N = size
        self.halfway = size // 2 - 1

        by_minute = date_range('20010101', periods=size, freq='T', tz=tz)
        self.s = Series(by_minute)
        # Scalar used on one side of the comparison benchmarks.
        self.ts = self.s[self.halfway]

        by_second = date_range('20010101', periods=size, freq='s', tz=tz)
        self.s2 = Series(by_second)

    def time_series_timestamp_compare(self, tz):
        """Series on the left, scalar on the right."""
        self.s <= self.ts

    def time_timestamp_series_compare(self, tz):
        """Scalar on the left, Series on the right."""
        self.ts >= self.s

    def time_timestamp_ops_diff(self, tz):
        """Built-in first difference."""
        self.s2.diff()

    def time_timestamp_ops_diff_with_shift(self, tz):
        """First difference spelled as subtraction of the shifted Series."""
        self.s - self.s.shift()
def addStrategy(self, name, strategy):
    """Evaluate ``strategy`` on every row and store its cumulative income under ``name``.

    ``strategy(i, row, stock)`` is called once per row of ``self.df``; a
    truthy result becomes signal 1, otherwise 0. The signal of the previous
    row multiplies the log return of 'Adj Close', and the exponentiated
    running sum is written to ``self.i_table[name]``.

    The signal list is built over ``len(self.df)`` rows. Counting the
    non-missing 'Adj Close' values instead makes the list shorter than the
    index whenever a price is missing, and building the Series then fails.
    """
    close = self.df['Adj Close']
    signals = [(1 if strategy(i, self.df.iloc[i], self.stock) else 0)
               for i in range(len(self.df))]
    signal = Series(signals, self.df.index)

    # Yesterday's signal decides whether today's return is earned.
    unit_income = np.log(close / close.shift(1)) * signal.shift(1)
    self.i_table[name] = np.exp(unit_income.cumsum())
def shiftTs():
    """Print a four-day Series, then print it again after ``shift(1, freq='M')``."""
    dates = [datetime(2014, 1, day) for day in range(2, 6)]
    ts1 = Series(np.arange(4) + 2, index=dates)
    print(ts1)

    # With `freq` given, shift moves the index labels and leaves the values in place.
    ts1 = ts1.shift(1, freq='M')
    print(ts1)
def test_dti_shift_across_dst(self):
    """Shifting by one hour with a tz-aware hourly index moves every label one step forward."""
    # GH 8616
    idx = date_range('2013-11-03', tz='America/Chicago',
                     periods=7, freq='H')
    leading, trailing = idx[:-1], idx[1:]

    result = Series(index=leading).shift(freq='H')
    expected = Series(index=trailing)
    tm.assert_series_equal(result, expected)
def test_dti_shift_near_midnight(self, shift, result_time):
    """Shift a single EST-localized midnight stamp by ``shift`` hours and compare with ``result_time``."""
    # GH 8616
    est = pytz.timezone('EST')
    dt_est = est.localize(datetime(2014, 11, 14, 0))

    s = Series(data=[1], index=[dt_est])
    result = s.shift(shift, freq='H')

    expected_index = DatetimeIndex([result_time], tz='EST')
    expected = Series(1, index=expected_index)
    tm.assert_series_equal(result, expected)
def test_shift2(self):
    """Shift by a frequency and check the resulting index; an index without freq must raise."""
    ts = Series(np.random.randn(5),
                index=date_range('1/1/2000', periods=5, freq='H'))

    # Shifting the Series by a frequency must match shifting its index.
    shifted = ts.shift(1, freq='5T')
    tm.assert_index_equal(shifted.index, ts.index.shift(1, freq='5T'))

    # GH #1063, multiple of same base
    shifted = ts.shift(1, freq='4H')
    tm.assert_index_equal(shifted.index, ts.index + offsets.Hour(4))

    irregular = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04'])
    msg = "Cannot shift with no freq"
    with pytest.raises(NullFrequencyError, match=msg):
        irregular.shift(1)
def test_shift_multiple_of_same_base(self):
    """Shifting an hourly Series by '4H' moves its index by four hours."""
    # GH #1063
    ts = Series(np.random.randn(5),
                index=date_range("1/1/2000", periods=5, freq="H"))

    shifted_index = ts.shift(1, freq="4H").index
    self.assert_(shifted_index.equals(ts.index + datetools.Hour(4)))
def test_shift_fill_value(self):
    """``shift(..., fill_value=...)`` fills the vacated slots and keeps an integer dtype."""
    # GH #24128
    hours = date_range('1/1/2000', periods=5, freq='H')
    ts = Series([1.0, 2.0, 3.0, 4.0, 5.0], index=hours)

    # check that fill value works, for a shift of one and of two periods
    cases = (
        (1, [0.0, 1.0, 2.0, 3.0, 4.0]),
        (2, [0.0, 0.0, 1.0, 2.0, 3.0]),
    )
    for periods, filled in cases:
        exp = Series(filled,
                     index=date_range('1/1/2000', periods=5, freq='H'))
        result = ts.shift(periods, fill_value=0.0)
        tm.assert_series_equal(result, exp)

    # An integer fill value must not force an upcast.
    ints = pd.Series([1, 2, 3])
    res = ints.shift(2, fill_value=0)
    assert res.dtype == ints.dtype
def test_operators_na_handling(self):
    """NA propagation for Decimal addition and for string concatenation with a Series."""
    from decimal import Decimal
    from datetime import date

    days = [date(2012, 1, 1), date(2012, 1, 2)]
    s = Series([Decimal('1.3'), Decimal('2.3')], index=days)

    # The first slot of the shifted operand is missing, so the sum is too.
    left_sum = s + s.shift(1)
    right_sum = s.shift(1) + s
    self.assertTrue(isnull(left_sum[0]))
    self.assertTrue(isnull(right_sum[0]))

    # String concatenation leaves the missing entry missing.
    words = Series(['foo', 'bar', 'baz', np.nan])

    prefixed = 'prefix_' + words
    assert_series_equal(
        prefixed,
        Series(['prefix_foo', 'prefix_bar', 'prefix_baz', np.nan]))

    suffixed = words + '_suffix'
    assert_series_equal(
        suffixed,
        Series(['foo_suffix', 'bar_suffix', 'baz_suffix', np.nan]))
def test_timedelta64(self):
    """Check the text rendering of timedelta results produced by datetime arithmetic."""
    from pandas import date_range
    from datetime import datetime, timedelta

    # Rendering a second-resolution timedelta Series must simply work.
    Series(np.array([1100, 20], dtype='timedelta64[s]')).to_string()

    s = Series(date_range('2012-1-1', periods=3, freq='D'))

    # GH2146
    # adding NaTs
    text = (s - s.shift(1)).to_string()
    self.assertTrue('1 days, 00:00:00' in text)
    self.assertTrue('NaT' in text)

    # Subtract a constant stamp and look for the expected fragments:
    # fractional seconds first, then the rounding cases.
    cases = [
        (datetime(2012, 1, 1, microsecond=150),
         ['-00:00:00.000150']),
        (datetime(2012, 1, 1, 1),
         ['-01:00:00', '1 days, 23:00:00']),
        (datetime(2012, 1, 1, 1, 1),
         ['-01:01:00', '1 days, 22:59:00']),
        (datetime(2012, 1, 1, 1, 1, microsecond=150),
         ['-01:01:00.000150', '1 days, 22:58:59.999850']),
    ]
    for stamp, fragments in cases:
        other = Series([stamp] * 3)
        text = (s - other).to_string()
        for fragment in fragments:
            self.assertTrue(fragment in text)

    # neg time
    td = timedelta(minutes=5, seconds=3)
    s2 = Series(date_range('2012-1-1', periods=3, freq='D')) + td
    text = (s - s2).to_string()
    self.assertTrue('-00:05:03' in text)

    # Here the timedelta itself is subtracted, so the result is a datetime.
    td = timedelta(microseconds=550)
    s2 = Series(date_range('2012-1-1', periods=3, freq='D')) + td
    text = (s - td).to_string()
    self.assertTrue('2012-01-01 23:59:59.999450' in text)
def test_comparison_object_numeric_nas(self): ser = Series(np.random.randn(10), dtype=object) shifted = ser.shift(2) ops = ['lt', 'le', 'gt', 'ge', 'eq', 'ne'] for op in ops: func = getattr(operator, op) result = func(ser, shifted) expected = func(ser.astype(float), shifted.astype(float)) tm.assert_series_equal(result, expected)
def DONCH(df, n):
    """Append a 'Donchian_<n>' column (rolling High max minus rolling Low min) to ``df``.

    The column keeps the layout of the earlier loop-based implementation:
    the first ``n - 1`` raw entries are 0, the window ending on the last row
    is not computed, and the whole column is then shifted down by ``n - 1``
    rows, so the last row of the joined frame is NaN.

    Windows are taken by position with ``rolling`` instead of the removed
    ``.ix`` indexer, and the length comes from the data rather than from the
    last index label, so a non-default index no longer breaks the loop bound.

    Params:
        df (DataFrame): must contain "High" and "Low" columns.
        n (int): window length.

    Returns:
        DataFrame: ``df`` joined with the new column.
    """
    width = df["High"].rolling(n).max() - df["Low"].rolling(n).min()

    values = np.array(width.values, dtype=float)
    values[: n - 1] = 0     # incomplete windows were historically reported as 0
    values = values[:-1]    # the window ending on the last row was never emitted

    DonCh = Series(values, index=df.index[: len(values)], name="Donchian_" + str(n))
    DonCh = DonCh.shift(n - 1)
    return df.join(DonCh)
def kama(close, length=None, fast=None, slow=None, drift=None, offset=None, **kwargs):
    """Indicator: Kaufman's Adaptive Moving Average (KAMA)

    Args:
        close: price series; passed through ``verify_series``.
        length: efficiency-ratio window, default 10.
        fast: period of the fast smoothing constant, default 2.
        slow: period of the slow smoothing constant, default 30.
        drift: lag used for the bar-to-bar differences (via ``get_drift``).
        offset: number of periods to shift the result (via ``get_offset``).

    Returns:
        Series named ``KAMA_<length>_<fast>_<slow>`` indexed like ``close``,
        or None when ``close`` is missing or shorter than ``length``.
    """
    # Validate Arguments
    close = verify_series(close)
    length = int(length) if length and length > 0 else 10
    fast = int(fast) if fast and fast > 0 else 2
    slow = int(slow) if slow and slow > 0 else 30
    drift = get_drift(drift)
    offset = get_offset(offset)

    # The seed list below already holds `length` entries, so a shorter
    # input cannot be mapped onto its own index.
    if close is None or close.size < length:
        return

    # Calculate Result
    m = close.size

    def weight(length: int) -> float:
        return 2 / (length + 1)

    fr = weight(fast)
    sr = weight(slow)

    abs_diff = non_zero_range(close, close.shift(length)).abs()
    peer_diff = non_zero_range(close, close.shift(drift)).abs()
    peer_diff_sum = peer_diff.rolling(length).sum()
    er = abs_diff / peer_diff_sum
    x = er * (fr - sr) + sr
    sc = x * x

    # Plain arrays keep the recursion positional; integer keys on the
    # Series itself would be looked up as index labels.
    sc_values = sc.values
    prices = close.values

    result = [npNaN for _ in range(0, length - 1)] + [0]
    for i in range(length, m):
        result.append(sc_values[i] * prices[i] + (1 - sc_values[i]) * result[i - 1])

    kama = Series(result, index=close.index)

    # Offset
    if offset != 0:
        kama = kama.shift(offset)

    # Name & Category
    kama.name = f"KAMA_{length}_{fast}_{slow}"
    kama.category = "overlap"

    return kama
def trend_return(close, trend, log=None, cumulative=None, offset=None, trend_reset=0, **kwargs):
    """Indicator: Trend Return

    Accumulates (or, when ``cumulative`` is falsy, just reports) the returns
    earned while ``trend`` differs from ``trend_reset``; the running value is
    reset to 0 whenever ``trend`` equals ``trend_reset``.

    Args:
        close: price series; passed through ``verify_series``.
        trend: trend flags, cast to int before use.
        log: use log returns when truthy, percent returns otherwise.
        cumulative: sum the returns within a trend when truthy.
        offset: number of periods to shift the result (via ``get_offset``).
        trend_reset: trend value that resets the running sum, default 0.

    Returns:
        Series indexed like ``trend``, named ``[C](L|P)TR``.
    """
    # Validate Arguments
    close = verify_series(close)
    trend = verify_series(trend)
    offset = get_offset(offset)
    trend_reset = int(trend_reset) if trend_reset and isinstance(
        trend_reset, int) else 0

    # Calculate Result
    returns = log_return(close, cumulative=False) if log else percent_return(
        close, cumulative=False)
    m = trend.size
    tsum = 0
    trend = trend.astype(int)
    returns = (trend * returns).apply(zero)

    # Positional access: integer keys on the Series would be index labels.
    trend_values = trend.values
    return_values = returns.values

    result = []
    for i in range(0, m):
        if trend_values[i] == trend_reset:
            tsum = 0
        else:
            return_ = return_values[i]
            if cumulative:
                tsum += return_
            else:
                tsum = return_
        result.append(tsum)

    # Keep the caller's index so the result aligns with the inputs.
    trend_return = Series(result, index=trend.index)

    # Offset
    if offset != 0:
        trend_return = trend_return.shift(offset)

    # Name & Category
    trend_return.name = f"{'C' if cumulative else ''}{'L' if log else 'P'}TR"
    trend_return.category = 'performance'

    return trend_return
def ta_future_multiband_bucket(df: _pd.Series, forecast_period=14, period=5, stddevs=[0.5, 1.0, 1.5, 2.0], ddof=1):
    """Label each row with the band bucket that the value ``forecast_period`` rows ahead falls into.

    The bands come from ``_i.ta_multi_bbands``. The label is the position of
    the first band value that is strictly greater than the future value, or
    the number of bands when none is. Rows without a future value stay NaN.

    ``stddevs`` is only passed through here, never mutated.
    """
    bands = _i.ta_multi_bbands(df, period, stddevs=stddevs, ddof=ddof)
    ahead = df.shift(-forecast_period)

    def bucket_of(price, edges):
        # Missing future value: propagate it unchanged.
        if _np.isnan(price):
            return price

        # First edge strictly above the price, else one past the last edge.
        return next((pos for pos, edge in enumerate(edges) if price < edge), len(edges))

    def classify(row):
        return bucket_of(row[ahead.name], row[bands.columns])

    return bands.join(ahead).apply(classify, axis=1, raw=False)
def points_grid_to_poly(gpd, id_col):
    """
    Convert a GeoDataFrame of evenly spaced gridded points to square polygons.
    The output is a GeoDataFrame of the same length as the input.

    gpd -- GeoDataFrame of gridded points with an id column.\n
    id_col -- The id column name.
    """
    # The smallest non-zero gap between consecutive x coordinates is taken
    # as the side length of every square.
    xs = Series(gpd.geometry.apply(lambda pt: pt.x))
    gaps = (xs.shift() - xs).abs()
    side_len = gaps[gaps > 0].min()

    squares = gpd.apply(
        lambda row: point_to_poly_apply(row.geometry, side_len=side_len),
        axis=1)
    return GeoDataFrame(gpd[id_col], geometry=squares, crs=gpd.crs)
def roc(values: pd.Series, period: int = 1) -> pd.Series:
    """
    Rate of change of a price series, in percent.

    Parameters
    ----------
    values : pd.Series
        series of data
    period : int
        number of periods to look back

    Returns
    -------
    pd.Series
        100 * (value - value `period` rows earlier) / (value `period` rows earlier)
    """
    change = values.diff(period)
    base = values.shift(period)
    return 100 * change / base
def get_target_return(t: pd.Series, nb_days=365, weeks=None, months=None, trim_data=False) -> pd.Series:
    """Annualised forward return, in percent, over a holding period.

    For each date this is the return obtained by buying at that date and
    selling ``nb_days`` calendar days later, annualised with a 365-day year.

    Args:
        t: prices indexed by date (a Series, or a DataFrame with one column
            per asset).
        nb_days: holding period in days.
        weeks: if given, overrides ``nb_days`` with ``weeks * 7``.
        months: if given, overrides ``weeks`` with ``months * 4``.
        trim_data: for a DataFrame, also drop rows where any column is NaN.

    Returns:
        Returns restricted to the dates originally present in ``t``. Rows
        carrying no information are removed: NaN entries for a Series,
        all-NaN rows for a DataFrame.
    """
    if months is not None:
        weeks = months * 4
    if weeks is not None:
        nb_days = weeks * 7

    # Remember the dates that really exist before filling the calendar;
    # the restriction step further down needs them.
    observed_index = t.index

    # Add an entry for every calendar day. Prices are assumed to persist,
    # e.g. Friday's value is also used for Saturday and Sunday.
    idx = pd.date_range(start=observed_index.min(), end=observed_index.max(), freq="D")
    daily = t.reindex(idx, method="ffill")

    r = daily.shift(-nb_days) / daily - 1

    # Annualisation
    r = (1 + r) ** (365 / nb_days) - 1
    r = 100 * r

    # Drop the filled-in days and keep only the dates defined by the input.
    r = r.loc[r.index.intersection(observed_index)]

    # Keep only rows that carry information: with data up to 2019-10-31 and
    # nb_days = 365, returns are defined until roughly 2018-10-31.
    if isinstance(r, pd.DataFrame):
        r = r[~r.isna().all(axis=1)]
        if trim_data:
            r = r[~r.isna().any(axis=1)]
    else:
        # A Series has a single value per row, so "all NaN" means "NaN";
        # trim_data would not remove anything further.
        r = r[~r.isna()]
    return r
def test_datetime_series_shift_with_freq(self, datetime_series):
    """``shift(freq="infer")`` round-trips and agrees with an explicit freq, with or without a freq on the index."""
    forward = datetime_series.shift(1, freq="infer")

    # Shifting back restores the input.
    back = forward.shift(-1, freq="infer")
    tm.assert_series_equal(datetime_series, back)

    # Passing the index's own freq explicitly gives the same answer.
    explicit = datetime_series.shift(freq=datetime_series.index.freq)
    tm.assert_series_equal(forward, explicit)

    # Same values on a plain Index built from the raw stamps.
    inferred_ts = Series(
        datetime_series.values,
        Index(np.asarray(datetime_series.index)),
        name="ts",
    )
    forward = inferred_ts.shift(1, freq="infer")
    expected = datetime_series.shift(1, freq="infer")
    expected.index = expected.index._with_freq(None)
    tm.assert_series_equal(forward, expected)

    back = forward.shift(-1, freq="infer")
    tm.assert_series_equal(back, inferred_ts)
def outs_per_inning(x: pd.Series):
    """
    Aggregation function: the sum of one-step differences in a `pd.Series`,
    with the element before the first one taken as 0.

    Intended to be used in a groupby aggregation to calculate the number of
    outs recorded in an inning.

    * **usage**:

    ```python
    df.groupby(["inning"]).agg({"postouts": outs_per_inning})
    ```

    * input:
        - `x`: `pd.Series`

    * output:
        - sum of one time-step differences in `x`
    """
    previous = x.shift(1).fillna(0)
    steps = x - previous
    return steps.sum()
def get_bar_based_hasbrouck_lambda(close: pd.Series, dollar_volume: pd.Series, window: int = 20) -> pd.Series:
    """
    Advances in Financial Machine Learning, p.289-290.

    Get Hasbrouck lambda from bars data

    :param close: (pd.Series) Close prices
    :param dollar_volume: (pd.Series) Dollar volumes
    :param window: (int) Rolling window used for estimation
    :return: (pd.Series) Hasbrouck lambda
    """
    log_ret = np.log(close / close.shift(1))

    # A bar with no price change has sign 0; it inherits the sign of the
    # most recent bar that did move. Written with mask + ffill because
    # `Series.replace(0, method='pad')` is deprecated in recent pandas.
    # Unlike that call, ffill also fills a sign that is NaN; the final ratio
    # is NaN at those rows anyway because log_ret is NaN there.
    raw_sign = np.sign(log_ret)
    log_ret_sign = raw_sign.mask(raw_sign == 0).ffill()

    signed_dollar_volume_sqrt = log_ret_sign * np.sqrt(dollar_volume)
    return (log_ret / signed_dollar_volume_sqrt).rolling(window=window).mean()
def fisher(high, low, length=None, offset=None, **kwargs):
    """Indicator: Fisher Transform (FISHT)

    Args:
        high, low: price series; passed through ``verify_series``.
        length: rolling window for the min/max normalisation, default 5.
        offset: number of periods to shift the result (via ``get_offset``).

    Kwargs:
        fillna: value used to fill missing results.
        fill_method: fill method passed to ``fillna``.

    Returns:
        Series named ``FISHERT_<length>`` indexed like ``high``.
    """
    # Validate Arguments
    high = verify_series(high)
    low = verify_series(low)
    length = int(length) if length and length > 0 else 5
    offset = get_offset(offset)

    # Calculate Result
    m = high.size
    hl2_ = hl2(high, low)
    max_high = hl2_.rolling(length).max()
    min_low = hl2_.rolling(length).min()
    hl2_range = max_high - min_low
    hl2_range[hl2_range < 1e-5] = 0.001  # avoid dividing by a near-zero range
    position = (hl2_ - min_low) / hl2_range

    # Positional access: integer keys on the Series would be index labels.
    position_values = position.values

    v = 0
    fish = 0
    # Never emit more warm-up entries than there are rows, so the result
    # always fits the input index.
    result = [npNaN for _ in range(0, min(length - 1, m))]
    for i in range(length - 1, m):
        v = 0.66 * (position_values[i] - 0.5) + 0.67 * v
        # Clamp so the logarithm below stays finite.
        if v > 0.99:
            v = 0.999
        if v < -0.99:
            v = -0.999
        fish = 0.5 * (fish + nplog((1 + v) / (1 - v)))
        result.append(fish)

    # Keep the caller's index so the result aligns with the inputs.
    fisher = Series(result, index=high.index)

    # Offset
    if offset != 0:
        fisher = fisher.shift(offset)

    # Handle fills
    if 'fillna' in kwargs:
        fisher.fillna(kwargs['fillna'], inplace=True)
    if 'fill_method' in kwargs:
        fisher.fillna(method=kwargs['fill_method'], inplace=True)

    # Name and Categorize it
    fisher.name = f"FISHERT_{length}"
    fisher.category = 'momentum'

    return fisher
def price_channel_upper(price_high: Series, period: int) -> Series:
    """
    Price channel upper line.

    <Description>
    Computes the upper line of the price channel: the highest high within a
    given period. The current row is excluded, because the series is shifted
    by one row before the rolling maximum is taken.

    <Usage>
    Pass the high prices as the first argument and the look-back period as
    the second. For a 20-day upper channel line write
    'price_channel_upper(high, 20)' (the original docstring also gives the
    Korean form '가격채널상한선(고가, 20)').

    :param price_high: high prices
    :param period: period used to compute the upper channel line
    :return: rolling maximum of the previous `period` highs
    """
    previous_highs = price_high.shift(1)
    window = previous_highs.rolling(window=period)
    return window.max()
def price_channel_lower(price_low: Series, period: int) -> Series:
    """
    Price channel lower line.

    <Description>
    Computes the lower line of the price channel: the lowest low within a
    given period. The current row is excluded, because the series is shifted
    by one row before the rolling minimum is taken.

    <Usage>
    Pass the low prices as the first argument and the look-back period as
    the second. For a 20-day lower channel line write
    'price_channel_lower(low, 20)' (the original docstring also gives the
    Korean form '가격채널하한선(저가, 20)').

    :param price_low: low prices
    :param period: period used to compute the lower channel line
    :return: rolling minimum of the previous `period` lows
    """
    previous_lows = price_low.shift(1)
    window = previous_lows.rolling(window=period)
    return window.min()
def crosscorr(data_x: pd.Series, data_y: pd.Series, lag: Optional[int] = 0) -> float:
    """
    Lag-N cross correlation between two time series.

    The index of `data_y` is moved by `lag` days (the values stay in place);
    the correlation is then taken over the labels both series share.

    Parameters
    ----------
    data_x : pandas Series
        The first time series
    data_y : pandas Series
        The second time series
    lag : int, optional, default is 0
        Lag in days.

    Returns
    -------
    Cross-correlation with the specified lag for the given time series.
    """
    lagged_y = data_y.shift(lag, freq="D")
    return data_x.corr(lagged_y)
def vidya(close, length=None, drift=None, offset=None, **kwargs):
    """Indicator: Variable Index Dynamic Average (VIDYA)

    Args:
        close: price series; passed through ``verify_series``.
        length: smoothing period, default 14.
        drift: lag used for the momentum difference (via ``get_drift``).
        offset: number of periods to shift the result (via ``get_offset``).

    Returns:
        Series named ``VIDYA_<length>``, or None when ``verify_series``
        returns None.
    """
    # Validate Arguments
    length = int(length) if length and length > 0 else 14
    close = verify_series(close, length)
    drift = get_drift(drift)
    offset = get_offset(offset)

    if close is None: return

    def _cmo(source: Series, n: int, d: int):
        """Chande Momentum Oscillator (CMO) Patch

        Local copy: importing ``cmo`` from pandas_ta.momentum reportedly
        caused a circular import error in pandas_ta.momentum.coppock.
        """
        mom = source.diff(d)
        positive = mom.copy().clip(lower=0)
        negative = mom.copy().clip(upper=0).abs()
        pos_sum = positive.rolling(n).sum()
        neg_sum = negative.rolling(n).sum()
        return (pos_sum - neg_sum) / (pos_sum + neg_sum)

    # Calculate Result
    m = close.size
    alpha = 2 / (length + 1)
    abs_cmo = _cmo(close, length, drift).abs()

    # Float from the start: the loop writes float values, and assigning
    # them into an integer Series relies on implicit upcasting that recent
    # pandas warns about.
    vidya = Series(0.0, index=close.index)
    for i in range(length, m):
        vidya.iloc[i] = alpha * abs_cmo.iloc[i] * close.iloc[i] + vidya.iloc[
            i - 1] * (1 - alpha * abs_cmo.iloc[i])

    # Untouched slots (still 0) are reported as missing.
    vidya.replace({0: npNaN}, inplace=True)

    # Offset
    if offset != 0:
        vidya = vidya.shift(offset)

    # Name & Category
    vidya.name = f"VIDYA_{length}"
    vidya.category = "overlap"

    return vidya
def get_feature_return(t: pd.Series, nb_days=[1, 7, 30]):
    """Annualised trailing returns, in percent, for several look-back windows.

    `t` is a price time series. It is first filled to one entry per calendar
    day (forward fill). The result is a DataFrame with one column `r_<days>`
    per entry of `nb_days`, holding the return over the last `<days>` days
    annualised with a 365-day year.

    `nb_days` is only iterated, never modified.
    """
    every_day = pd.date_range(start=t.index.min(), end=t.index.max(), freq="D")
    daily = t.reindex(every_day, method="ffill")

    def annualised_pct(day_shift):
        # Simple return over the window, compounded to a year, as a percentage.
        simple = daily / daily.shift(day_shift) - 1
        yearly = (1 + simple) ** (365 / day_shift) - 1
        return yearly * 100

    columns = {f"r_{day_shift}": annualised_pct(day_shift) for day_shift in nb_days}
    return pd.DataFrame(columns)
def lag_time_series(time_series: pd.Series, lags):
    """
    Create lagged time series features.

    Parameters
    ----------
    time_series : pd.Series
    lags : list[int]
        List of lags

    Returns
    -------
    pd.DataFrame
        One column per lag, named with the lag as a string, holding the
        series shifted by that many rows.
    """
    shifted_by_lag = {str(lag): time_series.shift(lag) for lag in lags}
    return pd.concat(shifted_by_lag, axis=1)
def max_index(s: pd.Series, n: int = 5):
    """
    Position of the maximum inside each trailing window of `n` values.

    For every row the last `n` values (including the current one) are
    compared. The result is the 1-based position of the largest value
    inside that window: 1 is the oldest value, `n` the current one.

    :param s: input series
    :param n: window length
    :return: float series indexed like `s`; the first `n - 1` rows are NaN
        because their window is incomplete. If `s` holds fewer than `n`
        non-missing values, an all-NaN series is returned.
    """
    # The size check follows the window length instead of a fixed 5, and
    # the empty result gets an explicit dtype.
    if len(s.dropna()) < n:
        return pd.Series(index=s.index, dtype=float)

    # Column labelled n is the current value, column 1 the oldest one.
    k = pd.concat([s.shift(i) for i in range(n)], axis=1)
    k.columns = list(range(n, 0, -1))

    # Cast before blanking: NaN cannot be stored in an integer series.
    m = k.idxmax(axis=1).astype(float)

    # The first n - 1 rows have an incomplete window and are not compared.
    m.iloc[:n - 1] = np.nan
    return m
def rsi_cross_signals(rsi_values: pd.Series, cross_line: float, direction: str = 'rise'):
    """
    Buy/sell signals for an RSI series crossing a threshold line.

    Returns a boolean series that is True on the rows where the crossing
    happens.

    Parameters
    ----------
    rsi_values : pandas.Series
        RSI values
    cross_line : float
        threshold line, strictly between 0 and 100
    direction : str
        'rise' (default): the value reaches or exceeds the line coming from
        below; 'fall': the value reaches or drops under the line coming
        from above

    Returns
    -------
    pandas.Series

    Raises
    ------
    ValueError
        for a threshold outside (0, 100) or an unknown direction
    """
    if not (0 < cross_line < 100):
        raise ValueError('cross_line takes values from 0 to 100')

    today = pd.DataFrame({'RSI': rsi_values})['RSI']
    day_before = rsi_values.shift(1)

    if direction == 'rise':
        # below the line yesterday, on or above it today
        crossed = (today >= cross_line) & (day_before < cross_line)
    elif direction == 'fall':
        # above the line yesterday, on or below it today
        crossed = (today <= cross_line) & (day_before > cross_line)
    else:
        raise ValueError('wrong value for direction, must be "rise" or "fall"')

    return crossed.rename(f'Cross signal ({cross_line} on {direction})')
def test_argsort(self, datetime_series):
    """``argsort`` returns integers and places a missing datetime as -1."""
    self._check_accum_op("argsort", datetime_series, check_dtype=False)
    order = datetime_series.argsort()
    assert issubclass(order.dtype.type, np.integer)

    # GH 2967 (introduced bug in 0.11-dev I think)
    stamps = [Timestamp("201301{i:02d}".format(i=i)) for i in range(1, 6)]
    s = Series(stamps)
    assert s.dtype == "datetime64[ns]"

    # Shifting back leaves a missing value in the last slot.
    shifted = s.shift(-1)
    assert shifted.dtype == "datetime64[ns]"
    assert isna(shifted[4])

    tm.assert_series_equal(s.argsort(), Series(range(5), dtype="int64"))
    tm.assert_series_equal(
        shifted.argsort(),
        Series(list(range(4)) + [-1], dtype="int64"),
    )
def wma(close, length=None, asc=None, offset=None, **kwargs):
    """Indicator: Weighted Moving Average (WMA)

    Args:
        close: price series; passed through ``verify_series``.
        length: window length, default 10.
        asc: when True (default) the most recent value gets the largest
            weight; when False the weights are reversed. Only used by the
            NumPy implementation.
        offset: number of periods to shift the result (via ``get_offset``).

    Kwargs:
        fillna: value used to fill missing results.
        fill_method: fill method passed to ``fillna``.

    Returns:
        Series named ``WMA_<length>``.
    """
    # Validate Arguments
    close = verify_series(close)
    length = int(length) if length and length > 0 else 10
    # Only a missing value falls back to True, so an explicit False is kept.
    asc = True if asc is None else bool(asc)
    offset = get_offset(offset)

    # Calculate Result
    if Imports["talib"]:
        from talib import WMA
        # Bound to the name used by the shared post-processing below.
        wma_ds = WMA(close, length)
    else:
        total_weight = 0.5 * length * (length + 1)
        weights_ = np.arange(1, length + 1)
        weights = weights_ if asc else np.flip(weights_)

        def _linear(x):
            return np.dot(x, weights) / total_weight

        values = [
            _linear(each)
            for each in np.lib.stride_tricks.sliding_window_view(np.array(close), length)
        ]
        # The first length - 1 rows have no complete window.
        wma_ds = Series([np.nan] * (length - 1) + values)
        wma_ds.index = close.index

    # Offset
    if offset != 0:
        wma_ds = wma_ds.shift(offset)

    # Handle fills
    if "fillna" in kwargs:
        wma_ds.fillna(kwargs["fillna"], inplace=True)
    if "fill_method" in kwargs:
        wma_ds.fillna(method=kwargs["fill_method"], inplace=True)

    # Name & Category
    wma_ds.name = f"WMA_{length}"
    wma_ds.category = "overlap"

    return wma_ds
def test_diff(self):
    """``diff`` must agree with subtracting the shifted Series, for several dtypes."""
    # Just run the function
    self.ts.diff()

    # int dtype: a difference of 1 between large integers must be exact
    big = 10000000000000000
    ints = Series([big, big + 1])
    assert ints.diff()[1] == 1

    # neg n
    assert_series_equal(self.ts.diff(-1), self.ts - self.ts.shift(-1))

    # 0
    assert_series_equal(self.ts.diff(0), self.ts - self.ts)

    # datetime diff (GH3100)
    stamps = Series(date_range('20130102', periods=5))
    by_shift = stamps - stamps.shift(1)
    by_diff = stamps.diff()
    assert_series_equal(by_shift, by_diff)

    # timedelta diff
    assert_series_equal(by_shift - by_shift.shift(1), by_diff.diff())

    # with tz
    aware = Series(date_range('2000-01-01 09:00:00', periods=5,
                              tz='US/Eastern'), name='foo')
    expected = Series(TimedeltaIndex(['NaT'] + ['1 days'] * 4), name='foo')
    assert_series_equal(aware.diff(), expected)
def plot_growth_rate(data: pd.Series, smoothing_window: int = 4) -> None:
    """
    Plot the daily growth rate of the time series `data`, raw and smoothed.

    The index of `data` must consist of consecutive days as datetime objects.
    `smoothing_window` is the width, in days, of the window used for smoothing.
    """
    # daily growth rate: each value divided by the value one day earlier
    one_day = pd.to_timedelta('1d')
    rates = (data / data.shift(1, one_day)).dropna()

    # smooth by applying `gmean` over a window of the requested width
    span = smoothing_window * pd.to_timedelta('1d')
    rates_smooth = rates.rolling(window=span).apply(gmean).dropna()

    plt.plot(rates_smooth)
    plt.plot(rates)
    plt.legend(['Smooth', 'Original'])
    plt.show()
def get_data_range_iter(s: pd.Series, extent_left=False):
    """
    Iterate over the runs of equal consecutive values in a series.

    Yields one ``(idx_from, idx_to, data)`` tuple per run, where ``idx_from``
    and ``idx_to`` are the index labels of the first and last element of the
    run and ``data`` is the run's value. Consecutive missing values count as
    equal. An empty series yields nothing.

    :param s: input series
    :param extent_left: when True, each run's left boundary is the right
        boundary of the previously yielded run instead of its own first label
    :return: generator of ``(idx_from, idx_to, data)``
    """
    if len(s) == 0:
        return

    is_new_range, idx_from, idx_to, data = True, s.index[0], None, None

    # Pair every element with its successor; the last one is paired with a
    # missing value.
    for (idx_to, data), (_, d2) in zip(s.items(), s.shift(-1).items()):
        if is_new_range and not extent_left:
            idx_from = idx_to
            is_new_range = False

        # pd.isna also accepts non-numeric values such as strings or None,
        # where np.isnan raises TypeError.
        if data != d2 and not (pd.isna(data) and pd.isna(d2)):
            yield idx_from, idx_to, data
            if extent_left:
                idx_from = idx_to
            is_new_range = True
    else:
        # A run still open after the last element (a trailing run of missing
        # values) is emitted here.
        if not is_new_range:
            yield idx_from, idx_to, data
def daily_growth_rate(series: pd.Series, **kwargs):
    """Plot the daily growth rate (percent) of a 7-row rolling mean of `series`.

    Rolling means below 10 are ignored. Nothing is plotted when no growth
    rate can be computed. `kwargs` is forwarded to `line` and
    `finalise_plot`; its 'lfooter' entry is prefixed with a note about the
    threshold. Always returns None.
    """
    PERIOD = 7
    THRESHOLD = 10  # minimum cases per day on average

    # growth rate
    smoothed = series.rolling(PERIOD).mean()
    smoothed = smoothed.where(smoothed >= THRESHOLD, other=np.nan)  # ignore small data
    ratio = smoothed / smoothed.shift(PERIOD)
    k = np.log(ratio) / PERIOD * 100  # daily growth rate %
    if k.isna().all():
        return None

    _, ax = plt.subplots()
    line(ax, k, kwargs)
    ax.axhline(0, color='#999999', lw=0.5)

    kwargs['lfooter'] = (f'When daily new cases >= {THRESHOLD}; '
                         + kwargs.get('lfooter', ''))
    finalise_plot(ax, **kwargs)

    return None
def hwma(close, na=None, nb=None, nc=None, offset=None, **kwargs):
    """Indicator: Holt-Winter Moving Average

    Args:
        close: price series; passed through ``verify_series``.
        na, nb, nc: smoothing parameters, each strictly between 0 and 1;
            defaults 0.2, 0.1 and 0.1.
        offset: number of periods to shift the result (via ``get_offset``).

    Kwargs:
        fillna: value used to fill missing results.
        fill_method: fill method passed to ``fillna``.

    Returns:
        Series named ``HWMA_<na>_<nb>_<nc>`` indexed like ``close``, or None
        when there is no data.
    """
    # Validate Arguments
    close = verify_series(close)
    na = float(na) if na and na > 0 and na < 1 else 0.2
    nb = float(nb) if nb and nb > 0 and nb < 1 else 0.1
    nc = float(nc) if nc and nc > 0 and nc < 1 else 0.1
    offset = get_offset(offset)

    # The recursion is seeded with the first price, so it needs one.
    if close is None or close.size == 0:
        return

    # Calculate Result
    # Positional access: integer keys on the Series would be index labels.
    prices = close.values

    last_a = last_v = 0
    last_f = prices[0]

    result = []
    for price in prices:
        F = (1.0 - na) * (last_f + last_v + 0.5 * last_a) + na * price
        V = (1.0 - nb) * (last_v + last_a) + nb * (F - last_f)
        A = (1.0 - nc) * last_a + nc * (V - last_v)
        result.append((F + V + 0.5 * A))

        # update values
        last_a, last_f, last_v = A, F, V

    hwma = Series(result, index=close.index)

    # Offset
    if offset != 0:
        hwma = hwma.shift(offset)

    # Handle fills
    if "fillna" in kwargs:
        hwma.fillna(kwargs["fillna"], inplace=True)
    if "fill_method" in kwargs:
        hwma.fillna(method=kwargs["fill_method"], inplace=True)

    # Name & Category
    suffix = f"{na}_{nb}_{nc}"
    hwma.name = f"HWMA_{suffix}"
    hwma.category = "overlap"

    return hwma
def test_timedelta64(self):
    """Check the text rendering of timedelta results produced by datetime arithmetic."""
    from pandas import date_range
    from datetime import datetime

    # check this works
    Series(np.array([1100, 20], dtype='timedelta64[s]')).to_string()

    # GH2146

    # adding NaTs
    days = Series(date_range('2012-1-1', periods=3, freq='D'))
    text = (days - days.shift(1)).to_string()
    for fragment in ('1 days, 00:00:00', 'NaT'):
        self.assertTrue(fragment in text)

    # with frac seconds
    days = Series(date_range('2012-1-1', periods=3, freq='D'))
    text = (days - datetime(2012, 1, 1, microsecond=150)).to_string()
    self.assertTrue('00:00:00.000150' in text)
def sharpe_ratio(returns: pd.Series, cumulative: bool = False, entries_per_year: int = 252, risk_free_rate: float = 0) -> float:
    """
    Annualized Sharpe Ratio for a pd.Series of normal (not log) returns.

    :param returns: (pd.Series) returns
    :param cumulative: (bool) True if `returns` are cumulative; they are then
        converted back to per-period returns first (False by default)
    :param entries_per_year: (int) number of return entries per year (252 by default)
    :param risk_free_rate: (float) risk-free rate (0 by default)
    :return: (float) Annualized Sharpe Ratio
    """
    if cumulative:
        # Undo the accumulation; the first entry has no predecessor and is dropped.
        previous = returns.shift(1)
        returns = (returns / previous - 1)[1:]

    excess = returns.mean() - risk_free_rate
    annualisation = (entries_per_year) ** (1 / 2)
    return excess / returns.std() * annualisation
def timing_of_flattening_and_flips(target_positions: pd.Series) -> pd.DatetimeIndex:
    """
    Advances in Financial Machine Learning, Snippet 14.1, page 197

    Derives the timestamps of flattening or flipping trades from a pandas
    series of target positions. Can be used for position-change analysis,
    such as the frequency and balance of position changes.

    Flattenings - times when an open position is being closed (the target
    position becomes 0).
    Flips - times when a positive position is reversed to negative and vice
    versa.

    :param target_positions: (pd.Series) Target position series with timestamps as indices
    :return: (pd.DatetimeIndex) Timestamps of flattening trades, flipping trades and the last bet
    """
    # Timestamps at which the target position is flat.
    flat_now = target_positions.loc[target_positions == 0].index

    # Timestamps whose previous position was not flat. The first timestamp
    # has no predecessor; its missing value also compares unequal to 0.
    lagged = target_positions.shift(1)
    open_before = lagged.loc[lagged != 0].index

    # FLATTENING - the previous position was open, the current one is flat
    flattening = flat_now.intersection(open_before)

    # FLIPS - the product of a position and its predecessor is negative
    # exactly when the two have opposite signs.
    current = target_positions.iloc[1:]
    preceding = target_positions.iloc[:-1].values
    products = current * preceding
    flips = products.loc[products < 0].index

    flips_and_flattenings = flattening.union(flips).sort_values()

    # The last bet is always part of the result.
    last_stamp = target_positions.index[-1]
    if last_stamp not in flips_and_flattenings:
        flips_and_flattenings = flips_and_flattenings.append(
            target_positions.index[-1:])

    return flips_and_flattenings
def test_diff(self):
    """``diff`` must agree with subtracting the shifted Series, for several dtypes."""
    # Just run the function
    self.ts.diff()

    # int dtype: a difference of 1 between large integers must be exact
    big = 10000000000000000
    ints = Series([big, big + 1])
    self.assertEqual(ints.diff()[1], 1)

    # neg n
    assert_series_equal(self.ts.diff(-1), self.ts - self.ts.shift(-1))

    # 0
    assert_series_equal(self.ts.diff(0), self.ts - self.ts)

    # datetime diff (GH3100)
    stamps = Series(date_range('20130102', periods=5))
    by_shift = stamps - stamps.shift(1)
    by_diff = stamps.diff()
    assert_series_equal(by_shift, by_diff)

    # timedelta diff
    assert_series_equal(by_shift - by_shift.shift(1), by_diff.diff())

    # with tz
    aware = Series(
        date_range('2000-01-01 09:00:00', periods=5, tz='US/Eastern'),
        name='foo')
    expected = Series(TimedeltaIndex(['NaT'] + ['1 days'] * 4), name='foo')
    assert_series_equal(aware.diff(), expected)
def test_reindex_pad():
    """Forward filling through ``reindex`` and ``ffill``, including the resulting dtypes."""
    full = Series(np.arange(10), dtype="int64")
    every_other = full[::2]

    # "pad" and "ffill" are two names for the same method.
    padded = every_other.reindex(full.index, method="pad")
    ffilled = every_other.reindex(full.index, method="ffill")
    tm.assert_series_equal(padded, ffilled)

    tm.assert_series_equal(
        padded, Series([0, 0, 2, 2, 4, 4, 6, 6, 8, 8], index=np.arange(10))
    )

    # GH4604
    letters = Series([1, 2, 3, 4, 5], index=["a", "b", "c", "d", "e"])
    new_index = ["a", "g", "c", "f"]
    filled_after = Series([1, 1, 3, 3], index=new_index)

    # this changes dtype because the ffill happens after
    result = letters.reindex(new_index).ffill()
    tm.assert_series_equal(result, filled_after.astype("float64"))

    result = letters.reindex(new_index).ffill(downcast="infer")
    tm.assert_series_equal(result, filled_after)

    # Filling during the reindex can use labels absent from the new index.
    result = letters.reindex(new_index, method="ffill")
    tm.assert_series_equal(result, Series([1, 5, 3, 5], index=new_index))

    # inference of new dtype
    flags = Series([True, False, False, True], index=list("abcd"))
    new_labels = "agc"
    result = flags.reindex(list(new_labels)).ffill()
    tm.assert_series_equal(
        result, Series([True, True, False], index=list(new_labels))
    )

    # GH4618 shifted series downcasting
    all_false = Series(False, index=range(0, 5))
    result = all_false.shift(1).fillna(method="bfill")
    tm.assert_series_equal(result, Series(False, index=range(0, 5)))
def test_reindex_pad():
    """Forward filling through ``reindex`` and ``ffill``, including the resulting dtypes."""
    full = Series(np.arange(10), dtype='int64')
    every_other = full[::2]

    # 'pad' and 'ffill' are two names for the same method.
    padded = every_other.reindex(full.index, method='pad')
    ffilled = every_other.reindex(full.index, method='ffill')
    assert_series_equal(padded, ffilled)

    assert_series_equal(
        padded, Series([0, 0, 2, 2, 4, 4, 6, 6, 8, 8], index=np.arange(10)))

    # GH4604
    letters = Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])
    new_index = ['a', 'g', 'c', 'f']
    filled_after = Series([1, 1, 3, 3], index=new_index)

    # this changes dtype because the ffill happens after
    result = letters.reindex(new_index).ffill()
    assert_series_equal(result, filled_after.astype('float64'))

    result = letters.reindex(new_index).ffill(downcast='infer')
    assert_series_equal(result, filled_after)

    # Filling during the reindex can use labels absent from the new index.
    result = letters.reindex(new_index, method='ffill')
    assert_series_equal(result, Series([1, 5, 3, 5], index=new_index))

    # inference of new dtype
    flags = Series([True, False, False, True], index=list('abcd'))
    new_labels = 'agc'
    result = flags.reindex(list(new_labels)).ffill()
    assert_series_equal(
        result, Series([True, True, False], index=list(new_labels)))

    # GH4618 shifted series downcasting
    all_false = Series(False, index=lrange(0, 5))
    result = all_false.shift(1).fillna(method='bfill')
    assert_series_equal(result, Series(False, index=lrange(0, 5)))
def define_steps(blank: pd.Series):
    """
    Build the points used to draw step transitions between levels.

    For the value at position ``i`` three points are produced, in this order:
    ``i + 0.25`` holding that value, ``i`` holding ``None`` and ``i - 0.25``
    holding the value that follows it in ``blank`` (missing for the last
    position).

    Parameters
    ----------
    blank : pd.Series
        Values where the data start

    Returns
    -------
    pd.Series
        The generated values, indexed by the generated positions.
    """
    half_gap = 0.25
    following = blank.shift(-1)

    positions = []
    heights = []
    for pos, (current, upcoming) in enumerate(zip(blank, following)):
        positions += [pos + half_gap, pos, pos - half_gap]
        # The None in the middle keeps a bar from being joined to itself.
        heights += [current, None, upcoming]

    return pd.Series(heights, index=positions)
def test_reindex_pad():
    """Check forward-fill ("pad") behaviour of ``Series.reindex``.

    Covers: ``method='pad'`` and ``method='ffill'`` giving the same result,
    dtype handling when the fill happens after the reindex (GH4604), dtype
    inference for boolean data, and back-filling a shifted boolean series
    (GH4618).
    """
    source = Series(np.arange(10), dtype='int64')
    sparse = source[::2]

    padded = sparse.reindex(source.index, method='pad')
    ffilled = sparse.reindex(source.index, method='ffill')
    assert_series_equal(padded, ffilled)

    want = Series([0, 0, 2, 2, 4, 4, 6, 6, 8, 8], index=np.arange(10))
    assert_series_equal(padded, want)

    # GH4604
    source = Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])
    labels = ['a', 'g', 'c', 'f']
    want = Series([1, 1, 3, 3], index=labels)

    # The fill runs after the reindex, so the result comes back as float.
    got = source.reindex(labels).ffill()
    assert_series_equal(got, want.astype('float64'))

    got = source.reindex(labels).ffill(downcast='infer')
    assert_series_equal(got, want)

    got = source.reindex(labels, method='ffill')
    want = Series([1, 5, 3, 5], index=labels)
    assert_series_equal(got, want)

    # inference of new dtype
    source = Series([True, False, False, True], index=list('abcd'))
    labels = 'agc'
    got = source.reindex(list(labels)).ffill()
    want = Series([True, True, False], index=list(labels))
    assert_series_equal(got, want)

    # GH4618 shifted series downcasting
    source = Series(False, index=range(0, 5))
    got = source.shift(1).fillna(method='bfill')
    want = Series(False, index=range(0, 5))
    assert_series_equal(got, want)
def get_overlap_time_line(logs_dict):
    """Build a table of (start_time, end_time) stretches from the overlap of several logs.

    The index of ``get_overlap_df(logs_dict)`` is scanned for "jumps": gaps
    between consecutive index entries larger than
    ``param.UNIT_TIME * param.TIME_JUMPING_FACTOR`` (passed as the seconds
    argument of ``timedelta``). Stretches between jumps are kept only when
    ``series_is_larger_than_time_window`` accepts them for
    ``param.TIME_WINDOW_SIZE``.

    :param logs_dict: collection of logs; only its length and
        ``get_overlap_df(logs_dict)`` are used here.
    :return: DataFrame with columns ``start_time`` and ``end_time``; empty
        when ``logs_dict`` or the overlap frame has at most one entry.
    """
    time_line_df = DataFrame(columns=['start_time', 'end_time'])
    if len(logs_dict) > 1:
        global_df = get_overlap_df(logs_dict)
        if len(global_df) > 1:
            # presumably the index holds timestamps, given the timedelta comparison below — TODO confirm
            time_series = Series(global_df.index)
            time_series_shift = time_series.shift()
            # Keep only the differences between neighbouring entries that exceed the
            # jump threshold; the surviving labels are the positions just after a jump.
            jump_series = (time_series - time_series_shift)[
                (time_series - time_series_shift) > timedelta(0, param.UNIT_TIME * param.TIME_JUMPING_FACTOR)]
            # NOTE(review): if DataFrame is the pandas class, `.append` is not
            # available in pandas 2.x — confirm the targeted pandas version.
            if len(jump_series) == 0:
                # No jump: the whole series is a single candidate stretch.
                if series_is_larger_than_time_window(time_series, param.TIME_WINDOW_SIZE):
                    time_line_df = time_line_df.append(
                        pd.DataFrame([[time_series.iloc[0], time_series.iloc[-1]]], columns=time_line_df.columns))
            elif len(jump_series) == 1:
                # NOTE(review): `jump_series.iloc[0]` is the size of the jump (a time
                # difference), yet it is compared with integer positions here; this
                # may have been meant to be `jump_series.index[0]` — confirm.
                if jump_series.iloc[0] == 0:
                    if series_is_larger_than_time_window(time_series[1:], param.TIME_WINDOW_SIZE):
                        time_line_df = time_line_df.append(
                            pd.DataFrame([[time_series.iloc[1], time_series.iloc[-1]]], columns=time_line_df.columns))
                elif jump_series.iloc[0] == len(time_series) - 1:
                    # NOTE(review): the window check uses iloc[1:-1] while the stored
                    # stretch starts at iloc[0] — confirm which one is intended.
                    if series_is_larger_than_time_window(time_series.iloc[1:-1], param.TIME_WINDOW_SIZE):
                        time_line_df = time_line_df.append(
                            pd.DataFrame([[time_series.iloc[0], time_series.iloc[-2]]], columns=time_line_df.columns))
            time_lines = []
            # Visits consecutive pairs of jumps only, so the stretch following the
            # last jump is not added by this loop.
            for i in range(len(jump_series) - 1):
                j_id = jump_series.index[i]
                next_j_id = jump_series.index[i + 1]
                if i == 0:
                    # Leading stretch, from the first entry up to just before the first jump.
                    if j_id > 1:
                        if series_is_larger_than_time_window(time_series.iloc[0:j_id], param.TIME_WINDOW_SIZE):
                            time_lines.append([time_series.iloc[0], time_series.iloc[j_id - 1]])
                # Stretch between this jump and the next one.
                if series_is_larger_than_time_window(time_series.iloc[j_id:next_j_id], param.TIME_WINDOW_SIZE):
                    time_lines.append([time_series.iloc[j_id], time_series.iloc[next_j_id - 1]])
            time_line_df = time_line_df.append(pd.DataFrame(time_lines, columns=time_line_df.columns))
    return time_line_df
def test_shift(self):
    """Check ``Series.shift`` on datetime- and period-indexed series.

    Covers positional shifts, shifts by a frequency, period counts taken from
    int32/int64 data (GH 8129) and subtracting a shifted tz-aware series
    (xref 8260).
    """
    lagged = self.ts.shift(1)
    restored = lagged.shift(-1)

    tm.assert_index_equal(lagged.index, self.ts.index)
    tm.assert_index_equal(restored.index, self.ts.index)
    tm.assert_numpy_array_equal(restored.valid().values,
                                self.ts.values[:-1])

    # Shifting by a frequency and back again must round-trip.
    bday = datetools.bday
    lagged = self.ts.shift(1, freq=bday)
    restored = lagged.shift(-1, freq=bday)
    assert_series_equal(restored, self.ts)

    restored = self.ts.shift(0, freq=bday)
    assert_series_equal(restored, self.ts)

    lagged = self.ts.shift(1, freq='B')
    restored = lagged.shift(-1, freq='B')
    assert_series_equal(restored, self.ts)

    # corner case
    restored = self.ts.shift(0)
    assert_series_equal(restored, self.ts)

    # Shifting with PeriodIndex
    periods = tm.makePeriodSeries()
    lagged = periods.shift(1)
    restored = lagged.shift(-1)
    tm.assert_index_equal(lagged.index, periods.index)
    tm.assert_index_equal(restored.index, periods.index)
    tm.assert_numpy_array_equal(restored.valid().values,
                                periods.values[:-1])

    by_alias = periods.shift(1, 'B')
    by_offset = periods.shift(1, datetools.bday)
    assert_series_equal(by_alias, by_offset)
    assert_series_equal(periods, by_alias.shift(-1, 'B'))

    with self.assertRaises(ValueError):
        periods.shift(freq='D')

    # legacy support
    by_alias_kw = periods.shift(1, freq='B')
    assert_series_equal(by_alias, by_alias_kw)

    by_offset_kw = periods.shift(1, freq=datetools.bday)
    assert_series_equal(by_offset_kw, by_alias_kw)

    # 32-bit taking
    # GH 8129
    days = date_range('2000-01-01', periods=5)
    for int_dtype in ['int32', 'int64']:
        counts = Series(np.arange(5, dtype=int_dtype), index=days)
        n_periods = counts.iloc[1]
        got = counts.shift(periods=n_periods)
        want = Series([np.nan, 0, 1, 2, 3], index=days)
        assert_series_equal(got, want)

    # xref 8260
    # with tz
    eastern = Series(date_range('2000-01-01 09:00:00', periods=5,
                                tz='US/Eastern'), name='foo')
    got = eastern - eastern.shift()
    want = Series(TimedeltaIndex(['NaT'] + ['1 days'] * 4), name='foo')
    assert_series_equal(got, want)

    # incompat tz
    cet = Series(date_range('2000-01-01 09:00:00', periods=5, tz='CET'),
                 name='foo')
    with self.assertRaises(ValueError):
        eastern - cet
# NOTE(review): `ts` is used here before the assignment further down, so it
# has to exist already when this snippet runs — confirm against the code
# that precedes it.
ts.resample('D')

# Generating date ranges
pd.date_range('4/1/2012', '6/1/2012')
pd.date_range(start='4/1/2012', periods=20)
pd.date_range(end='6/1/2012', periods=20)
pd.date_range('1/1/2000', '12/1/2000', freq='BM')
pd.date_range('5/2/2012 12:56:31', periods=5)
pd.date_range('5/2/2012 12:56:31', periods=5, normalize=True)
pd.date_range('1/1/2000', periods=10, freq='1h30min')

# Frequencies and Date Offsets
# Shifting (leading and lagging) data
ts = Series(np.random.randn(4),
            index=pd.date_range('1/1/2000', periods=4, freq='M'))
ts
ts.shift(2)  # moves the values; those pushed past the end are dropped
ts.shift(-2)
ts.shift(2, freq='M')  # moves the timestamps; the values are kept

# Time Zone Handling =============================================
# Period ==========================================================
# resample ========================================================
# Time series plotting ============================================
close_px_all = pd.read_csv('stock_px.csv', parse_dates=True, index_col=0)
close_px = close_px_all[['AAPL', 'MSFT', 'XOM']]
# `resample` no longer takes `fill_method`; the fill is requested on the
# resampler it returns.
close_px = close_px.resample('B').ffill()
close_px.info()
def slide7():
    """Print a tour of pandas frequencies, ``Series.shift`` and date offsets.

    Nothing is returned; every result is written to standard output. The
    series values come from ``np.random.randn`` and so differ between runs.
    """
    from pandas.tseries.offsets import Day, Hour, Minute, MonthEnd

    hour = Hour()
    print(hour)
    four_hours = Hour(4)
    print(four_hours)
    print(pd.date_range('1/1/2000', '1/3/2000 23:59', freq='4h'))
    print(Hour(2) + Minute(30))
    print(pd.date_range('1/1/2000', periods=10, freq='1h30min'))

    ts = Series(np.random.randn(4),
                index=pd.date_range('1/1/2000', periods=4, freq='M'))
    print(ts)
    # Without a frequency the values move and the index stays in place.
    print(ts.shift(2))
    print(ts.shift(-2))
    # With a frequency the timestamps move and the values stay in place.
    print('2 M')
    print(ts.shift(2, freq='M'))
    print('3 D')
    print(ts.shift(3, freq='D'))
    print('1 3D')
    print(ts.shift(1, freq='3D'))
    print('1 90T')
    print(ts.shift(1, freq='90T'))

    print('shifting dates with offsets')
    now = datetime(2011, 11, 17)
    print(now + 3 * Day())
    print(now + MonthEnd())
    print(now + MonthEnd(2))

    offset = MonthEnd()
    print(offset)
    print(offset.rollforward(now))
    print(offset.rollback(now))

    # Group by the month end each timestamp rolls forward to.
    ts = Series(np.random.randn(20),
                index=pd.date_range('1/15/2000', periods=20, freq='4d'))
    print(ts.groupby(offset.rollforward).mean())
def test_constructor_with_datetime_tz(self):
    """Check building a ``Series`` from tz-aware datetimes (GH 8260).

    Exercises dtype reporting, export to a numpy array, indexing, concat,
    ``astype`` conversions, string formatting, dtype inference and an
    all-NaT series.
    """
    # 8260
    # support datetime64 with tz
    eastern_dtype = 'datetime64[ns, US/Eastern]'

    dr = date_range('20130101', periods=3, tz='US/Eastern')
    s = Series(dr)
    self.assertTrue(s.dtype.name == eastern_dtype)
    self.assertTrue(s.dtype == eastern_dtype)
    self.assertTrue(com.is_datetime64tz_dtype(s.dtype))
    self.assertTrue(eastern_dtype in str(s))

    # export
    exported = s.values
    self.assertIsInstance(exported, np.ndarray)
    self.assertTrue(exported.dtype == 'datetime64[ns]')
    round_tripped = pd.DatetimeIndex(exported).tz_localize('UTC')
    round_tripped = round_tripped.tz_convert(tz=s.dt.tz)
    self.assertTrue(dr.equals(round_tripped))

    # indexing
    first = s.iloc[0]
    self.assertEqual(first, Timestamp('2013-01-01 00:00:00-0500',
                                      tz='US/Eastern', offset='D'))
    first = s[0]
    self.assertEqual(first, Timestamp('2013-01-01 00:00:00-0500',
                                      tz='US/Eastern', offset='D'))

    masked = s[Series([True, True, False], index=s.index)]
    assert_series_equal(masked, s[0:2])

    head = s.iloc[0:1]
    assert_series_equal(head, Series(dr[0:1]))

    # concat
    glued = pd.concat([s.iloc[0:1], s.iloc[1:]])
    assert_series_equal(glued, s)

    # astype
    as_object = s.astype(object)
    want = Series(DatetimeIndex(s._values).asobject)
    assert_series_equal(as_object, want)

    relocalized = Series(s.values).dt.tz_localize('UTC')
    relocalized = relocalized.dt.tz_convert(s.dt.tz)
    assert_series_equal(relocalized, s)

    # astype - datetime64[ns, tz]
    converted = Series(s.values).astype('datetime64[ns, US/Eastern]')
    assert_series_equal(converted, s)

    converted = Series(s.values).astype(s.dtype)
    assert_series_equal(converted, s)

    converted = s.astype('datetime64[ns, CET]')
    want = Series(date_range('20130101 06:00:00', periods=3, tz='CET'))
    assert_series_equal(converted, want)

    # short str
    self.assertTrue(eastern_dtype in str(s))

    # formatting with NaT
    shifted = s.shift()
    self.assertTrue(eastern_dtype in str(shifted))
    self.assertTrue('NaT' in str(shifted))

    # long str
    long_series = Series(date_range('20130101', periods=1000,
                                    tz='US/Eastern'))
    self.assertTrue(eastern_dtype in str(long_series))

    rebuilt = pd.DatetimeIndex(s, freq='infer')
    tm.assert_index_equal(rebuilt, dr)

    # inference
    s = Series([
        pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
        pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific'),
    ])
    self.assertTrue(s.dtype == 'datetime64[ns, US/Pacific]')
    self.assertTrue(lib.infer_dtype(s) == 'datetime64')

    # Mixed time zones fall back to object dtype.
    s = Series([
        pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
        pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Eastern'),
    ])
    self.assertTrue(s.dtype == 'object')
    self.assertTrue(lib.infer_dtype(s) == 'datetime')

    # with all NaT
    s = Series(pd.NaT, index=[0, 1], dtype='datetime64[ns, US/Eastern]')
    want = Series(pd.DatetimeIndex(['NaT', 'NaT'], tz='US/Eastern'))
    assert_series_equal(s, want)
class MySeries:
    """Thin wrapper around a ``Series`` whose operations return ``MySeries``.

    The wrapped series is kept in ``x``; ``values`` and ``index`` are copied
    from it at construction time. Every other method forwards its arguments
    unchanged, either to a top-level ``pd.rolling_*`` function (with the
    wrapped series as first argument) or to the ``Series`` method of the same
    name, and wraps whatever comes back in a new ``MySeries``.

    NOTE(review): the ``pd.rolling_*`` functions, ``clip_lower``,
    ``clip_upper`` and ``nonzero`` are absent from recent pandas releases —
    confirm the pandas version this class targets.
    """

    def __init__(self, *args, **kwargs):
        """Build the wrapped ``Series`` from the given constructor arguments."""
        wrapped = Series(*args, **kwargs)
        self.x = wrapped
        self.values = wrapped.values
        self.index = wrapped.index

    def _via_pandas(self, func_name, args, kwargs):
        """Call ``pd.<func_name>`` on the wrapped series and re-wrap the result."""
        func = getattr(pd, func_name)
        return MySeries(func(self.x, *args, **kwargs))

    def _via_series(self, method_name, args, kwargs):
        """Call the wrapped series' ``<method_name>`` and re-wrap the result."""
        method = getattr(self.x, method_name)
        return MySeries(method(*args, **kwargs))

    # --- forwarded to top-level pandas rolling functions ---

    def rolling_mean(self, *args, **kwargs):
        return self._via_pandas("rolling_mean", args, kwargs)

    def rolling_count(self, *args, **kwargs):
        return self._via_pandas("rolling_count", args, kwargs)

    def rolling_sum(self, *args, **kwargs):
        return self._via_pandas("rolling_sum", args, kwargs)

    def rolling_median(self, *args, **kwargs):
        return self._via_pandas("rolling_median", args, kwargs)

    def rolling_min(self, *args, **kwargs):
        return self._via_pandas("rolling_min", args, kwargs)

    def rolling_max(self, *args, **kwargs):
        return self._via_pandas("rolling_max", args, kwargs)

    def rolling_std(self, *args, **kwargs):
        return self._via_pandas("rolling_std", args, kwargs)

    def rolling_var(self, *args, **kwargs):
        return self._via_pandas("rolling_var", args, kwargs)

    def rolling_skew(self, *args, **kwargs):
        return self._via_pandas("rolling_skew", args, kwargs)

    def rolling_kurtosis(self, *args, **kwargs):
        return self._via_pandas("rolling_kurtosis", args, kwargs)

    def rolling_window(self, *args, **kwargs):
        return self._via_pandas("rolling_window", args, kwargs)

    # --- forwarded to methods of the wrapped series ---

    def cumprod(self, *args, **kwargs):
        return self._via_series("cumprod", args, kwargs)

    def cumsum(self, *args, **kwargs):
        return self._via_series("cumsum", args, kwargs)

    def diff(self, *args, **kwargs):
        return self._via_series("diff", args, kwargs)

    def div(self, *args, **kwargs):
        return self._via_series("div", args, kwargs)

    def mul(self, *args, **kwargs):
        return self._via_series("mul", args, kwargs)

    def add(self, *args, **kwargs):
        return self._via_series("add", args, kwargs)

    def dropna(self, *args, **kwargs):
        return self._via_series("dropna", args, kwargs)

    def fillna(self, *args, **kwargs):
        return self._via_series("fillna", args, kwargs)

    def floordiv(self, *args, **kwargs):
        return self._via_series("floordiv", args, kwargs)

    def mod(self, *args, **kwargs):
        return self._via_series("mod", args, kwargs)

    def nlargest(self, *args, **kwargs):
        return self._via_series("nlargest", args, kwargs)

    def nonzero(self, *args, **kwargs):
        return self._via_series("nonzero", args, kwargs)

    def nsmallest(self, *args, **kwargs):
        return self._via_series("nsmallest", args, kwargs)

    def pow(self, *args, **kwargs):
        return self._via_series("pow", args, kwargs)

    def rank(self, *args, **kwargs):
        return self._via_series("rank", args, kwargs)

    def round(self, *args, **kwargs):
        return self._via_series("round", args, kwargs)

    def shift(self, *args, **kwargs):
        return self._via_series("shift", args, kwargs)

    def sub(self, *args, **kwargs):
        return self._via_series("sub", args, kwargs)

    def abs(self, *args, **kwargs):
        return self._via_series("abs", args, kwargs)

    def clip(self, *args, **kwargs):
        return self._via_series("clip", args, kwargs)

    def clip_lower(self, *args, **kwargs):
        return self._via_series("clip_lower", args, kwargs)

    def clip_upper(self, *args, **kwargs):
        return self._via_series("clip_upper", args, kwargs)

    def interpolate(self, *args, **kwargs):
        return self._via_series("interpolate", args, kwargs)

    def resample(self, *args, **kwargs):
        return self._via_series("resample", args, kwargs)

    def replace(self, *args, **kwargs):
        return self._via_series("replace", args, kwargs)
def test_shift(self):
    """Check ``Series.shift`` on datetime- and period-indexed series.

    Covers positional shifts, shifts by a frequency, period counts taken from
    int32/int64 data (GH 8129) and subtracting a shifted tz-aware series
    (xref 8260).
    """
    lagged = self.ts.shift(1)
    restored = lagged.shift(-1)

    tm.assert_index_equal(lagged.index, self.ts.index)
    tm.assert_index_equal(restored.index, self.ts.index)
    tm.assert_numpy_array_equal(restored.dropna().values,
                                self.ts.values[:-1])

    # Shifting by a frequency and back again must round-trip.
    bday = BDay()
    lagged = self.ts.shift(1, freq=bday)
    restored = lagged.shift(-1, freq=bday)
    assert_series_equal(restored, self.ts)

    restored = self.ts.shift(0, freq=bday)
    assert_series_equal(restored, self.ts)

    lagged = self.ts.shift(1, freq='B')
    restored = lagged.shift(-1, freq='B')
    assert_series_equal(restored, self.ts)

    # corner case
    restored = self.ts.shift(0)
    assert_series_equal(restored, self.ts)

    # Shifting with PeriodIndex
    periods = tm.makePeriodSeries()
    lagged = periods.shift(1)
    restored = lagged.shift(-1)
    tm.assert_index_equal(lagged.index, periods.index)
    tm.assert_index_equal(restored.index, periods.index)
    tm.assert_numpy_array_equal(restored.dropna().values,
                                periods.values[:-1])

    by_alias = periods.shift(1, 'B')
    by_offset = periods.shift(1, BDay())
    assert_series_equal(by_alias, by_offset)
    assert_series_equal(periods, by_alias.shift(-1, 'B'))

    freq_mismatch = "Given freq D does not match PeriodIndex freq B"
    with pytest.raises(ValueError, match=freq_mismatch):
        periods.shift(freq='D')

    # legacy support
    by_alias_kw = periods.shift(1, freq='B')
    assert_series_equal(by_alias, by_alias_kw)

    by_offset_kw = periods.shift(1, freq=BDay())
    assert_series_equal(by_offset_kw, by_alias_kw)

    # 32-bit taking
    # GH 8129
    days = date_range('2000-01-01', periods=5)
    for int_dtype in ['int32', 'int64']:
        counts = Series(np.arange(5, dtype=int_dtype), index=days)
        n_periods = counts.iloc[1]
        got = counts.shift(periods=n_periods)
        want = Series([np.nan, 0, 1, 2, 3], index=days)
        assert_series_equal(got, want)

    # xref 8260
    # with tz
    eastern = Series(date_range('2000-01-01 09:00:00', periods=5,
                                tz='US/Eastern'), name='foo')
    got = eastern - eastern.shift()
    want = Series(TimedeltaIndex(['NaT'] + ['1 days'] * 4), name='foo')
    assert_series_equal(got, want)

    # incompat tz
    cet = Series(date_range('2000-01-01 09:00:00', periods=5, tz='CET'),
                 name='foo')
    tz_mismatch = ("DatetimeArray subtraction must have the same timezones or no"
                   " timezones")
    with pytest.raises(TypeError, match=tz_mismatch):
        eastern - cet
def position_to_return(inst: Instrument, position: pd.Series):
    """Multiply the previous row's position by the one-step change in close.

    Parameters
    ----------
    inst : Instrument
        Object exposing ``ohlcv.CLOSE``, whose ``diff(1)`` is used.
    position : pd.Series
        Positions; shifted by one row before the multiplication.

    Returns
    -------
    The element-wise product of ``position.shift(1)`` and
    ``inst.ohlcv.CLOSE.diff(1)``.
    """
    lagged_position = position.shift(1)
    close_change = inst.ohlcv.CLOSE.diff(1)
    return lagged_position * close_change
# encoding=utf-8
import pandas as pd
import numpy as np
from pandas import Series, DataFrame
from datetime import datetime
from pandas.tseries.offsets import Day, MonthEnd

# Shifting (leading / lagging) data
# Shifting means moving data backward or forward through time.
# Series and DataFrame both have a shift method that performs a plain
# forward or backward move while leaving the index unchanged.
ts = Series(np.random.randn(4),
            index=pd.date_range('1/1/2000', periods=4, freq='M'))
print(ts)

# Shift backward (lag)
print(ts.shift(2))

# Shift forward (lead)
print(ts.shift(-2))

# shift is commonly used to compute percent changes in one time series or
# across several time series
print(ts / ts.shift(1) - 1)

# A plain shift leaves the index alone and moves the data, which produces
# NaN; passing a frequency moves the timestamps instead of the data.
print(ts.shift(2, freq='M'))
print(ts.shift(3, freq='D'))
print(ts.shift(1, freq='3D'))
print(ts.shift(1, freq='90T'))

# Shifting dates with offsets
now = datetime(2011, 11, 17)
print(now + 3 * Day())
def test_constructor_with_datetime_tz(self):
    """Check building a ``Series`` from tz-aware datetimes (GH 8260).

    Exercises dtype reporting, export to a numpy array, indexing, concat,
    string formatting, dtype inference and an all-NaT series.
    """
    # 8260
    # support datetime64 with tz
    eastern_dtype = 'datetime64[ns, US/Eastern]'

    dr = date_range('20130101', periods=3, tz='US/Eastern')
    s = Series(dr)
    assert s.dtype.name == eastern_dtype
    assert s.dtype == eastern_dtype
    assert is_datetime64tz_dtype(s.dtype)
    assert eastern_dtype in str(s)

    # export
    exported = s.values
    assert isinstance(exported, np.ndarray)
    assert exported.dtype == 'datetime64[ns]'

    round_tripped = pd.DatetimeIndex(exported).tz_localize('UTC')
    round_tripped = round_tripped.tz_convert(tz=s.dt.tz)
    tm.assert_index_equal(dr, round_tripped)

    # indexing
    first = s.iloc[0]
    assert first == Timestamp('2013-01-01 00:00:00-0500',
                              tz='US/Eastern', freq='D')
    first = s[0]
    assert first == Timestamp('2013-01-01 00:00:00-0500',
                              tz='US/Eastern', freq='D')

    masked = s[Series([True, True, False], index=s.index)]
    assert_series_equal(masked, s[0:2])

    head = s.iloc[0:1]
    assert_series_equal(head, Series(dr[0:1]))

    # concat
    glued = pd.concat([s.iloc[0:1], s.iloc[1:]])
    assert_series_equal(glued, s)

    # short str
    assert eastern_dtype in str(s)

    # formatting with NaT
    shifted = s.shift()
    assert eastern_dtype in str(shifted)
    assert 'NaT' in str(shifted)

    # long str
    long_series = Series(date_range('20130101', periods=1000,
                                    tz='US/Eastern'))
    assert eastern_dtype in str(long_series)

    rebuilt = pd.DatetimeIndex(s, freq='infer')
    tm.assert_index_equal(rebuilt, dr)

    # inference
    s = Series([
        pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
        pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific'),
    ])
    assert s.dtype == 'datetime64[ns, US/Pacific]'
    assert lib.infer_dtype(s) == 'datetime64'

    # Mixed time zones fall back to object dtype.
    s = Series([
        pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
        pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Eastern'),
    ])
    assert s.dtype == 'object'
    assert lib.infer_dtype(s) == 'datetime'

    # with all NaT
    s = Series(pd.NaT, index=[0, 1], dtype='datetime64[ns, US/Eastern]')
    want = Series(pd.DatetimeIndex(['NaT', 'NaT'], tz='US/Eastern'))
    assert_series_equal(s, want)
# 频率和日期偏移量 from pandas.tseries.offsets import Hour, Minute # 间隔频率为4小时 date = pd.date_range('2000 1 1','2000 1 5',freq='4h') # print date # 间隔频率为1小时30分 date = pd.date_range('2000 1 1','2000 1 5',freq='1h30min') # print date # 移动数据 # shift可以沿时间轴前移或后移 ts = Series(np.random.randn(4), index=pd.date_range('1/1/2000', periods=4, freq='M')) # 移动数据 lagging_ts = ts.shift(2) leading_ts = ts.shift(-2) # print ts # print lagging_ts # print leading_ts # 移动时间index ,按月份移动 shift_ts = ts.shift(2, freq='M') # print ts # print shift_ts # 通过偏移量对日期位移 hours_3 = Hour(3) now = datetime.now() three_hours_later = now + hours_3 # print three_hours_later