Example #1
0
def kama(x, n=10, pow1=2, pow2=30):
    """KAMA: Kaufman's Adaptive Moving Average.

    Missing values are dropped before the calculation and the result is
    left-padded with the same number of NaNs, so the returned series has the
    same length and index as ``x``.

    Params:
        x (Series): Time series data such as close prices.

        n (int): number of periods for the Efficiency Ratio (ER).

        pow1 (int): number of periods for the fastest EMA constant.

        pow2 (int): number of periods for the slowest EMA constant.

    Returns:
        Series: Kaufman's adaptive moving average of x, indexed like x.
    """

    original_index = x.index
    nan_count = int(pd.isnull(x).sum())
    # Work on a positionally indexed copy of the valid values. Re-using the
    # original index here would fail as soon as any value had been dropped.
    clean = Series(x.dropna().values, name=x.name)

    change = (clean - clean.shift(n)).abs()
    volatility = (clean - clean.shift(1)).abs().rolling(window=n).sum()
    er = change / volatility
    fastest = 2.0 / (pow1 + 1.0)
    slowest = 2.0 / (pow2 + 1.0)
    sc = ((er * (fastest - slowest) + slowest) ** 2.0).to_numpy(dtype=float)

    # Plain arrays make every lookup below positional, whatever the index
    # labels of the input were.
    prices = clean.to_numpy(dtype=float)
    smoothed = np.full(prices.size, np.nan)
    seeded = False
    for i in range(prices.size):
        if np.isnan(sc[i]):
            continue
        if not seeded:
            # The first usable smoothing constant seeds KAMA with the price.
            smoothed[i] = prices[i]
            seeded = True
        else:
            smoothed[i] = smoothed[i - 1] + sc[i] * (prices[i] - smoothed[i - 1])

    padded = np.concatenate((np.full(nan_count, np.nan), smoothed))
    return Series(data=padded,
                  name="kama(%d,%d,%d)" % (n, pow1, pow2),
                  index=original_index)
Example #2
0
    def test_shift_dst(self):
        """Shifting a tz-aware datetime Series must keep the tz-aware dtype."""
        # GH 13926
        dates = date_range('2016-11-06', freq='H', periods=10, tz='US/Eastern')
        s = Series(dates)

        # A zero shift returns an equal Series.
        res = s.shift(0)
        tm.assert_series_equal(res, s)
        self.assertEqual(res.dtype, 'datetime64[ns, US/Eastern]')

        # Forward shift: one NaT in front, last value dropped.
        res = s.shift(1)
        exp_vals = [NaT] + dates.asobject.values.tolist()[:9]
        exp = Series(exp_vals)
        tm.assert_series_equal(res, exp)
        self.assertEqual(res.dtype, 'datetime64[ns, US/Eastern]')

        # Backward shift: first two values dropped, two NaT appended.
        res = s.shift(-2)
        exp_vals = dates.asobject.values.tolist()[2:] + [NaT, NaT]
        exp = Series(exp_vals)
        tm.assert_series_equal(res, exp)
        self.assertEqual(res.dtype, 'datetime64[ns, US/Eastern]')

        # Shifting by at least the length leaves only NaT, dtype unchanged.
        for ex in [10, -10, 20, -20]:
            res = s.shift(ex)
            exp = Series([NaT] * 10, dtype='datetime64[ns, US/Eastern]')
            tm.assert_series_equal(res, exp)
            self.assertEqual(res.dtype, 'datetime64[ns, US/Eastern]')
Example #3
0
    def test_operators_na_handling(self):
        """Adding a Decimal Series to its shifted self gives a missing first value."""
        from decimal import Decimal
        from datetime import date
        s = Series([Decimal('1.3'), Decimal('2.3')],
                   index=[date(2012, 1, 1), date(2012, 1, 2)])

        # Both operand orders are exercised.
        result = s + s.shift(1)
        result2 = s.shift(1) + s
        assert isna(result[0])
        assert isna(result2[0])
Example #4
0
    def test_shift2(self):
        """Shifting with ``freq`` moves the index; an index without freq cannot be shifted."""
        ts = Series(np.random.randn(5),
                    index=date_range('1/1/2000', periods=5, freq='H'))

        # The Series result must carry the same index as shifting the index directly.
        result = ts.shift(1, freq='5T')
        exp_index = ts.index.shift(1, freq='5T')
        tm.assert_index_equal(result.index, exp_index)

        # GH #1063, multiple of same base
        result = ts.shift(1, freq='4H')
        exp_index = ts.index + offsets.Hour(4)
        tm.assert_index_equal(result.index, exp_index)

        # Irregular dates: shifting without a frequency is expected to raise.
        idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04'])
        self.assertRaises(ValueError, idx.shift, 1)
Example #5
0
class Timeseries(object):
    """Timing cases for comparisons and differences on datetime Series.

    NOTE(review): ``goal_time``, ``params``, ``param_names``, ``setup`` and the
    ``time_*`` prefix look like airspeed-velocity (asv) conventions - confirm
    against the benchmark runner configuration.
    """

    goal_time = 0.2

    # Every case runs once with a naive and once with a tz-aware range.
    params = [None, 'US/Eastern']
    param_names = ['tz']

    def setup(self, tz):
        """Build the minute- and second-frequency Series used by the cases."""
        self.N = 10**6
        self.halfway = ((self.N // 2) - 1)
        self.s = Series(date_range('20010101', periods=self.N, freq='T',
                                   tz=tz))
        # Scalar taken from the middle of ``s``; used as the comparison operand.
        self.ts = self.s[self.halfway]

        self.s2 = Series(date_range('20010101', periods=self.N, freq='s',
                                    tz=tz))

    def time_series_timestamp_compare(self, tz):
        """Series on the left-hand side of the comparison."""
        self.s <= self.ts

    def time_timestamp_series_compare(self, tz):
        """Scalar on the left-hand side of the comparison."""
        self.ts >= self.s

    def time_timestamp_ops_diff(self, tz):
        """Difference via ``Series.diff``."""
        self.s2.diff()

    def time_timestamp_ops_diff_with_shift(self, tz):
        """Difference via explicit subtraction of the shifted Series."""
        self.s - self.s.shift()
Example #6
0
    def addStrategy(self, name, strategy):
        """Evaluate ``strategy`` on every row and store its cumulative return.

        ``strategy(i, row, stock)`` is called with the row position, the row
        itself and ``self.stock``; a truthy result means "hold" (signal 1).
        The signal decided on one row is applied to the next row's log
        return, and the exponentiated cumulative sum is stored in
        ``self.i_table[name]``.
        """
        close = self.df['Adj Close']
        # Iterate over all rows. Using close.count() would skip NaN prices and
        # produce a signal list shorter than the index.
        signals = [
            1 if strategy(i, self.df.iloc[i], self.stock) else 0
            for i in range(len(self.df))
        ]
        signal = Series(signals, self.df.index)

        unit_income = np.log(close / close.shift(1)) * signal.shift(1)
        self.i_table[name] = np.exp(unit_income.cumsum())
Example #7
0
def shiftTs():
    """Print a small dated Series before and after shifting its index by one 'M' period."""
    stamps = [datetime(2014, 1, day) for day in (2, 3, 4, 5)]
    series = Series(np.arange(4) + 2, index=stamps)
    print (series)
    # With freq given, the index labels move and the values stay put.
    shifted = series.shift(1, freq='M')
    print (shifted)
Example #8
0
 def test_dti_shift_across_dst(self):
     """Shifting an empty-valued Series by one hour moves its tz-aware index by one step."""
     # GH 8616
     idx = date_range('2013-11-03', tz='America/Chicago',
                      periods=7, freq='H')
     # Input uses all but the last stamp; expected uses all but the first.
     s = Series(index=idx[:-1])
     result = s.shift(freq='H')
     expected = Series(index=idx[1:])
     tm.assert_series_equal(result, expected)
Example #9
0
 def test_dti_shift_near_midnight(self, shift, result_time):
     """Shift a single midnight EST stamp by ``shift`` hours and compare with ``result_time``.

     NOTE(review): ``shift`` and ``result_time`` are presumably supplied by a
     parametrize decorator that is not visible here - confirm.
     """
     # GH 8616
     dt = datetime(2014, 11, 14, 0)
     dt_est = pytz.timezone('EST').localize(dt)
     s = Series(data=[1], index=[dt_est])
     result = s.shift(shift, freq='H')
     expected = Series(1, index=DatetimeIndex([result_time], tz='EST'))
     tm.assert_series_equal(result, expected)
Example #10
0
    def test_shift2(self):
        """Shifting with ``freq`` moves the index; an index without freq raises NullFrequencyError."""
        ts = Series(np.random.randn(5),
                    index=date_range('1/1/2000', periods=5, freq='H'))

        # The Series result must carry the same index as shifting the index directly.
        result = ts.shift(1, freq='5T')
        exp_index = ts.index.shift(1, freq='5T')
        tm.assert_index_equal(result.index, exp_index)

        # GH #1063, multiple of same base
        result = ts.shift(1, freq='4H')
        exp_index = ts.index + offsets.Hour(4)
        tm.assert_index_equal(result.index, exp_index)

        # Irregular dates: the error message is matched as well as the type.
        idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04'])
        msg = "Cannot shift with no freq"
        with pytest.raises(NullFrequencyError, match=msg):
            idx.shift(1)
Example #11
0
    def test_shift_multiple_of_same_base(self):
        """Shifting an hourly Series by '4H' must move its index by four hours."""
        # GH #1063
        ts = Series(np.random.randn(5), index=date_range("1/1/2000", periods=5, freq="H"))

        result = ts.shift(1, freq="4H")

        exp_index = ts.index + datetools.Hour(4)

        # assertTrue replaces the deprecated ``assert_`` alias, which newer
        # unittest versions no longer provide.
        self.assertTrue(result.index.equals(exp_index))
Example #12
0
    def test_shift_fill_value(self):
        """``fill_value`` must fill the slots opened by a shift and keep the dtype."""
        # GH #24128
        ts = Series([1.0, 2.0, 3.0, 4.0, 5.0],
                    index=date_range('1/1/2000', periods=5, freq='H'))

        exp = Series([0.0, 1.0, 2.0, 3.0, 4.0],
                     index=date_range('1/1/2000', periods=5, freq='H'))
        # check that fill value works
        result = ts.shift(1, fill_value=0.0)
        tm.assert_series_equal(result, exp)

        # Two periods: two leading fill values.
        exp = Series([0.0, 0.0, 1.0, 2.0, 3.0],
                     index=date_range('1/1/2000', periods=5, freq='H'))
        result = ts.shift(2, fill_value=0.0)
        tm.assert_series_equal(result, exp)

        # An integer fill value on an integer Series must not change the dtype.
        ts = pd.Series([1, 2, 3])
        res = ts.shift(2, fill_value=0)
        assert res.dtype == ts.dtype
Example #13
0
    def test_operators_na_handling(self):
        """Missing values must survive addition on Decimal and string Series."""
        from decimal import Decimal
        from datetime import date
        s = Series([Decimal('1.3'), Decimal('2.3')],
                   index=[date(2012, 1, 1), date(2012, 1, 2)])

        # Both operand orders are exercised; the first slot has no partner value.
        result = s + s.shift(1)
        result2 = s.shift(1) + s
        self.assertTrue(isnull(result[0]))
        self.assertTrue(isnull(result2[0]))

        # String concatenation from the left keeps the NaN entry.
        s = Series(['foo', 'bar', 'baz', np.nan])
        result = 'prefix_' + s
        expected = Series(['prefix_foo', 'prefix_bar', 'prefix_baz', np.nan])
        assert_series_equal(result, expected)

        # Same from the right.
        result = s + '_suffix'
        expected = Series(['foo_suffix', 'bar_suffix', 'baz_suffix', np.nan])
        assert_series_equal(result, expected)
Example #14
0
    def test_timedelta64(self):
        """Check the string rendering of timedelta results from datetime arithmetic."""

        from pandas import date_range
        from datetime import datetime, timedelta

        # Only checks that rendering a timedelta64[s] array does not raise.
        Series(np.array([1100, 20], dtype='timedelta64[s]')).to_string()

        s = Series(date_range('2012-1-1', periods=3, freq='D'))

        # GH2146

        # adding NaTs
        y = s-s.shift(1)
        result = y.to_string()
        self.assertTrue('1 days, 00:00:00' in result)
        self.assertTrue('NaT' in result)

        # with frac seconds
        o = Series([datetime(2012,1,1,microsecond=150)]*3)
        y = s-o
        result = y.to_string()
        self.assertTrue('-00:00:00.000150' in result)

        # rounding?
        o = Series([datetime(2012,1,1,1)]*3)
        y = s-o
        result = y.to_string()
        self.assertTrue('-01:00:00' in result)
        self.assertTrue('1 days, 23:00:00' in result)

        o = Series([datetime(2012,1,1,1,1)]*3)
        y = s-o
        result = y.to_string()
        self.assertTrue('-01:01:00' in result)
        self.assertTrue('1 days, 22:59:00' in result)

        o = Series([datetime(2012,1,1,1,1,microsecond=150)]*3)
        y = s-o
        result = y.to_string()
        self.assertTrue('-01:01:00.000150' in result)
        self.assertTrue('1 days, 22:58:59.999850' in result)

        # neg time
        td = timedelta(minutes=5,seconds=3)
        s2 = Series(date_range('2012-1-1', periods=3, freq='D')) + td
        y = s - s2
        result = y.to_string()
        self.assertTrue('-00:05:03' in result)

        # NOTE(review): s2 is rebuilt here but not used afterwards; the
        # assertion below checks ``s - td``, which is a datetime result.
        td = timedelta(microseconds=550)
        s2 = Series(date_range('2012-1-1', periods=3, freq='D')) + td
        y = s - td
        result = y.to_string()
        self.assertTrue('2012-01-01 23:59:59.999450' in result)
Example #15
0
    def test_comparison_object_numeric_nas(self):
        """Comparisons on an object Series with missing values must match the float result."""
        ser = Series(np.random.randn(10), dtype=object)
        # The shift puts missing values in the first two slots.
        shifted = ser.shift(2)

        ops = ['lt', 'le', 'gt', 'ge', 'eq', 'ne']
        for op in ops:
            func = getattr(operator, op)

            # Reference: the same comparison after casting both sides to float.
            result = func(ser, shifted)
            expected = func(ser.astype(float), shifted.astype(float))
            tm.assert_series_equal(result, expected)
Example #16
0
def DONCH(df, n):
    """Append a ``Donchian_<n>`` column (window high minus window low) to ``df``.

    ``df`` needs ``High`` and ``Low`` columns. The loop bound compares window
    positions with ``df.index[-1]``, so an integer index is expected.

    NOTE(review): the values are padded with ``n - 1`` zeros and then shifted
    by ``n - 1`` again, and the last window is not evaluated. That is the
    existing behaviour and is kept unchanged here.

    Returns:
        DataFrame: ``df`` joined with the new column.
    """
    DC_l = [0] * (n - 1)
    i = 0
    while i + n - 1 < df.index[-1]:
        # .loc slices by label and includes both ends, as the removed .ix
        # accessor did on an integer index.
        window_high = df["High"].loc[i : i + n - 1]
        window_low = df["Low"].loc[i : i + n - 1]
        DC_l.append(max(window_high) - min(window_low))
        i = i + 1
    DonCh = Series(DC_l, name="Donchian_" + str(n))
    DonCh = DonCh.shift(n - 1)
    return df.join(DonCh)
Example #17
0
def kama(close,
         length=None,
         fast=None,
         slow=None,
         drift=None,
         offset=None,
         **kwargs):
    """Indicator: Kaufman's Adaptive Moving Average (KAMA)

    Args:
        close: price series; passed through ``verify_series``.
        length: efficiency-ratio window; defaults to 10 when missing or not positive.
        fast: period of the fast smoothing constant; defaults to 2.
        slow: period of the slow smoothing constant; defaults to 30.
        drift: difference period, resolved by ``get_drift``.
        offset: number of periods to shift the result, resolved by ``get_offset``.

    Returns:
        Series named ``KAMA_<length>_<fast>_<slow>`` on the index of ``close``.
    """
    # Validate Arguments
    close = verify_series(close)
    length = int(length) if length and length > 0 else 10
    fast = int(fast) if fast and fast > 0 else 2
    slow = int(slow) if slow and slow > 0 else 30
    drift = get_drift(drift)
    offset = get_offset(offset)

    # Calculate Result
    m = close.size

    def weight(length: int) -> float:
        return 2 / (length + 1)

    fr = weight(fast)
    sr = weight(slow)

    abs_diff = non_zero_range(close, close.shift(length)).abs()
    peer_diff = non_zero_range(close, close.shift(drift)).abs()
    peer_diff_sum = peer_diff.rolling(length).sum()
    er = abs_diff / peer_diff_sum
    x = er * (fr - sr) + sr
    sc = x * x

    # length - 1 warm-up NaNs followed by a zero seed for the recursion.
    result = [npNaN] * (length - 1) + [0]
    for i in range(length, m):
        # .iloc keeps the lookup positional whatever the index labels are.
        sc_i = sc.iloc[i]
        result.append(sc_i * close.iloc[i] + (1 - sc_i) * result[i - 1])

    kama = Series(result, index=close.index)

    # Offset
    if offset != 0:
        kama = kama.shift(offset)

    # Name & Category
    kama.name = f"KAMA_{length}_{fast}_{slow}"
    kama.category = "overlap"

    return kama
Example #18
0
def trend_return(close,
                 trend,
                 log=None,
                 cumulative=None,
                 offset=None,
                 trend_reset=0,
                 **kwargs):
    """Indicator: Trend Return

    Args:
        close: price series; passed through ``verify_series``.
        trend: series cast to int; rows equal to ``trend_reset`` reset the sum.
        log: use ``log_return`` when truthy, otherwise ``percent_return``.
        cumulative: accumulate returns within a trend instead of reporting
            the single-period return.
        offset: number of periods to shift the result.
        trend_reset: trend value that resets the running sum (int, default 0).

    Returns:
        Series named ``[C](L|P)TR`` on the index of ``trend``.
    """
    # Validate Arguments
    close = verify_series(close)
    trend = verify_series(trend)
    offset = get_offset(offset)
    trend_reset = int(trend_reset) if trend_reset and isinstance(
        trend_reset, int) else 0

    # Calculate Result
    returns = log_return(close, cumulative=False) if log else percent_return(
        close, cumulative=False)
    m = trend.size
    tsum = 0
    trend = trend.astype(int)
    returns = (trend * returns).apply(zero)

    result = []
    for i in range(0, m):
        # .iloc keeps the lookup positional whatever the index labels are.
        if trend.iloc[i] == trend_reset:
            tsum = 0
        else:
            return_ = returns.iloc[i]
            if cumulative:
                tsum += return_
            else:
                tsum = return_
        result.append(tsum)

    # One value per trend row, so the result carries the trend's index
    # instead of a fresh 0..m-1 range.
    trend_return = Series(result, index=trend.index)

    # Offset
    if offset != 0:
        trend_return = trend_return.shift(offset)

    # Name & Category
    trend_return.name = f"{'C' if cumulative else ''}{'L' if log else 'P'}TR"
    trend_return.category = 'performance'

    return trend_return
Example #19
0
def ta_future_multiband_bucket(df: _pd.Series, forecast_period=14, period=5, stddevs=[0.5, 1.0, 1.5, 2.0], ddof=1):
    """Label each row with the band bucket that the value ``forecast_period`` rows ahead falls into.

    The bands come from ``_i.ta_multi_bbands``. The bucket number is the
    position of the first band column whose value exceeds the future value,
    or the number of band columns when none does. Rows without a future value
    keep NaN.
    """
    bands = _i.ta_multi_bbands(df, period, stddevs=stddevs, ddof=ddof)
    shifted = df.shift(-forecast_period)

    def bucket_position(price, thresholds):
        # Missing future price: propagate it unchanged.
        if _np.isnan(price):
            return price
        for pos, bound in enumerate(thresholds):
            if price < bound:
                return pos
        return len(thresholds)

    def classify(row):
        return bucket_position(row[shifted.name], row[bands.columns])

    joined = bands.join(shifted)
    return joined.apply(classify, axis=1, raw=False)
Example #20
0
def points_grid_to_poly(gpd, id_col):
    """
    Turn a GeoDataFrame of evenly spaced grid points into square polygons, one per input row.

    gpd -- GeoDataFrame of gridded points with an id column.\n
    id_col -- The id column name.

    The side length is the smallest positive difference between the x
    coordinates of consecutive rows.
    """

    x_coords = Series(gpd.geometry.apply(lambda pt: pt.x))
    gaps = (x_coords.shift() - x_coords).abs()
    side_len = gaps[gaps > 0].min()

    squares = gpd.apply(
        lambda row: point_to_poly_apply(row.geometry, side_len=side_len), axis=1)
    return GeoDataFrame(gpd[id_col], geometry=squares, crs=gpd.crs)
Example #21
0
def roc(values: pd.Series, period: int = 1) -> pd.Series:
    """
    Compute the rate of change of a price series, in percent.

    Parameters
    ----------
    values : pd.Series
        series of data
    period : int
        number of periods to look back

    Returns
    -------
    pd.Series
        rate of change of the `values` parameter
    """
    delta = values.diff(period)
    base = values.shift(period)
    return 100 * delta / base
Example #22
0
def get_target_return(t: pd.Series,
                      nb_days=365,
                      weeks=None,
                      months=None,
                      trim_data=False) -> pd.Series:
    """Return the annualised return (in %) from buying on a date and selling
    ``nb_days`` later.

    ``months`` (counted as 4 weeks each) and ``weeks`` override ``nb_days``
    when given. ``t`` may be a Series or a DataFrame of prices on a datetime
    index. The result is restricted to the dates present in ``t`` and to rows
    that contain information. With ``trim_data`` a DataFrame additionally
    loses every row that has any missing value.
    """

    if months is not None:
        weeks = months * 4
    if weeks is not None:
        nb_days = weeks * 7

    # Remember the caller's dates. The daily reindexing below is only a
    # calculation aid and must not leak into the result.
    original_index = t.index

    # Put the data on a daily index. Prices are assumed to persist, e.g.
    # Friday's value also applies to Saturday and Sunday.
    idx = pd.date_range(start=original_index.min(),
                        end=original_index.max(),
                        freq="D")
    daily = t.reindex(idx, method="ffill")
    vi = daily
    vf = daily.shift(-nb_days)
    r = vf / vi - 1

    # Annualise
    r = (1 + r)**(365 / nb_days) - 1
    r = 100 * r

    # Drop the filler days again and keep only the dates defined by t.
    r = r.loc[r.index.intersection(original_index)]

    # Keep only rows that carry information: if t runs until 2019-10-31 and
    # nb_days = 365, r is only defined until about 2018-10-31.
    if isinstance(r, pd.DataFrame):
        r = r[~r.isna().all(axis=1)]
        if trim_data:
            r = r[~r.isna().any(axis=1)]
    else:
        # For a Series "all values missing" is simply "the value is missing",
        # so trim_data has nothing further to remove.
        r = r[~r.isna()]
    return r
Example #23
0
    def test_datetime_series_shift_with_freq(self, datetime_series):
        """Shifting with ``freq="infer"`` must round-trip and match an explicit freq."""
        shifted = datetime_series.shift(1, freq="infer")
        unshifted = shifted.shift(-1, freq="infer")
        tm.assert_series_equal(datetime_series, unshifted)

        # Passing the index's own freq must give the same result as "infer".
        shifted2 = datetime_series.shift(freq=datetime_series.index.freq)
        tm.assert_series_equal(shifted, shifted2)

        # Rebuild the Series on an index made from the raw index array and
        # shift it with freq="infer".
        inferred_ts = Series(datetime_series.values,
                             Index(np.asarray(datetime_series.index)),
                             name="ts")
        shifted = inferred_ts.shift(1, freq="infer")
        expected = datetime_series.shift(1, freq="infer")
        # The expected index is compared with its freq removed.
        expected.index = expected.index._with_freq(None)
        tm.assert_series_equal(shifted, expected)

        unshifted = shifted.shift(-1, freq="infer")
        tm.assert_series_equal(unshifted, inferred_ts)
def outs_per_inning(x: pd.Series):
    """
    Aggregation that sums the one-step differences of a `pd.Series`, treating
    the value before the first element as 0.
    Meant for a groupby aggregation that counts the outs recorded in an inning.

    * **usage**:

    ```python
    df.groupby(["inning"]).agg({"postouts": outs_per_inning})
    ```

    * input:
        - `x`: `pd.Series`

    * output:
        - sum of one time-step differences in `x`
    """
    previous = x.shift(1).fillna(0)
    step_changes = x - previous
    return step_changes.sum()
Example #25
0
def get_bar_based_hasbrouck_lambda(close: pd.Series,
                                   dollar_volume: pd.Series,
                                   window: int = 20) -> pd.Series:
    """
    Advances in Financial Machine Learning, p.289-290.

    Get Hasbrouck lambda from bars data

    A bar whose log return is exactly zero takes the sign of the bar before
    it. A leading zero stays zero and a NaN sign stays NaN.

    :param close: (pd.Series) Close prices
    :param dollar_volume: (pd.Series) Dollar volumes
    :param window: (int) Rolling window used for estimation
    :return: (pd.Series) Hasbrouck lambda
    """
    log_ret = np.log(close / close.shift(1))

    # Carry the previous sign into zero-return bars without the deprecated
    # Series.replace(..., method='pad'). For each position, find the position
    # of the most recent non-zero entry and take the sign from there.
    sign_values = np.sign(log_ret).to_numpy(dtype=float)
    positions = np.arange(sign_values.size)
    source = np.maximum.accumulate(np.where(sign_values != 0, positions, 0))
    log_ret_sign = pd.Series(sign_values[source], index=log_ret.index)

    signed_dollar_volume_sqrt = log_ret_sign * np.sqrt(dollar_volume)
    return (log_ret / signed_dollar_volume_sqrt).rolling(window=window).mean()
Example #26
0
def fisher(high, low, length=None, offset=None, **kwargs):
    """Indicator: Fisher Transform (FISHT)

    Args:
        high, low: price series; passed through ``verify_series``.
        length: rolling window; defaults to 5 when missing or not positive.
        offset: number of periods to shift the result.
        fillna (kwarg): value used to fill missing results.
        fill_method (kwarg): fill method name passed to ``fillna``.

    Returns:
        Series named ``FISHERT_<length>`` on the index of ``high``.
    """
    # Validate Arguments
    high = verify_series(high)
    low = verify_series(low)
    length = int(length) if length and length > 0 else 5
    offset = get_offset(offset)

    # Calculate Result
    m = high.size
    hl2_ = hl2(high, low)
    max_high = hl2_.rolling(length).max()
    min_low = hl2_.rolling(length).min()
    hl2_range = max_high - min_low
    # Avoid dividing by a (near) zero range.
    hl2_range[hl2_range < 1e-5] = 0.001
    position = (hl2_ - min_low) / hl2_range

    v = 0
    fish = 0
    # Warm-up NaNs, capped at m so the result never outgrows the index.
    result = [npNaN] * min(length - 1, m)
    for i in range(length - 1, m):
        # .iloc keeps the lookup positional whatever the index labels are.
        v = 0.66 * (position.iloc[i] - 0.5) + 0.67 * v
        # Clamp so the logarithm below stays finite.
        if v > 0.99: v = 0.999
        if v < -0.99: v = -0.999
        fish = 0.5 * (fish + nplog((1 + v) / (1 - v)))
        result.append(fish)

    # Carry the input index instead of a fresh 0..m-1 range.
    fisher = Series(result, index=high.index)

    # Offset
    if offset != 0:
        fisher = fisher.shift(offset)

    # Handle fills
    if 'fillna' in kwargs:
        fisher.fillna(kwargs['fillna'], inplace=True)
    if 'fill_method' in kwargs:
        fisher.fillna(method=kwargs['fill_method'], inplace=True)

    # Name and Categorize it
    fisher.name = f"FISHERT_{length}"
    fisher.category = 'momentum'

    return fisher
Example #27
0
def price_channel_upper(price_high: Series, period: int) -> Series:
    """
    Price channel upper line.

    <Description>
    Computes the upper line of a price channel: the highest high over the
    ``period`` bars preceding each bar (the current bar is excluded).

    <Usage>
    Pass the high prices as the first argument and the channel period as the
    second. For a 20-day upper line write 'price_channel_upper(high, 20)'
    or '가격채널상한선(고가, 20)'.

    :param price_high: high prices
    :param period: window length used for the upper line
    :return: rolling maximum of the previous ``period`` highs
    """
    previous_highs = price_high.shift(1)
    window = previous_highs.rolling(window=period)
    return window.max()
Example #28
0
def price_channel_lower(price_low: Series, period: int) -> Series:
    """
    Price channel lower line.

    <Description>
    Computes the lower line of a price channel: the lowest low over the
    ``period`` bars preceding each bar (the current bar is excluded).

    <Usage>
    Pass the low prices as the first argument and the channel period as the
    second. For a 20-day lower line write 'price_channel_lower(low, 20)'
    or '가격채널하한선(저가, 20)'.

    :param price_low: low prices
    :param period: window length used for the lower line
    :return: rolling minimum of the previous ``period`` lows
    """
    previous_lows = price_low.shift(1)
    window = previous_lows.rolling(window=period)
    return window.min()
Example #29
0
def crosscorr(data_x: pd.Series,
              data_y: pd.Series,
              lag: Optional[int] = 0) -> float:
    """
    Lag-N cross correlation of two time series.

    The index of ``data_y`` is moved by ``lag`` calendar days before the
    correlation with ``data_x`` is taken.

    Parameters
    ----------
    data_x : pandas Series
        The first time series
    data_y : pandas Series
        The 2nd time series
    lag : int, optional, default is 0
        Lag in days.

    Returns
    -------
    Cross-correlation with specified lag for the given time series.
    """
    lagged_y = data_y.shift(lag, freq="D")
    return data_x.corr(lagged_y)
Example #30
0
def vidya(close, length=None, drift=None, offset=None, **kwargs):
    """Indicator: Variable Index Dynamic Average (VIDYA)

    Args:
        close: price series; passed through ``verify_series``.
        length: window length; defaults to 14 when missing or not positive.
        drift: difference period, resolved by ``get_drift``.
        offset: number of periods to shift the result.

    Returns:
        Series named ``VIDYA_<length>`` on the index of ``close``, or None
        when ``verify_series`` rejects the input.
    """
    # Validate Arguments
    length = int(length) if length and length > 0 else 14
    close = verify_series(close, length)
    drift = get_drift(drift)
    offset = get_offset(offset)

    if close is None: return

    def _cmo(source: Series, n: int, d: int):
        """Chande Momentum Oscillator (CMO) Patch
        For some reason: from pandas_ta.momentum import cmo causes
        pandas_ta.momentum.coppock to not be able to import it's
        wma like from pandas_ta.overlap import wma?
        Weird Circular TypeError!?!
        """
        mom = source.diff(d)
        positive = mom.copy().clip(lower=0)
        negative = mom.copy().clip(upper=0).abs()
        pos_sum = positive.rolling(n).sum()
        neg_sum = negative.rolling(n).sum()
        return (pos_sum - neg_sum) / (pos_sum + neg_sum)

    # Calculate Result
    m = close.size
    alpha = 2 / (length + 1)
    abs_cmo = _cmo(close, length, drift).abs()
    # Float accumulator: an integer Series would need an implicit dtype
    # change on the first assignment below.
    vidya = Series(0.0, index=close.index)
    for i in range(length, m):
        weight = alpha * abs_cmo.iloc[i]
        vidya.iloc[i] = weight * close.iloc[i] + vidya.iloc[i - 1] * (1 - weight)
    # Zeros mark slots that were never computed; report them as missing.
    vidya.replace({0: npNaN}, inplace=True)

    # Offset
    if offset != 0:
        vidya = vidya.shift(offset)

    # Name & Category
    vidya.name = f"VIDYA_{length}"
    vidya.category = "overlap"

    return vidya
Example #31
0
def get_feature_return(t: pd.Series, nb_days=[1, 7, 30]):
    """`t` is a time series of an asset's prices.
    Returns a dataframe with one column ``r_<n>`` per entry of `nb_days`,
    holding the annualised return (in %) over the last ``n`` days. Prices are
    forward-filled onto a daily index first.
    """
    daily_index = pd.date_range(start=t.index.min(), end=t.index.max(), freq="D")
    prices = t.reindex(daily_index, method="ffill")

    def annualised_pct(horizon):
        # Simple return over the horizon, compounded to a year, as a percentage.
        past = prices.shift(horizon)
        simple = prices / past - 1
        yearly = (1 + simple)**(365 / horizon) - 1
        return yearly * 100

    columns = {f"r_{horizon}": annualised_pct(horizon) for horizon in nb_days}
    return pd.DataFrame(columns)
Example #32
0
def lag_time_series(time_series: pd.Series, lags):
    """ Build a frame of lagged copies of a time series.

    Parameters
    ----------
    time_series : pd.Series
    lags : list[int]
        List of lags

    Returns
    -------
    pd.DataFrame
        One column per lag, named with the lag as a string.
    """
    shifted_by_lag = {str(lag): time_series.shift(lag) for lag in lags}
    return pd.concat(shifted_by_lag, axis=1)
Example #33
0
    def max_index(s: pd.Series, n: int = 5):
        """
        Find where the maximum sits inside each trailing window of ``n`` values.

        The position is 1-based from the oldest value in the window: ``n``
        means the current value is the maximum, ``1`` means the oldest one is.

        :param s: input series
        :param n: window length
        :return: float series on the index of ``s``; the first ``n - 1`` rows
            are NaN, and every row is NaN when ``s`` has fewer than ``n``
            non-missing values
        """
        # Compare against the requested window, not a fixed 5.
        if len(s.dropna()) < n:
            return pd.Series(np.nan, index=s.index, dtype=float)

        # Column for shift i is labelled n - i, so shift 0 (the current
        # value) gets the label n.
        k = pd.concat([s.shift(i) for i in range(n)], axis=1)
        k.columns = [n - i for i in range(n)]
        # Cast to float first so NaN can be stored without a dtype change.
        m = k.idxmax(axis=1).astype(float)
        # The first n-1 rows do not have a full window and are not compared.
        m.iloc[0:n - 1] = np.nan
        return m
Example #34
0
def rsi_cross_signals(rsi_values: pd.Series,
                      cross_line: float,
                      direction: str = 'rise'):
    """
    Flag the rows where the RSI crosses a threshold line in a given direction.

    Parameters
    ----------
    rsi_values : pandas.Series
        RSI values
    cross_line : float
        threshold line, strictly between 0 and 100
    direction : str
        'rise' (default) flags rows where the RSI reaches or exceeds the line
        after being below it; 'fall' flags rows where it reaches or drops
        under the line after being above it

    Returns
    -------
    pandas.Series
        boolean series named 'Cross signal (<cross_line> on <direction>)'

    Raises
    ------
    ValueError
        for a cross_line outside (0, 100) or an unknown direction
    """

    if cross_line <= 0 or cross_line >= 100:
        raise ValueError('cross_line takes values from 0 to 100')

    today = rsi_values
    day_before = rsi_values.shift(1)

    if direction == 'rise':
        crossed = (today >= cross_line) & (day_before < cross_line)
    elif direction == 'fall':
        crossed = (today <= cross_line) & (day_before > cross_line)
    else:
        raise ValueError('wrong value for direction, must be "rise" or "fall"')

    return crossed.rename(f'Cross signal ({cross_line} on {direction})')
Example #35
0
    def test_argsort(self, datetime_series):
        """``argsort`` returns integers and puts -1 at the slot of a missing datetime."""
        self._check_accum_op("argsort", datetime_series, check_dtype=False)
        argsorted = datetime_series.argsort()
        assert issubclass(argsorted.dtype.type, np.integer)

        # GH 2967 (introduced bug in 0.11-dev I think)
        s = Series([Timestamp("201301{i:02d}".format(i=i)) for i in range(1, 6)])
        assert s.dtype == "datetime64[ns]"
        # The backward shift leaves a missing value in the last slot.
        shifted = s.shift(-1)
        assert shifted.dtype == "datetime64[ns]"
        assert isna(shifted[4])

        result = s.argsort()
        expected = Series(range(5), dtype="int64")
        tm.assert_series_equal(result, expected)

        # The missing last element is expected to be reported as -1.
        result = shifted.argsort()
        expected = Series(list(range(4)) + [-1], dtype="int64")
        tm.assert_series_equal(result, expected)
Example #36
0
def wma(close, length=None, asc=None, offset=None, **kwargs):
    """Indicator: Weighted Moving Average (WMA)

    Args:
        close: price series; passed through ``verify_series``.
        length: window length; defaults to 10 when missing or not positive.
        asc: when True (the default) the most recent value gets the largest
            weight; when False the weights are reversed. Only used by the
            built-in calculation, not by the talib branch.
        offset: number of periods to shift the result.
        fillna (kwarg): value used to fill missing results.
        fill_method (kwarg): fill method name passed to ``fillna``.

    Returns:
        Series named ``WMA_<length>``.
    """
    # Validate Arguments
    close = verify_series(close)
    length = int(length) if length and length > 0 else 10
    # Only a missing value falls back to True; an explicit False is honoured.
    asc = True if asc is None else bool(asc)
    offset = get_offset(offset)

    # Calculate Result
    if Imports["talib"]:
        from talib import WMA
        # Bind the same name the rest of the function uses.
        wma_ds = WMA(close, length)
    else:
        total_weight = 0.5 * length * (length + 1)
        weights_ = np.arange(1, length + 1)
        weights = weights_ if asc else np.flip(weights_)

        def _linear(x):
            return np.dot(x, weights) / total_weight

        values = [
            _linear(each)
            for each in np.lib.stride_tricks.sliding_window_view(np.array(close), length)
        ]
        # The first length - 1 rows have no full window.
        wma_ds = Series([np.nan] * (length - 1) + values)
        wma_ds.index = close.index

    # Offset
    if offset != 0:
        wma_ds = wma_ds.shift(offset)

    # Handle fills
    if "fillna" in kwargs:
        wma_ds.fillna(kwargs["fillna"], inplace=True)
    if "fill_method" in kwargs:
        wma_ds.fillna(method=kwargs["fill_method"], inplace=True)

    # Name & Category
    wma_ds.name = f"WMA_{length}"
    wma_ds.category = "overlap"

    return wma_ds
    def test_diff(self):
        """``Series.diff`` must agree with explicit subtraction of the shifted Series."""
        # Just run the function
        self.ts.diff()

        # int dtype
        # Two large integers one apart: the difference must come out as exactly 1.
        a = 10000000000000000
        b = a + 1
        s = Series([a, b])

        rs = s.diff()
        assert rs[1] == 1

        # neg n
        rs = self.ts.diff(-1)
        xp = self.ts - self.ts.shift(-1)
        assert_series_equal(rs, xp)

        # 0
        rs = self.ts.diff(0)
        xp = self.ts - self.ts
        assert_series_equal(rs, xp)

        # datetime diff (GH3100)
        s = Series(date_range('20130102', periods=5))
        rs = s - s.shift(1)
        xp = s.diff()
        assert_series_equal(rs, xp)

        # timedelta diff
        nrs = rs - rs.shift(1)
        nxp = xp.diff()
        assert_series_equal(nrs, nxp)

        # with tz
        s = Series(date_range('2000-01-01 09:00:00',
                              periods=5,
                              tz='US/Eastern'),
                   name='foo')
        result = s.diff()
        # Expected: a leading NaT, then one-day steps, with the name kept.
        assert_series_equal(
            result, Series(TimedeltaIndex(['NaT'] + ['1 days'] * 4),
                           name='foo'))
Example #38
0
def plot_growth_rate(data: pd.Series, smoothing_window: int = 4) -> None:
    """
    Plot the daily growth rate of a time series together with a smoothed
    version (the index of data must be consecutive days as DateTime objects).
    """
    one_day = pd.to_timedelta('1d')

    # Daily growth rate: each value divided by the value one day earlier.
    raw_rates = (data / data.shift(1, one_day)).dropna()

    # Smooth with the geometric mean over a time window of the given length.
    rolling_window = raw_rates.rolling(window=smoothing_window * one_day)
    smoothed = rolling_window.apply(gmean).dropna()

    plt.plot(smoothed)
    plt.plot(raw_rates)
    plt.legend(['Smooth', 'Original'])
    plt.show()
Example #39
0
def get_data_range_iter(s: pd.Series, extent_left=False):
    """
    Yield ``(first_index, last_index, value)`` for each run of equal
    consecutive values in ``s``. Consecutive missing values count as equal.

    An empty series yields nothing. Values may be of any type that supports
    ``!=`` (numbers, strings, ...).

    :param s: series to scan
    :param extent_left: when True, each run's left boundary is the right
        boundary of the previously yielded run (the first run starts at the
        first index)
    :return: generator of ``(idx_from, idx_to, data)`` tuples
    """
    if len(s) == 0:
        return

    is_new_range, idx_from, idx_to, data = True, s.index[0], None, None
    # Pair every value with its successor; the last value is paired with a
    # missing value.
    for (idx_to, data), (_, d2) in zip(s.items(), s.shift(-1).items()):
        if is_new_range and not extent_left:
            idx_from = idx_to
            is_new_range = False

        # pd.isna accepts any scalar, so non-numeric data does not raise.
        data_missing, next_missing = pd.isna(data), pd.isna(d2)
        if data_missing and next_missing:
            # Still inside a run of missing values.
            continue
        if data_missing or next_missing or data != d2:
            yield idx_from, idx_to, data
            if extent_left:
                idx_from = idx_to
            is_new_range = True

    # A run that is still open after the last element (trailing missing
    # values) is flushed here.
    if not is_new_range:
        yield idx_from, idx_to, data
Example #40
0
def daily_growth_rate(series: pd.Series, **kwargs):
    """Plot the daily growth rate (%) of a 7-period rolling mean of ``series``.

    Rolling means below the threshold are ignored. Nothing is plotted when
    no growth rate can be computed. ``kwargs`` are handed to the plotting
    helpers, with a threshold note prepended to ``lfooter``. Always returns None.
    """
    PERIOD = 7
    THRESHOLD = 10  # minimum cases per day on average

    smoothed = series.rolling(PERIOD).mean()
    # Mask small values so they do not produce noisy rates.
    smoothed = smoothed.where(smoothed >= THRESHOLD, other=np.nan)

    # Log growth over PERIOD steps, expressed per step and in percent.
    k = np.log(smoothed / smoothed.shift(PERIOD)) / PERIOD * 100
    if k.isna().all():
        return None

    fig, ax = plt.subplots()
    line(ax, k, kwargs)
    ax.axhline(0, color='#999999', lw=0.5)

    existing_footer = kwargs.get('lfooter', '')
    kwargs['lfooter'] = f'When daily new cases >= {THRESHOLD}; ' + existing_footer
    finalise_plot(ax, **kwargs)
    return None
Example #41
0
def hwma(close, na=None, nb=None, nc=None, offset=None, **kwargs):
    """Indicator: Holt-Winter Moving Average

    Runs a three-term recursion (F, V, A) over ``close`` and returns
    ``F + V + 0.5 * A`` for every row.

    Args:
        close: input series; it is first passed through ``verify_series``.
        na: weight of the current value in the F update. Used only when it
            lies strictly between 0 and 1, otherwise 0.2.
        nb: weight of the F change in the V update. Used only when it lies
            strictly between 0 and 1, otherwise 0.1.
        nc: weight of the V change in the A update. Used only when it lies
            strictly between 0 and 1, otherwise 0.1.
        offset: passed through ``get_offset``; when the result is non-zero
            the output is shifted by that many rows.

    Kwargs:
        fillna: value handed to ``Series.fillna``.
        fill_method: method name handed to ``Series.fillna(method=...)``.

    Returns:
        Series indexed like ``close``, named ``HWMA_{na}_{nb}_{nc}`` and
        carrying a ``category`` attribute set to ``"overlap"``.
    """
    # Validate Arguments
    close = verify_series(close)
    # Out-of-range or missing weights fall back to their defaults.
    na = float(na) if na and 0 < na < 1 else 0.2
    nb = float(nb) if nb and 0 < nb < 1 else 0.1
    nc = float(nc) if nc and 0 < nc < 1 else 0.1
    offset = get_offset(offset)

    # Calculate Result
    last_a = last_v = 0
    # Positional access on purpose: ``close[0]`` / ``close[i]`` are label
    # lookups on an integer index and deprecated positional fallbacks on any
    # other index, while the result below is assembled strictly by position.
    last_f = close.iloc[0]

    result = []
    for price in close.values:
        F = (1.0 - na) * (last_f + last_v + 0.5 * last_a) + na * price
        V = (1.0 - nb) * (last_v + last_a) + nb * (F - last_f)
        A = (1.0 - nc) * last_a + nc * (V - last_v)
        result.append((F + V + 0.5 * A))
        # update values
        last_a, last_f, last_v = A, F, V

    smoothed = Series(result, index=close.index)

    # Offset
    if offset != 0:
        smoothed = smoothed.shift(offset)

    # Handle fills
    if "fillna" in kwargs:
        smoothed.fillna(kwargs["fillna"], inplace=True)
    if "fill_method" in kwargs:
        smoothed.fillna(method=kwargs["fill_method"], inplace=True)

    # Name & Category
    suffix = f"{na}_{nb}_{nc}"
    smoothed.name = f"HWMA_{suffix}"
    smoothed.category = "overlap"

    return smoothed
Example #42
0
    def test_timedelta64(self):
        """Check the string rendering of timedelta64 Series via to_string()."""

        from pandas import date_range
        from datetime import datetime

        # Smoke test: rendering a timedelta64[s] Series must not raise.
        Series(np.array([1100, 20], dtype='timedelta64[s]')).to_string()
        # check this works
        # GH2146

        # adding NaTs
        # Subtracting the shifted series leaves a NaT in the first row and a
        # one-day difference in the others.
        s = Series(date_range('2012-1-1', periods=3, freq='D'))
        y = s-s.shift(1)
        result = y.to_string()
        self.assertTrue('1 days, 00:00:00' in result)
        self.assertTrue('NaT' in result)

        # with frac seconds
        # Subtracting a datetime that carries 150 microseconds must show the
        # fractional part in the output.
        s = Series(date_range('2012-1-1', periods=3, freq='D'))
        y = s-datetime(2012,1,1,microsecond=150)
        result = y.to_string()
        self.assertTrue('00:00:00.000150' in result)
def sharpe_ratio(returns: pd.Series,
                 cumulative: bool = False,
                 entries_per_year: int = 252,
                 risk_free_rate: float = 0) -> float:
    """
    Compute the annualized Sharpe ratio of a series of normal (not log) returns.

    :param returns: (pd.Series) returns
    :param cumulative: (bool) True when ``returns`` holds cumulative values
        that must first be converted to per-period returns (False by default)
    :param entries_per_year: (int) number of return observations per year
        (252, i.e. daily, by default)
    :param risk_free_rate: (float) risk-free rate subtracted from the mean
        return (0 by default)
    :return: (float) annualized Sharpe ratio
    """
    if cumulative:
        # Turn cumulative values into period-over-period returns; the first
        # entry has no predecessor and is dropped.
        previous = returns.shift(1)
        returns = (returns / previous - 1)[1:]

    excess_mean = returns.mean() - risk_free_rate
    annualisation = entries_per_year ** (1 / 2)
    return excess_mean / returns.std() * annualisation
Example #44
0
def timing_of_flattening_and_flips(
        target_positions: pd.Series) -> pd.DatetimeIndex:
    """
    Advances in Financial Machine Learning, Snippet 14.1, page 197

    Collects the timestamps at which a series of target positions is
    flattened or flipped. Useful for analysing position changes, e.g. their
    frequency and balance.

    Flattening - the target position is 0 while the previous one was not 0.
    Flip - the target position has the opposite sign of the previous one.

    :param target_positions: (pd.Series) Target position series with timestamps as indices
    :return: (pd.DatetimeIndex) Timestamps of flattenings and flips, plus the
        last timestamp of the series if it is not already included
    """

    # Timestamps at which the target position is empty.
    flat_times = target_positions.loc[target_positions == 0].index

    # Timestamps whose previous position was not empty. The first row has no
    # predecessor (NaN after the shift), and NaN != 0, so it is kept as well.
    lagged = target_positions.shift(1)
    prev_open_times = lagged.loc[lagged != 0].index

    # FLATTENING - currently empty, previously not empty.
    closed_out = flat_times.intersection(prev_open_times)

    # Product of every position with the one before it; a negative product
    # means the sign changed.
    products = target_positions.iloc[
        1:] * target_positions.iloc[:-1].values

    # FLIPS - current position points the other way than the previous one.
    reversed_times = products.loc[products < 0].index

    events = closed_out.union(reversed_times).sort_values()

    # The last bet is always reported, without duplicating it.
    final_stamp = target_positions.index[-1:]
    if final_stamp[0] in events:
        return events
    return events.append(final_stamp)
Example #45
0
    def test_diff(self):
        """Check Series.diff against explicit ``s - s.shift(n)`` arithmetic."""
        # Just run the function
        self.ts.diff()

        # int dtype
        # Two large integers that differ by exactly 1; the difference must
        # come back as 1.
        a = 10000000000000000
        b = a + 1
        s = Series([a, b])

        rs = s.diff()
        self.assertEqual(rs[1], 1)

        # neg n
        # diff(-1) must match subtracting the series shifted by -1.
        rs = self.ts.diff(-1)
        xp = self.ts - self.ts.shift(-1)
        assert_series_equal(rs, xp)

        # 0
        # diff(0) must match subtracting the series from itself.
        rs = self.ts.diff(0)
        xp = self.ts - self.ts
        assert_series_equal(rs, xp)

        # datetime diff (GH3100)
        s = Series(date_range('20130102', periods=5))
        rs = s - s.shift(1)
        xp = s.diff()
        assert_series_equal(rs, xp)

        # timedelta diff
        # Differencing the timedelta results from the previous step once more.
        nrs = rs - rs.shift(1)
        nxp = xp.diff()
        assert_series_equal(nrs, nxp)

        # with tz
        # A tz-aware daily range must diff to NaT followed by one-day deltas,
        # keeping the series name.
        s = Series(
            date_range('2000-01-01 09:00:00', periods=5,
                       tz='US/Eastern'), name='foo')
        result = s.diff()
        assert_series_equal(result, Series(
            TimedeltaIndex(['NaT'] + ['1 days'] * 4), name='foo'))
Example #46
0
def test_reindex_pad():
    """Check forward-fill behaviour of Series.reindex and ffill/bfill."""
    s = Series(np.arange(10), dtype="int64")
    s2 = s[::2]

    # method="pad" and method="ffill" must give the same result.
    reindexed = s2.reindex(s.index, method="pad")
    reindexed2 = s2.reindex(s.index, method="ffill")
    tm.assert_series_equal(reindexed, reindexed2)

    # Every missing odd position takes the value of the even one before it.
    expected = Series([0, 0, 2, 2, 4, 4, 6, 6, 8, 8], index=np.arange(10))
    tm.assert_series_equal(reindexed, expected)

    # GH4604
    s = Series([1, 2, 3, 4, 5], index=["a", "b", "c", "d", "e"])
    new_index = ["a", "g", "c", "f"]
    expected = Series([1, 1, 3, 3], index=new_index)

    # this changes dtype because the ffill happens after
    result = s.reindex(new_index).ffill()
    tm.assert_series_equal(result, expected.astype("float64"))

    # With downcast="infer" the result must compare equal to the integer
    # expectation again.
    result = s.reindex(new_index).ffill(downcast="infer")
    tm.assert_series_equal(result, expected)

    # Filling inside reindex is expected to give different values
    # ([1, 5, 3, 5]) than reindexing first and filling afterwards.
    expected = Series([1, 5, 3, 5], index=new_index)
    result = s.reindex(new_index, method="ffill")
    tm.assert_series_equal(result, expected)

    # inference of new dtype
    s = Series([True, False, False, True], index=list("abcd"))
    new_index = "agc"
    result = s.reindex(list(new_index)).ffill()
    expected = Series([True, True, False], index=list(new_index))
    tm.assert_series_equal(result, expected)

    # GH4618 shifted series downcasting
    # Shifting and back-filling an all-False series must compare equal to
    # the original all-False series.
    s = Series(False, index=range(0, 5))
    result = s.shift(1).fillna(method="bfill")
    expected = Series(False, index=range(0, 5))
    tm.assert_series_equal(result, expected)
Example #47
0
def test_reindex_pad():
    """Check forward-fill behaviour of Series.reindex and ffill/bfill."""
    s = Series(np.arange(10), dtype='int64')
    s2 = s[::2]

    # method='pad' and method='ffill' must give the same result.
    reindexed = s2.reindex(s.index, method='pad')
    reindexed2 = s2.reindex(s.index, method='ffill')
    assert_series_equal(reindexed, reindexed2)

    # Every missing odd position takes the value of the even one before it.
    expected = Series([0, 0, 2, 2, 4, 4, 6, 6, 8, 8], index=np.arange(10))
    assert_series_equal(reindexed, expected)

    # GH4604
    s = Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])
    new_index = ['a', 'g', 'c', 'f']
    expected = Series([1, 1, 3, 3], index=new_index)

    # this changes dtype because the ffill happens after
    result = s.reindex(new_index).ffill()
    assert_series_equal(result, expected.astype('float64'))

    # With downcast='infer' the result must compare equal to the integer
    # expectation again.
    result = s.reindex(new_index).ffill(downcast='infer')
    assert_series_equal(result, expected)

    # Filling inside reindex is expected to give different values
    # ([1, 5, 3, 5]) than reindexing first and filling afterwards.
    expected = Series([1, 5, 3, 5], index=new_index)
    result = s.reindex(new_index, method='ffill')
    assert_series_equal(result, expected)

    # inference of new dtype
    s = Series([True, False, False, True], index=list('abcd'))
    new_index = 'agc'
    result = s.reindex(list(new_index)).ffill()
    expected = Series([True, True, False], index=list(new_index))
    assert_series_equal(result, expected)

    # GH4618 shifted series downcasting
    # Shifting and back-filling an all-False series must compare equal to
    # the original all-False series.
    s = Series(False, index=lrange(0, 5))
    result = s.shift(1).fillna(method='bfill')
    expected = Series(False, index=lrange(0, 5))
    assert_series_equal(result, expected)
Example #48
0
    def define_steps(blank: pd.Series):
        """
        Build the points that draw the transitions between levels as steps.

        Parameters
        ----------
        blank : pd.Series
            Values where the data start

        Returns
        -------
        pd.Series holding three points per input position ``i``: the level
        itself at ``i + 0.25``, a gap (None) at ``i`` and the following
        level at ``i - 0.25``.

        """
        bar_width = 0.25
        # Pair every level with the one that follows it; shift(-1) leaves a
        # missing value after the last level.
        pairs = list(zip(blank, blank.shift(-1)))
        x_coords = [
            x
            for i in range(len(pairs))
            for x in (i + bar_width, i, i - bar_width)
        ]
        # None assures bars are not connected to themselves
        y_values = [
            y
            for level, following in pairs
            for y in (level, None, following)
        ]
        return pd.Series(y_values, index=x_coords)
Example #49
0
def test_reindex_pad():
    """Check forward-fill behaviour of Series.reindex and ffill/bfill."""
    s = Series(np.arange(10), dtype='int64')
    s2 = s[::2]

    # method='pad' and method='ffill' must give the same result.
    reindexed = s2.reindex(s.index, method='pad')
    reindexed2 = s2.reindex(s.index, method='ffill')
    assert_series_equal(reindexed, reindexed2)

    # Every missing odd position takes the value of the even one before it.
    expected = Series([0, 0, 2, 2, 4, 4, 6, 6, 8, 8], index=np.arange(10))
    assert_series_equal(reindexed, expected)

    # GH4604
    s = Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])
    new_index = ['a', 'g', 'c', 'f']
    expected = Series([1, 1, 3, 3], index=new_index)

    # this changes dtype because the ffill happens after
    result = s.reindex(new_index).ffill()
    assert_series_equal(result, expected.astype('float64'))

    # With downcast='infer' the result must compare equal to the integer
    # expectation again.
    result = s.reindex(new_index).ffill(downcast='infer')
    assert_series_equal(result, expected)

    # Filling inside reindex is expected to give different values
    # ([1, 5, 3, 5]) than reindexing first and filling afterwards.
    expected = Series([1, 5, 3, 5], index=new_index)
    result = s.reindex(new_index, method='ffill')
    assert_series_equal(result, expected)

    # inference of new dtype
    s = Series([True, False, False, True], index=list('abcd'))
    new_index = 'agc'
    result = s.reindex(list(new_index)).ffill()
    expected = Series([True, True, False], index=list(new_index))
    assert_series_equal(result, expected)

    # GH4618 shifted series downcasting
    # Shifting and back-filling an all-False series must compare equal to
    # the original all-False series.
    s = Series(False, index=range(0, 5))
    result = s.shift(1).fillna(method='bfill')
    expected = Series(False, index=range(0, 5))
    assert_series_equal(result, expected)
Example #50
0
def get_overlap_time_line(logs_dict):
    """
    Build a table of continuous time spans from the overlap of several logs.

    The timestamps of the frame returned by ``get_overlap_df`` are scanned
    for "jumps": gaps between neighbouring timestamps that exceed
    ``param.UNIT_TIME * param.TIME_JUMPING_FACTOR`` seconds. Stretches
    between jumps that pass ``series_is_larger_than_time_window`` are
    reported as rows.

    :param logs_dict: collection of logs; at least two entries are needed,
        otherwise the result is empty
    :return: DataFrame with the columns ``start_time`` and ``end_time``
    """
    time_line_df = DataFrame(columns=['start_time', 'end_time'])
    if len(logs_dict) > 1:
        global_df = get_overlap_df(logs_dict)
        if len(global_df) > 1:
            time_series = Series(global_df.index)
            # Distance of every timestamp to its predecessor (NaT for the
            # first one), reduced to the gaps that count as jumps.
            gaps = time_series - time_series.shift()
            jump_series = gaps[
                gaps > timedelta(0, param.UNIT_TIME * param.TIME_JUMPING_FACTOR)]

            # DataFrame.append was removed in pandas 2.0; pd.concat is the
            # equivalent that works across pandas versions.
            if len(jump_series) == 0:
                if series_is_larger_than_time_window(time_series, param.TIME_WINDOW_SIZE):
                    time_line_df = pd.concat([
                        time_line_df,
                        pd.DataFrame([[time_series.iloc[0], time_series.iloc[-1]]], columns=time_line_df.columns)])

            elif len(jump_series) == 1:
                # NOTE(review): jump_series holds time differences, so these
                # two comparisons test a timedelta against an int;
                # jump_series.index[0] may have been intended - confirm.
                if jump_series.iloc[0] == 0:
                    if series_is_larger_than_time_window(time_series[1:], param.TIME_WINDOW_SIZE):
                        time_line_df = pd.concat([
                            time_line_df,
                            pd.DataFrame([[time_series.iloc[1], time_series.iloc[-1]]], columns=time_line_df.columns)])
                elif jump_series.iloc[0] == len(time_series) - 1:
                    if series_is_larger_than_time_window(time_series.iloc[1:-1], param.TIME_WINDOW_SIZE):
                        time_line_df = pd.concat([
                            time_line_df,
                            pd.DataFrame([[time_series.iloc[0], time_series.iloc[-2]]], columns=time_line_df.columns)])

            # With two or more jumps: the stretch before the first jump and
            # every stretch between two neighbouring jumps.
            # NOTE(review): the stretch after the last jump is never added -
            # confirm this is intended.
            time_lines = []
            for i in range(len(jump_series) - 1):
                j_id = jump_series.index[i]
                next_j_id = jump_series.index[i + 1]
                if i == 0:
                    if j_id > 1:
                        if series_is_larger_than_time_window(time_series.iloc[0:j_id], param.TIME_WINDOW_SIZE):
                            time_lines.append([time_series.iloc[0], time_series.iloc[j_id - 1]])
                if series_is_larger_than_time_window(time_series.iloc[j_id:next_j_id], param.TIME_WINDOW_SIZE):
                    time_lines.append([time_series.iloc[j_id], time_series.iloc[next_j_id - 1]])
            time_line_df = pd.concat([
                time_line_df,
                pd.DataFrame(time_lines, columns=time_line_df.columns)])
    return time_line_df
Example #51
0
    def test_shift(self):
        """Check Series.shift with and without a frequency argument."""
        # Plain shift: the index stays the same, only the values move.
        shifted = self.ts.shift(1)
        unshifted = shifted.shift(-1)

        tm.assert_index_equal(shifted.index, self.ts.index)
        tm.assert_index_equal(unshifted.index, self.ts.index)
        tm.assert_numpy_array_equal(unshifted.valid().values,
                                    self.ts.values[:-1])

        # Shifting by an offset object and back must round-trip exactly.
        offset = datetools.bday
        shifted = self.ts.shift(1, freq=offset)
        unshifted = shifted.shift(-1, freq=offset)

        assert_series_equal(unshifted, self.ts)

        # A zero-period shift with a frequency is a no-op.
        unshifted = self.ts.shift(0, freq=offset)
        assert_series_equal(unshifted, self.ts)

        # Same round trip with the frequency given as a string alias.
        shifted = self.ts.shift(1, freq='B')
        unshifted = shifted.shift(-1, freq='B')

        assert_series_equal(unshifted, self.ts)

        # corner case
        unshifted = self.ts.shift(0)
        assert_series_equal(unshifted, self.ts)

        # Shifting with PeriodIndex
        ps = tm.makePeriodSeries()
        shifted = ps.shift(1)
        unshifted = shifted.shift(-1)
        tm.assert_index_equal(shifted.index, ps.index)
        tm.assert_index_equal(unshifted.index, ps.index)
        tm.assert_numpy_array_equal(unshifted.valid().values, ps.values[:-1])

        # Frequency passed positionally, as a string and as an offset object.
        shifted2 = ps.shift(1, 'B')
        shifted3 = ps.shift(1, datetools.bday)
        assert_series_equal(shifted2, shifted3)
        assert_series_equal(ps, shifted2.shift(-1, 'B'))

        # A frequency of 'D' on this period series must be rejected.
        self.assertRaises(ValueError, ps.shift, freq='D')

        # legacy support
        shifted4 = ps.shift(1, freq='B')
        assert_series_equal(shifted2, shifted4)

        shifted5 = ps.shift(1, freq=datetools.bday)
        assert_series_equal(shifted5, shifted4)

        # 32-bit taking
        # GH 8129
        # The number of periods is taken from the series itself, so it has
        # the series' integer dtype instead of being a Python int.
        index = date_range('2000-01-01', periods=5)
        for dtype in ['int32', 'int64']:
            s1 = Series(np.arange(5, dtype=dtype), index=index)
            p = s1.iloc[1]
            result = s1.shift(periods=p)
            expected = Series([np.nan, 0, 1, 2, 3], index=index)
            assert_series_equal(result, expected)

        # xref 8260
        # with tz
        s = Series(date_range('2000-01-01 09:00:00', periods=5,
                              tz='US/Eastern'), name='foo')
        result = s - s.shift()

        exp = Series(TimedeltaIndex(['NaT'] + ['1 days'] * 4), name='foo')
        assert_series_equal(result, exp)

        # incompat tz
        # Subtracting series with different time zones must raise.
        s2 = Series(date_range('2000-01-01 09:00:00', periods=5,
                               tz='CET'), name='foo')
        self.assertRaises(ValueError, lambda: s - s2)
Example #52
0
ts.resample('D')

pd.date_range('4/1/2012', '6/1/2012')   # Generating date ranges
pd.date_range(start='4/1/2012', periods=20)
pd.date_range(end='6/1/2012', periods=20)
pd.date_range('1/1/2000', '12/1/2000', freq='BM')
pd.date_range('5/2/2012 12:56:31', periods=5)
pd.date_range('5/2/2012 12:56:31', periods=5, normalize=True)

pd.date_range('1/1/2000', periods=10, freq='1h30min')   # Frequencies and date offsets

# Shifting (leading and lagging) data
ts = Series(np.random.randn(4),
            index=pd.date_range('1/1/2000', periods=4, freq='M'))
ts
ts.shift(2) # shifts the values only, so some data is dropped
ts.shift(-2)
ts.shift(2,freq='M')    # with a frequency only the timestamps are moved

# Time Zone Handling =============================================

# Period ==========================================================

# Resample ========================================================

# Time series plotting ============================================
close_px_all = pd.read_csv('stock_px.csv', parse_dates=True, index_col=0)
close_px = close_px_all[['AAPL', 'MSFT', 'XOM']]
close_px = close_px.resample('B', fill_method='ffill')
close_px.info()
def slide7():
    """Print a walkthrough of pandas date offsets and Series.shift.

    Written with Python 2 print statements. ``pd``, ``np``, ``Series`` and
    ``datetime`` are taken from the enclosing module.
    """
    from pandas.tseries.offsets import Hour, Minute
    # Offset objects, alone and with a multiple.
    hour = Hour()
    print hour
    four_hours = Hour(4)
    print four_hours
    print pd.date_range('1/1/2000', '1/3/2000 23:59', freq='4h')

    # Offsets can be added together; the same span as a frequency string.
    print Hour(2) + Minute(30)
    print pd.date_range('1/1/2000', periods=10, freq='1h30min')

    ts = Series(np.random.randn(4),
                index=pd.date_range('1/1/2000', periods=4, freq='M'))
    print ts
    # Plain shifts (no freq argument).
    print ts.shift(2)
    print ts.shift(-2)
    # Shifts with a frequency; each is preceded by a label naming the
    # periods and frequency used.
    print '2 M'
    print ts.shift(2, freq='M')
    print '3 D'
    print ts.shift(3, freq='D')
    print '1 3D'
    print ts.shift(1, freq='3D')
    print '1 90T'
    print ts.shift(1, freq='90T')

    print 'shifting dates with offsets'
    from pandas.tseries.offsets import Day, MonthEnd
    now = datetime(2011, 11, 17)
    print now + 3 * Day()
    print now + MonthEnd()
    print now + MonthEnd(2)

    # Rolling a date forward / backward with an offset.
    offset = MonthEnd()
    print offset
    print offset.rollforward(now)
    print offset.rollback(now)

    # Group by the rolled-forward timestamp and average each group.
    ts = Series(np.random.randn(20),
                index=pd.date_range('1/15/2000', periods=20, freq='4d'))
    print ts.groupby(offset.rollforward).mean()
Example #54
0
    def test_constructor_with_datetime_tz(self):
        """Check construction and basic handling of tz-aware datetime Series."""

        # 8260
        # support datetime64 with tz

        dr = date_range('20130101', periods=3, tz='US/Eastern')
        s = Series(dr)
        self.assertTrue(s.dtype.name == 'datetime64[ns, US/Eastern]')
        self.assertTrue(s.dtype == 'datetime64[ns, US/Eastern]')
        self.assertTrue(com.is_datetime64tz_dtype(s.dtype))
        self.assertTrue('datetime64[ns, US/Eastern]' in str(s))

        # export
        # .values is expected to be a plain datetime64[ns] ndarray that maps
        # back to the original range after localizing to UTC and converting.
        result = s.values
        self.assertIsInstance(result, np.ndarray)
        self.assertTrue(result.dtype == 'datetime64[ns]')
        self.assertTrue(dr.equals(pd.DatetimeIndex(result).tz_localize(
            'UTC').tz_convert(tz=s.dt.tz)))

        # indexing
        result = s.iloc[0]
        self.assertEqual(result, Timestamp('2013-01-01 00:00:00-0500',
                                           tz='US/Eastern', offset='D'))
        result = s[0]
        self.assertEqual(result, Timestamp('2013-01-01 00:00:00-0500',
                                           tz='US/Eastern', offset='D'))

        # Boolean-mask selection must match the equivalent slice.
        result = s[Series([True, True, False], index=s.index)]
        assert_series_equal(result, s[0:2])

        result = s.iloc[0:1]
        assert_series_equal(result, Series(dr[0:1]))

        # concat
        result = pd.concat([s.iloc[0:1], s.iloc[1:]])
        assert_series_equal(result, s)

        # astype
        result = s.astype(object)
        expected = Series(DatetimeIndex(s._values).asobject)
        assert_series_equal(result, expected)

        result = Series(s.values).dt.tz_localize('UTC').dt.tz_convert(s.dt.tz)
        assert_series_equal(result, s)

        # astype - datetime64[ns, tz]
        result = Series(s.values).astype('datetime64[ns, US/Eastern]')
        assert_series_equal(result, s)

        result = Series(s.values).astype(s.dtype)
        assert_series_equal(result, s)

        # Converting to another time zone keeps the instants (midnight
        # US/Eastern is expected as 06:00 CET).
        result = s.astype('datetime64[ns, CET]')
        expected = Series(date_range('20130101 06:00:00', periods=3, tz='CET'))
        assert_series_equal(result, expected)

        # short str
        self.assertTrue('datetime64[ns, US/Eastern]' in str(s))

        # formatting with NaT
        result = s.shift()
        self.assertTrue('datetime64[ns, US/Eastern]' in str(result))
        self.assertTrue('NaT' in str(result))

        # long str
        t = Series(date_range('20130101', periods=1000, tz='US/Eastern'))
        self.assertTrue('datetime64[ns, US/Eastern]' in str(t))

        result = pd.DatetimeIndex(s, freq='infer')
        tm.assert_index_equal(result, dr)

        # inference
        # Timestamps sharing one time zone give a tz-aware dtype ...
        s = Series([pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
                    pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')])
        self.assertTrue(s.dtype == 'datetime64[ns, US/Pacific]')
        self.assertTrue(lib.infer_dtype(s) == 'datetime64')

        # ... while mixed time zones give object dtype.
        s = Series([pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
                    pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Eastern')])
        self.assertTrue(s.dtype == 'object')
        self.assertTrue(lib.infer_dtype(s) == 'datetime')

        # with all NaT
        s = Series(pd.NaT, index=[0, 1], dtype='datetime64[ns, US/Eastern]')
        expected = Series(pd.DatetimeIndex(['NaT', 'NaT'], tz='US/Eastern'))
        assert_series_equal(s, expected)
class MySeries:
    """Wrapper around a pandas ``Series`` whose methods return ``MySeries``.

    Every public method forwards its arguments to the pandas operation of
    the same name and wraps the result in a new ``MySeries``.

    The ``rolling_*`` methods call the legacy module-level ``pd.rolling_*``
    functions when the installed pandas still provides them and otherwise
    fall back to ``Series.rolling(...)``. ``clip_lower`` / ``clip_upper``
    fall back to ``Series.clip`` in the same way.
    """

    def __init__(self, *args, **kwargs):
        # All arguments are forwarded unchanged to the Series constructor.
        self.x = Series(*args, **kwargs)
        # Captured once at construction time.
        self.values = self.x.values
        self.index = self.x.index

    def _rolling(self, stat, args, kwargs):
        """Return the rolling statistic ``stat`` of the wrapped series."""
        legacy = getattr(pd, 'rolling_' + stat, None)
        if legacy is not None:
            return MySeries(legacy(self.x, *args, **kwargs))
        # Fallback: the arguments describe the window (window, min_periods,
        # center, ...). ``ddof`` belongs to the statistic, not the window.
        stat_kwargs = {}
        if 'ddof' in kwargs:
            stat_kwargs['ddof'] = kwargs.pop('ddof')
        roller = self.x.rolling(*args, **kwargs)
        return MySeries(getattr(roller, stat)(**stat_kwargs))

    def _clip_one_sided(self, side, args, kwargs):
        """Clip at a single bound; ``side`` is ``'lower'`` or ``'upper'``."""
        legacy = getattr(self.x, 'clip_' + side, None)
        if legacy is not None:
            return MySeries(legacy(*args, **kwargs))
        # Fallback: map the legacy (threshold, axis, inplace) arguments onto
        # Series.clip(lower=... / upper=...).
        params = dict(zip(('threshold', 'axis', 'inplace'), args))
        params.update(kwargs)
        if 'threshold' not in params:
            raise TypeError(
                "clip_%s() missing required argument 'threshold'" % side)
        params[side] = params.pop('threshold')
        return MySeries(self.x.clip(**params))

    def rolling_mean(self, *args, **kwargs):
        """Rolling mean."""
        return self._rolling('mean', args, kwargs)

    def rolling_count(self, *args, **kwargs):
        """Rolling count of observations."""
        return self._rolling('count', args, kwargs)

    def rolling_sum(self, *args, **kwargs):
        """Rolling sum."""
        return self._rolling('sum', args, kwargs)

    def rolling_median(self, *args, **kwargs):
        """Rolling median."""
        return self._rolling('median', args, kwargs)

    def rolling_min(self, *args, **kwargs):
        """Rolling minimum."""
        return self._rolling('min', args, kwargs)

    def rolling_max(self, *args, **kwargs):
        """Rolling maximum."""
        return self._rolling('max', args, kwargs)

    def rolling_std(self, *args, **kwargs):
        """Rolling standard deviation."""
        return self._rolling('std', args, kwargs)

    def rolling_var(self, *args, **kwargs):
        """Rolling variance."""
        return self._rolling('var', args, kwargs)

    def rolling_skew(self, *args, **kwargs):
        """Rolling skewness."""
        return self._rolling('skew', args, kwargs)

    def rolling_kurtosis(self, *args, **kwargs):
        """Rolling kurtosis.

        pandas names this statistic ``kurt`` (``pd.rolling_kurt`` /
        ``Rolling.kurt``); there is no ``pd.rolling_kurtosis``.
        """
        return self._rolling('kurt', args, kwargs)

    def rolling_window(self, *args, **kwargs):
        """Rolling statistic over a (possibly weighted) window.

        NOTE(review): without the legacy ``pd.rolling_window`` the arguments
        go straight to ``Series.rolling``, whose positional order differs
        from the legacy function - pass ``win_type`` by keyword. The legacy
        ``mean`` flag is taken to select mean (default) or sum; confirm
        against the pandas version in use.
        """
        legacy = getattr(pd, 'rolling_window', None)
        if legacy is not None:
            return MySeries(legacy(self.x, *args, **kwargs))
        stat = 'mean' if kwargs.pop('mean', True) else 'sum'
        roller = self.x.rolling(*args, **kwargs)
        return MySeries(getattr(roller, stat)())

    def cumprod(self, *args, **kwargs):
        """Cumulative product."""
        return MySeries(self.x.cumprod(*args, **kwargs))

    def cumsum(self, *args, **kwargs):
        """Cumulative sum."""
        return MySeries(self.x.cumsum(*args, **kwargs))

    def diff(self, *args, **kwargs):
        """Discrete difference."""
        return MySeries(self.x.diff(*args, **kwargs))

    def div(self, *args, **kwargs):
        """Element-wise division."""
        return MySeries(self.x.div(*args, **kwargs))

    def mul(self, *args, **kwargs):
        """Element-wise multiplication."""
        return MySeries(self.x.mul(*args, **kwargs))

    def add(self, *args, **kwargs):
        """Element-wise addition."""
        return MySeries(self.x.add(*args, **kwargs))

    def dropna(self, *args, **kwargs):
        """Drop missing values."""
        return MySeries(self.x.dropna(*args, **kwargs))

    def fillna(self, *args, **kwargs):
        """Fill missing values."""
        return MySeries(self.x.fillna(*args, **kwargs))

    def floordiv(self, *args, **kwargs):
        """Element-wise integer division."""
        return MySeries(self.x.floordiv(*args, **kwargs))

    def mod(self, *args, **kwargs):
        """Element-wise modulo."""
        return MySeries(self.x.mod(*args, **kwargs))

    def nlargest(self, *args, **kwargs):
        """Largest elements."""
        return MySeries(self.x.nlargest(*args, **kwargs))

    def nonzero(self, *args, **kwargs):
        """Forward to ``Series.nonzero`` and wrap the result.

        NOTE(review): ``Series.nonzero`` is not available in current pandas
        releases - confirm the pandas version this targets.
        """
        return MySeries(self.x.nonzero(*args, **kwargs))

    def nsmallest(self, *args, **kwargs):
        """Smallest elements."""
        return MySeries(self.x.nsmallest(*args, **kwargs))

    def pow(self, *args, **kwargs):
        """Element-wise exponentiation."""
        return MySeries(self.x.pow(*args, **kwargs))

    def rank(self, *args, **kwargs):
        """Ranks of the values."""
        return MySeries(self.x.rank(*args, **kwargs))

    def round(self, *args, **kwargs):
        """Round the values."""
        return MySeries(self.x.round(*args, **kwargs))

    def shift(self, *args, **kwargs):
        """Shift the values (or the index, when a frequency is given)."""
        return MySeries(self.x.shift(*args, **kwargs))

    def sub(self, *args, **kwargs):
        """Element-wise subtraction."""
        return MySeries(self.x.sub(*args, **kwargs))

    def abs(self, *args, **kwargs):
        """Absolute values."""
        return MySeries(self.x.abs(*args, **kwargs))

    def clip(self, *args, **kwargs):
        """Clip the values to the given bounds."""
        return MySeries(self.x.clip(*args, **kwargs))

    def clip_lower(self, *args, **kwargs):
        """Clip the values at a lower bound."""
        return self._clip_one_sided('lower', args, kwargs)

    def clip_upper(self, *args, **kwargs):
        """Clip the values at an upper bound."""
        return self._clip_one_sided('upper', args, kwargs)

    def interpolate(self, *args, **kwargs):
        """Interpolate missing values."""
        return MySeries(self.x.interpolate(*args, **kwargs))

    def resample(self, *args, **kwargs):
        """Forward to ``Series.resample`` and wrap the result.

        NOTE(review): wrapping assumes ``resample`` returns series-like
        data; confirm for the pandas version in use.
        """
        return MySeries(self.x.resample(*args, **kwargs))

    def replace(self, *args, **kwargs):
        """Replace values."""
        return MySeries(self.x.replace(*args, **kwargs))
Example #56
0
    def test_shift(self):
        """Check Series.shift with and without a frequency argument."""
        # Plain shift: the index stays the same, only the values move.
        shifted = self.ts.shift(1)
        unshifted = shifted.shift(-1)

        tm.assert_index_equal(shifted.index, self.ts.index)
        tm.assert_index_equal(unshifted.index, self.ts.index)
        tm.assert_numpy_array_equal(unshifted.dropna().values,
                                    self.ts.values[:-1])

        # Shifting by an offset object and back must round-trip exactly.
        offset = BDay()
        shifted = self.ts.shift(1, freq=offset)
        unshifted = shifted.shift(-1, freq=offset)

        assert_series_equal(unshifted, self.ts)

        # A zero-period shift with a frequency is a no-op.
        unshifted = self.ts.shift(0, freq=offset)
        assert_series_equal(unshifted, self.ts)

        # Same round trip with the frequency given as a string alias.
        shifted = self.ts.shift(1, freq='B')
        unshifted = shifted.shift(-1, freq='B')

        assert_series_equal(unshifted, self.ts)

        # corner case
        unshifted = self.ts.shift(0)
        assert_series_equal(unshifted, self.ts)

        # Shifting with PeriodIndex
        ps = tm.makePeriodSeries()
        shifted = ps.shift(1)
        unshifted = shifted.shift(-1)
        tm.assert_index_equal(shifted.index, ps.index)
        tm.assert_index_equal(unshifted.index, ps.index)
        tm.assert_numpy_array_equal(unshifted.dropna().values, ps.values[:-1])

        # Frequency passed positionally, as a string and as an offset object.
        shifted2 = ps.shift(1, 'B')
        shifted3 = ps.shift(1, BDay())
        assert_series_equal(shifted2, shifted3)
        assert_series_equal(ps, shifted2.shift(-1, 'B'))

        # A frequency that does not match the PeriodIndex must be rejected.
        msg = "Given freq D does not match PeriodIndex freq B"
        with pytest.raises(ValueError, match=msg):
            ps.shift(freq='D')

        # legacy support
        shifted4 = ps.shift(1, freq='B')
        assert_series_equal(shifted2, shifted4)

        shifted5 = ps.shift(1, freq=BDay())
        assert_series_equal(shifted5, shifted4)

        # 32-bit taking
        # GH 8129
        # The number of periods is taken from the series itself, so it has
        # the series' integer dtype instead of being a Python int.
        index = date_range('2000-01-01', periods=5)
        for dtype in ['int32', 'int64']:
            s1 = Series(np.arange(5, dtype=dtype), index=index)
            p = s1.iloc[1]
            result = s1.shift(periods=p)
            expected = Series([np.nan, 0, 1, 2, 3], index=index)
            assert_series_equal(result, expected)

        # xref 8260
        # with tz
        s = Series(date_range('2000-01-01 09:00:00', periods=5,
                              tz='US/Eastern'), name='foo')
        result = s - s.shift()

        exp = Series(TimedeltaIndex(['NaT'] + ['1 days'] * 4), name='foo')
        assert_series_equal(result, exp)

        # incompat tz
        # Subtracting series with different time zones must raise.
        s2 = Series(date_range('2000-01-01 09:00:00', periods=5,
                               tz='CET'), name='foo')
        msg = ("DatetimeArray subtraction must have the same timezones or no"
               " timezones")
        with pytest.raises(TypeError, match=msg):
            s - s2
Example #57
0
def position_to_return(inst: Instrument, position: pd.Series):
    """Multiply the previous row's position by the one-step change in close.

    :param inst: object exposing the close series as ``inst.ohlcv.CLOSE``
    :param position: position series
    :return: element-wise product of ``position`` shifted by one row and the
        one-step difference of the close series
    """
    # The position from the previous row is combined with the latest change.
    held_position = position.shift(1)
    close_change = inst.ohlcv.CLOSE.diff(1)
    return held_position * close_change
Example #58
0
# encoding=utf-8

import pandas as pd
import numpy as np
from pandas import Series, DataFrame
from datetime import datetime
from pandas.tseries.offsets import Day, MonthEnd

# Shifting (leading / lagging) data
# Shifting: moving data backward or forward in time

# Series and DataFrame both have a shift method that performs a plain
# backward or forward shift while leaving the index unchanged
ts = Series(np.random.randn(4), index=pd.date_range('1/1/2000', periods=4, freq='M'))
print ts
# Shift backward (lag)
print ts.shift(2)
# Shift forward (lead)
print ts.shift(-2)

# shift is commonly used to compute percent changes in one or more time series
print ts / ts.shift(1) - 1

# A plain shift does not modify the index; a frequency can be given to move
# the timestamps instead of moving the data and producing NaN
print ts.shift(2, freq='M')
print ts.shift(3, freq='D')
print ts.shift(1, freq='3D')
print ts.shift(1, freq='90T')

# Shifting dates with offsets
now = datetime(2011, 11, 17)
print now + 3 * Day()
Example #59
0
    def test_constructor_with_datetime_tz(self):
        """Check construction and basic handling of tz-aware datetime Series."""

        # 8260
        # support datetime64 with tz

        dr = date_range('20130101', periods=3, tz='US/Eastern')
        s = Series(dr)
        assert s.dtype.name == 'datetime64[ns, US/Eastern]'
        assert s.dtype == 'datetime64[ns, US/Eastern]'
        assert is_datetime64tz_dtype(s.dtype)
        assert 'datetime64[ns, US/Eastern]' in str(s)

        # export
        # .values is expected to be a plain datetime64[ns] ndarray that maps
        # back to the original range after localizing to UTC and converting.
        result = s.values
        assert isinstance(result, np.ndarray)
        assert result.dtype == 'datetime64[ns]'

        exp = pd.DatetimeIndex(result)
        exp = exp.tz_localize('UTC').tz_convert(tz=s.dt.tz)
        tm.assert_index_equal(dr, exp)

        # indexing
        result = s.iloc[0]
        assert result == Timestamp('2013-01-01 00:00:00-0500',
                                   tz='US/Eastern', freq='D')
        result = s[0]
        assert result == Timestamp('2013-01-01 00:00:00-0500',
                                   tz='US/Eastern', freq='D')

        # Boolean-mask selection must match the equivalent slice.
        result = s[Series([True, True, False], index=s.index)]
        assert_series_equal(result, s[0:2])

        result = s.iloc[0:1]
        assert_series_equal(result, Series(dr[0:1]))

        # concat
        result = pd.concat([s.iloc[0:1], s.iloc[1:]])
        assert_series_equal(result, s)

        # short str
        assert 'datetime64[ns, US/Eastern]' in str(s)

        # formatting with NaT
        result = s.shift()
        assert 'datetime64[ns, US/Eastern]' in str(result)
        assert 'NaT' in str(result)

        # long str
        t = Series(date_range('20130101', periods=1000, tz='US/Eastern'))
        assert 'datetime64[ns, US/Eastern]' in str(t)

        result = pd.DatetimeIndex(s, freq='infer')
        tm.assert_index_equal(result, dr)

        # inference
        # Timestamps sharing one time zone give a tz-aware dtype ...
        s = Series([pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
                    pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')])
        assert s.dtype == 'datetime64[ns, US/Pacific]'
        assert lib.infer_dtype(s) == 'datetime64'

        # ... while mixed time zones give object dtype.
        s = Series([pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
                    pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Eastern')])
        assert s.dtype == 'object'
        assert lib.infer_dtype(s) == 'datetime'

        # with all NaT
        s = Series(pd.NaT, index=[0, 1], dtype='datetime64[ns, US/Eastern]')
        expected = Series(pd.DatetimeIndex(['NaT', 'NaT'], tz='US/Eastern'))
        assert_series_equal(s, expected)
Example #60
0
# Frequencies and date offsets
from pandas.tseries.offsets import Hour, Minute
# Frequency of 4 hours
date = pd.date_range('2000 1 1','2000 1 5',freq='4h')
# print date
# Frequency of 1 hour 30 minutes
date = pd.date_range('2000 1 1','2000 1 5',freq='1h30min')
# print date

# Shifting data
# shift moves data backward or forward along the time axis
ts = Series(np.random.randn(4),
            index=pd.date_range('1/1/2000', periods=4, freq='M'))

# Shift the values
lagging_ts = ts.shift(2)
leading_ts = ts.shift(-2)
# print ts
# print lagging_ts
# print leading_ts

# Shift the time index instead, by months
shift_ts = ts.shift(2, freq='M')
# print ts
# print shift_ts

# Shift a date with an offset
hours_3 = Hour(3)
now = datetime.now()
three_hours_later = now + hours_3
# print three_hours_later