Example #1
0
 def _detect(self, data: pd.Series) -> pd.Series:
     """Flag samples whose step from the previous sample exceeds the limit.

     The comparison direction comes from ``self._direction``: "both" flags
     large jumps either way, "positive" only upward jumps, anything else
     only downward jumps.  Threshold is ``self._max_diff``.
     """
     step = data.diff()
     if self._direction == "both":
         return np.abs(step) > self._max_diff
     if self._direction == "positive":
         return step > self._max_diff
     return step < -self._max_diff
    def test_diff_tz(self):
        """Series.diff over negative/zero periods, datetime and tz-aware data."""
        # Combined datetime diff, normal diff and boolean diff test
        ts = tm.makeTimeSeries(name="ts")
        ts.diff()

        # neg n
        result = ts.diff(-1)
        expected = ts - ts.shift(-1)
        tm.assert_series_equal(result, expected)

        # 0
        result = ts.diff(0)
        expected = ts - ts
        tm.assert_series_equal(result, expected)

        # datetime diff (GH#3100)
        s = Series(date_range("20130102", periods=5))
        result = s.diff()
        expected = s - s.shift(1)
        tm.assert_series_equal(result, expected)

        # timedelta diff
        result = result - result.shift(1)  # previous result
        expected = expected.diff()  # previously expected
        tm.assert_series_equal(result, expected)

        # with tz
        s = Series(date_range("2000-01-01 09:00:00",
                              periods=5,
                              tz="US/Eastern"),
                   name="foo")
        result = s.diff()
        expected = Series(TimedeltaIndex(["NaT"] + ["1 days"] * 4), name="foo")
        tm.assert_series_equal(result, expected)
Example #3
0
class Timeseries(object):
    """ASV-style benchmark: timestamp comparisons and diff ops on datetime Series."""

    # target benchmark runtime in seconds (asv setting)
    goal_time = 0.2

    # each benchmark runs once tz-naive and once tz-aware
    params = [None, 'US/Eastern']
    param_names = ['tz']

    def setup(self, tz):
        self.N = 10**6
        self.halfway = ((self.N // 2) - 1)
        # minute-frequency series plus a scalar Timestamp from its middle
        self.s = Series(date_range('20010101', periods=self.N, freq='T',
                                   tz=tz))
        self.ts = self.s[self.halfway]

        # second-frequency series used by the diff benchmark
        self.s2 = Series(date_range('20010101', periods=self.N, freq='s',
                                    tz=tz))

    def time_series_timestamp_compare(self, tz):
        # Series <= scalar Timestamp
        self.s <= self.ts

    def time_timestamp_series_compare(self, tz):
        # scalar Timestamp >= Series
        self.ts >= self.s

    def time_timestamp_ops_diff(self, tz):
        self.s2.diff()

    def time_timestamp_ops_diff_with_shift(self, tz):
        self.s - self.s.shift()
Example #4
0
    def test_timedelta_ops(self):
        """Reductions on a timedelta Series produced by Series.diff return Timedelta."""
        # GH#4984
        # make sure ops return Timedelta
        s = Series([
            Timestamp('20130101') + timedelta(seconds=i * i) for i in range(10)
        ])
        td = s.diff()

        result = td.mean()
        expected = to_timedelta(timedelta(seconds=9))
        assert result == expected

        result = td.to_frame().mean()
        assert result[0] == expected

        result = td.quantile(.1)
        expected = Timedelta(np.timedelta64(2600, 'ms'))
        assert result == expected

        result = td.median()
        expected = to_timedelta('00:00:09')
        assert result == expected

        result = td.to_frame().median()
        assert result[0] == expected

        # GH#6462
        # consistency in returned values for sum
        result = td.sum()
        expected = to_timedelta('00:01:21')
        assert result == expected

        result = td.to_frame().sum()
        assert result[0] == expected

        # std
        result = td.std()
        expected = to_timedelta(Series(td.dropna().values).std())
        assert result == expected

        result = td.to_frame().std()
        assert result[0] == expected

        # invalid ops
        for op in ['skew', 'kurt', 'sem', 'prod']:
            msg = "reduction operation '{}' not allowed for this dtype"
            with pytest.raises(TypeError, match=msg.format(op)):
                getattr(td, op)()

        # GH#10040
        # make sure NaT is properly handled by median()
        s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07')])
        assert s.diff().median() == timedelta(days=4)

        s = Series([
            Timestamp('2015-02-03'),
            Timestamp('2015-02-07'),
            Timestamp('2015-02-15')
        ])
        assert s.diff().median() == timedelta(days=6)
Example #5
0
def calculate_test_positivity(positive_tests: pd.Series,
                              negative_tests: pd.Series,
                              smooth: int = 7,
                              lag_lookback: int = 7) -> pd.Series:
    """Calculates positive test rate.

    Args:
        positive_tests: Number of cumulative positive tests.
        negative_tests: Number of cumulative negative tests.
        smooth: Smoothing window size.  NOTE(review): currently unused —
            smoothing is delegated to series_utils defaults; confirm intent.
        lag_lookback: Number of trailing smoothed values inspected to decide
            whether recent negative-test data is missing.

    Returns:
        Positive test rate, or an empty float series when positives exist but
        the last `lag_lookback` smoothed negative values are all NaN.
    """
    # Cumulative counts -> daily increments.
    daily_negative_tests = negative_tests.diff()
    daily_positive_tests = positive_tests.diff()
    positive_smoothed = series_utils.smooth_with_rolling_average(
        daily_positive_tests)
    negative_smoothed = series_utils.smooth_with_rolling_average(
        daily_negative_tests, include_trailing_zeros=False)
    last_n_positive = positive_smoothed[-lag_lookback:]
    last_n_negative = negative_smoothed[-lag_lookback:]

    # NaN values are truthy in any(), so this mainly detects a non-empty
    # positive tail alongside an all-NaN negative tail.
    if any(last_n_positive) and last_n_negative.isna().all():
        return pd.Series([], dtype="float64")

    return positive_smoothed / (negative_smoothed + positive_smoothed)
Example #6
0
def volume_ratio(price: Series, volume: Series, period: int) -> Series:
    """Volume Ratio (VR).

    Compares traded volume on up days against volume on down days over a
    rolling window.  Volume on flat days (price unchanged from the previous
    bar) is split evenly, half added to each side.

    Usage example: ``volume_ratio(close, volume, 20)`` computes a 20-bar
    volume ratio from closing prices.

    :param price: price series used to classify up/down days, e.g. close
    :param volume: traded volume series
    :param period: rolling-window length
    :return: rolling up-side volume divided by rolling down-side volume
    """
    change = price.diff(1)
    up = np.where(change.gt(0), volume, 0)
    down = np.where(change.lt(0), volume, 0)
    # BUG FIX: the original used Series.equals(0), which compares the whole
    # series against the object 0 and always returns False, so flat days
    # never contributed.  Element-wise eq(0) is the intended comparison.
    maintain = np.where(change.eq(0), volume.mul(0.5), 0)

    up = up + maintain
    down = down + maintain
    sum_up = Series(up).rolling(window=period, min_periods=period).sum()
    sum_down = Series(down).rolling(window=period, min_periods=period).sum()
    return sum_up.div(sum_down)
Example #7
0
def calc_glider_vert_velocity(time, depth):
    """
    Calculate glider vertical velocity in cm/s

    Parameters
    ----------
    time : np.array [datetime64]
        glider time dimension
    depth : np.array [float]
        depth (m) or pressure (dbar) if depth not avail

    Returns
    -------
    velocity : np.array
        vertical velocity in cm/s
    """
    from numpy import array
    from pandas import Series

    # elapsed seconds since the first sample (datetime64[ns] -> float ns -> s)
    stamps_ns = array(time).astype("datetime64[ns]").astype(float)
    elapsed_s = Series((stamps_ns - stamps_ns.min()) / 1e9)

    # depth/pressure expressed in centimetres
    depth_cm = Series(array(depth).astype(float) * 100)

    # finite difference between consecutive samples gives cm/s
    return depth_cm.diff() / elapsed_s.diff()
Example #8
0
def _segment_until(signal1: pd.Series,
                   signal2: pd.Series,
                   s: float,
                   t: float,
                   z_max: float,
                   out: pd.Series = None):
    # Evaluate an "until"-style operator over the segment [s, t] of two
    # sampled signals.  NOTE(review): exact semantics depend on the
    # _compute_segment_* / _compute_partial_eventually helpers defined
    # elsewhere in this module — confirm against them.

    z = pd.Series()
    # Ensure both signals have samples at the segment endpoints, filling by
    # interpolation on the index values.
    x = signal1.reindex(signal1.index.union([s, t])).interpolate(
        'values', limit_direction='both')
    dx = -signal1.diff(-1).fillna(0)  # forward difference of signal1

    y = signal2.reindex(signal2.index.union([s, t])).interpolate(
        'values', limit_direction='both')
    # NOTE(review): dy is computed but never used below.
    dy = -signal2.diff(-1).fillna(0)

    if dx[s] <= 0:
        # signal1 is non-increasing at s
        z1 = _compute_segment_and(x, y, s, t)
        z2 = _compute_partial_eventually(z1, s, t)

        i = pd.Series()
        i[s] = min(z_max, x[t])
        z = _compute_segment_or(i, z2, s, t)
    else:
        # signal1 is increasing at s
        z1 = _compute_partial_eventually(y, s, t)
        z2 = _compute_segment_and(x, z1, s, t)
        z3 = pd.Series()
        z3[s] = z_max
        z1 = _compute_segment_and(x, z3, s, t)
        z = _compute_segment_or(z1, z2, s, t)

    z.sort_index(inplace=True)
    # Optionally merge the result into a caller-supplied series in place.
    if out is not None and isinstance(out, pd.Series):
        out.update(z)
    return z
Example #9
0
class Timeseries(object):
    """Benchmark timestamp comparisons and diff ops on large datetime Series."""

    goal_time = 0.2

    params = [None, 'US/Eastern']
    param_names = ['tz']

    def setup(self, tz):
        size = 10**6
        midpoint = (size // 2) - 1
        self.s = Series(date_range('20010101', periods=size, freq='T', tz=tz))
        self.s2 = Series(date_range('20010101', periods=size, freq='s', tz=tz))
        self.ts = self.s[midpoint]

    def time_series_timestamp_compare(self, tz):
        self.s <= self.ts

    def time_timestamp_series_compare(self, tz):
        self.ts >= self.s

    def time_timestamp_ops_diff(self, tz):
        self.s2.diff()

    def time_timestamp_ops_diff_with_shift(self, tz):
        self.s - self.s.shift()
Example #10
0
    def test_timedelta_ops(self):
        """Reductions on a diffed timedelta Series return Timedelta (unittest style)."""
        # GH4984
        # make sure ops return Timedelta
        s = Series([
            Timestamp('20130101') + timedelta(seconds=i * i) for i in range(10)
        ])
        td = s.diff()

        result = td.mean()
        expected = to_timedelta(timedelta(seconds=9))
        self.assertEqual(result, expected)

        result = td.to_frame().mean()
        self.assertEqual(result[0], expected)

        result = td.quantile(.1)
        expected = Timedelta(np.timedelta64(2600, 'ms'))
        self.assertEqual(result, expected)

        result = td.median()
        expected = to_timedelta('00:00:09')
        self.assertEqual(result, expected)

        result = td.to_frame().median()
        self.assertEqual(result[0], expected)

        # GH 6462
        # consistency in returned values for sum
        result = td.sum()
        expected = to_timedelta('00:01:21')
        self.assertEqual(result, expected)

        result = td.to_frame().sum()
        self.assertEqual(result[0], expected)

        # std
        result = td.std()
        expected = to_timedelta(Series(td.dropna().values).std())
        self.assertEqual(result, expected)

        result = td.to_frame().std()
        self.assertEqual(result[0], expected)

        # invalid ops
        for op in ['skew', 'kurt', 'sem', 'prod']:
            pytest.raises(TypeError, getattr(td, op))

        # GH 10040
        # make sure NaT is properly handled by median()
        s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07')])
        self.assertEqual(s.diff().median(), timedelta(days=4))

        s = Series([
            Timestamp('2015-02-03'),
            Timestamp('2015-02-07'),
            Timestamp('2015-02-15')
        ])
        self.assertEqual(s.diff().median(), timedelta(days=6))
Example #11
0
    def test_timedelta_ops(self):
        """Reductions on a diffed timedelta Series return Timedelta (GH#4984)."""
        # GH#4984
        # make sure ops return Timedelta
        s = Series([Timestamp('20130101') + timedelta(seconds=i * i)
                    for i in range(10)])
        td = s.diff()

        result = td.mean()
        expected = to_timedelta(timedelta(seconds=9))
        assert result == expected

        result = td.to_frame().mean()
        assert result[0] == expected

        result = td.quantile(.1)
        expected = Timedelta(np.timedelta64(2600, 'ms'))
        assert result == expected

        result = td.median()
        expected = to_timedelta('00:00:09')
        assert result == expected

        result = td.to_frame().median()
        assert result[0] == expected

        # GH#6462
        # consistency in returned values for sum
        result = td.sum()
        expected = to_timedelta('00:01:21')
        assert result == expected

        result = td.to_frame().sum()
        assert result[0] == expected

        # std
        result = td.std()
        expected = to_timedelta(Series(td.dropna().values).std())
        assert result == expected

        result = td.to_frame().std()
        assert result[0] == expected

        # invalid ops
        for op in ['skew', 'kurt', 'sem', 'prod']:
            msg = "reduction operation '{}' not allowed for this dtype"
            with pytest.raises(TypeError, match=msg.format(op)):
                getattr(td, op)()

        # GH#10040
        # make sure NaT is properly handled by median()
        s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07')])
        assert s.diff().median() == timedelta(days=4)

        s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07'),
                    Timestamp('2015-02-15')])
        assert s.diff().median() == timedelta(days=6)
Example #12
0
def series_differentiation(series: Series, order: int) -> Series:
    """
    Timeseries differentiation. Useful for detrending a series.

    :param series: series to differentiate
    :param order: number of times to apply ``Series.diff`` (must be >= 1)
    :return: the differentiated series, or ``None`` for an invalid order
    """
    # Generalized: any positive order is handled by applying diff repeatedly,
    # which matches the original hard-coded behavior for orders 1 and 2.
    if order >= 1:
        result = series
        for _ in range(order):
            result = result.diff()
        return result
    print("It doesn't have sense ")
Example #13
0
    def test_timedelta_ops(self):
        """Reductions on a diffed timedelta Series return Timedelta (unittest style)."""
        # GH4984
        # make sure ops return Timedelta
        s = Series([Timestamp('20130101') + timedelta(seconds=i * i)
                    for i in range(10)])
        td = s.diff()

        result = td.mean()
        expected = to_timedelta(timedelta(seconds=9))
        self.assertEqual(result, expected)

        result = td.to_frame().mean()
        self.assertEqual(result[0], expected)

        result = td.quantile(.1)
        expected = Timedelta(np.timedelta64(2600, 'ms'))
        self.assertEqual(result, expected)

        result = td.median()
        expected = to_timedelta('00:00:09')
        self.assertEqual(result, expected)

        result = td.to_frame().median()
        self.assertEqual(result[0], expected)

        # GH 6462
        # consistency in returned values for sum
        result = td.sum()
        expected = to_timedelta('00:01:21')
        self.assertEqual(result, expected)

        result = td.to_frame().sum()
        self.assertEqual(result[0], expected)

        # std
        result = td.std()
        expected = to_timedelta(Series(td.dropna().values).std())
        self.assertEqual(result, expected)

        result = td.to_frame().std()
        self.assertEqual(result[0], expected)

        # invalid ops
        for op in ['skew', 'kurt', 'sem', 'prod']:
            self.assertRaises(TypeError, getattr(td, op))

        # GH 10040
        # make sure NaT is properly handled by median()
        s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07')])
        self.assertEqual(s.diff().median(), timedelta(days=4))

        s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07'),
                    Timestamp('2015-02-15')])
        self.assertEqual(s.diff().median(), timedelta(days=6))
    def test_timedelta_ops(self):
        """Reductions on a diffed timedelta Series return Timedelta (GH#4984)."""
        # GH#4984
        # make sure ops return Timedelta
        s = Series([
            Timestamp("20130101") + timedelta(seconds=i * i) for i in range(10)
        ])
        td = s.diff()

        result = td.mean()
        expected = to_timedelta(timedelta(seconds=9))
        assert result == expected

        result = td.to_frame().mean()
        assert result[0] == expected

        result = td.quantile(0.1)
        expected = Timedelta(np.timedelta64(2600, "ms"))
        assert result == expected

        result = td.median()
        expected = to_timedelta("00:00:09")
        assert result == expected

        result = td.to_frame().median()
        assert result[0] == expected

        # GH#6462
        # consistency in returned values for sum
        result = td.sum()
        expected = to_timedelta("00:01:21")
        assert result == expected

        result = td.to_frame().sum()
        assert result[0] == expected

        # std
        result = td.std()
        expected = to_timedelta(Series(td.dropna().values).std())
        assert result == expected

        result = td.to_frame().std()
        assert result[0] == expected

        # GH#10040
        # make sure NaT is properly handled by median()
        s = Series([Timestamp("2015-02-03"), Timestamp("2015-02-07")])
        assert s.diff().median() == timedelta(days=4)

        s = Series([
            Timestamp("2015-02-03"),
            Timestamp("2015-02-07"),
            Timestamp("2015-02-15")
        ])
        assert s.diff().median() == timedelta(days=6)
Example #15
0
def get_bvc_buy_volume(close: pd.Series,
                       volume: pd.Series,
                       window: int = 20) -> pd.Series:
    """Estimate buy volume via the BVC (bulk volume classification) rule.

    :param close: (pd.Series): series of close prices
    :param volume: (pd.Series): series of bar volumes
    :param window: (int); window for std estimation uses in BVC calculation
    :return: buy volume per bar
    """
    price_change = close.diff()
    standardized = price_change / price_change.rolling(window=window).std()
    return volume * norm.cdf(standardized)
def get_bvc_buy_volume(close: pd.Series, volume: pd.Series, window: int = 20) -> pd.Series:
    """
    Calculates the BVC buy volume

    :param close: (pd.Series): Close prices
    :param volume: (pd.Series): Bar volumes
    :param window: (int): Window for std estimation uses in BVC calculation
    :return: (pd.Series) BVC buy volume
    """
    delta = close.diff()
    z_score = delta / delta.rolling(window=window).std()
    # element-wise .apply(norm.cdf) avoids the Warning norm.cdf emits for a
    # pd.Series containing NaNs
    buy_fraction = z_score.apply(norm.cdf)
    return volume * buy_fraction
Example #17
0
def adx(high: pd.Series, low: pd.Series, smooth: int = 14, di_len: int = 14):
    """Average Directional Index with its +DI / -DI components.

    Returns a dict with keys 'adx', 'pos' and 'neg'.  Relies on the external
    average_true_range and modified_moving_average helpers.
    """
    up_move, down_move = high.diff(), low.diff()
    abs_up, abs_down = up_move.abs(), down_move.abs()

    # directional movement: keep a side only when it dominates and points
    # in its own direction, otherwise zero it out
    pos_dm = up_move.abs()
    pos_dm[(abs_up <= abs_down) | (up_move < 0)] = 0.0
    neg_dm = down_move.abs()
    neg_dm[(abs_down <= abs_up) | (down_move < 0)] = 0.0

    true_range = average_true_range(high, low, di_len)
    pos_di = modified_moving_average(pos_dm, smooth) * 100.0 / true_range
    neg_di = modified_moving_average(neg_dm, smooth) * 100.0 / true_range
    dx = (pos_di - neg_di).abs() * 100.0 / (pos_di + neg_di)
    return {'adx': modified_moving_average(dx, smooth),
            'pos': pos_di,
            'neg': neg_di}
Example #18
0
    def calc_td(series: Series, direction: str, show_all: bool):
        # TD Sequential count: a bar counts when price is above (direction
        # "up") or below (otherwise) the value 4 bars earlier.
        td_bool = series.diff(4) > 0 if direction=="up" else series.diff(4) < 0
        # Length of the current run of True values inside a 13-bar window.
        # NOTE(review): relies on the external true_sequence_count helper.
        td_num = npWhere(
            td_bool, td_bool.rolling(13, min_periods=0).apply(true_sequence_count), 0
        )
        td_num = Series(td_num)

        if show_all:
            # show every non-zero count
            td_num = td_num.mask(td_num == 0)
        else:
            # keep only counts between 6 and 9, hide the rest
            td_num = td_num.mask(~td_num.between(6,9))

        return td_num
Example #19
0
    def rsin(x: pd.Series,
             window: int,
             ewm: bool,
             groupfreq: AnyStr = '') -> np.array:
        """
        Calculate normalized relative strength index.

        Upward and downward moves are averaged separately and combined into
        (up - down) / (up + down).  NOTE(review): the averaging itself is
        delegated to TechAnalysis.moving_avg — confirm its semantics there.
        """

        # average of upward moves (diff clipped below at 0)
        ma_u = TechAnalysis.moving_avg(np.maximum(x.diff(), 0), window, ewm,
                                       groupfreq)
        # average magnitude of downward moves (diff clipped above at 0, negated)
        ma_l = TechAnalysis.moving_avg(-np.minimum(x.diff(), 0), window, ewm,
                                       groupfreq)
        return (ma_u - ma_l) / (ma_u + ma_l)
Example #20
0
def pdi(price_high: Series, price_low: Series, price_close: Series,
        period: int, moving_average: MovingAverage) -> Series:
    """
    Plus Directional Indicator (PDI / +DI).

    <Description>
    Computes the Plus Directional Indicator, which measures the share of
    effective upward movement.  Values range between 0 and 1.

    <Usage>
    Pass the high, low and close series, the averaging period, and the
    moving-average type.  For example, a 14-day PDI using an exponential
    moving average is written ``pdi(high, low, close, 14, ema)``.

    :param price_high: high price series
    :param price_low: low price series
    :param price_close: close price series
    :param period: averaging period
    :param moving_average: moving-average type, e.g. simple, exponential, weighted
    :return:
    """
    # +DM: today's up-move, kept only when positive and larger than the
    # corresponding down-move.
    pdm = np.where(((price_high.diff(1) > 0) &
                    (price_high.diff(1) > price_low.shift(1) - price_low)),
                   price_high.diff(1), 0)

    # NOTE(review): if moving_average matches none of the branches below,
    # pdmn (and later trn) is never bound and a NameError is raised.
    if moving_average == MovingAverage.sma:
        pdmn = sma(Series(pdm), period)
    elif moving_average == MovingAverage.ema:
        pdmn = ema(Series(pdm), period)
    elif moving_average == MovingAverage.ewma:
        pdmn = ewma(Series(pdm), period)
    elif moving_average == MovingAverage.wma:
        pdmn = wma(Series(pdm), period)

    # true range, smoothed with the same moving-average type as +DM
    tr = _tr(price_high, price_low, price_close)

    if moving_average == MovingAverage.sma:
        trn = sma(tr, period)
    elif moving_average == MovingAverage.ema:
        trn = ema(tr, period)
    elif moving_average == MovingAverage.ewma:
        trn = ewma(tr, period)
    elif moving_average == MovingAverage.wma:
        trn = wma(tr, period)

    return pdmn.divide(trn)
Example #21
0
def robust_daily_vol_given_price(price: pd.Series, **kwargs):
    """Resample a price series to business days and return its robust vol.

    Keyword arguments are forwarded unchanged to robust_vol_calc.
    """
    business_daily_price = price.resample("1B").ffill()
    daily_price_changes = business_daily_price.diff()
    return robust_vol_calc(daily_price_changes, **kwargs)
Example #22
0
def get_chow_type_stat(series: pd.Series, min_length: int = 20) -> pd.Series:
    """
    Multithread implementation of Chow-Type Dickey-Fuller Test, p.251-252
    :param series: (pd.Series) series to test
    :param min_length: (int) minimum sample length used to estimate statistics
    :return: (pd.Series) of Chow-Type Dickey-Fuller Test statistics
    """
    # Indices to test. We drop min_length first and last values
    molecule = series.index[min_length:series.shape[0] - min_length]
    molecule = molecule.values
    molecule_range = np.arange(0, len(molecule))

    # first differences and one-period lag of the series, as raw arrays
    series_diff = series.diff().dropna()
    series_diff = series_diff.values
    series_lag = series.shift(1).dropna()
    series_lag_values = series_lag.values
    series_lag_times_ = series_lag.index.values
    # position of the first tested index inside the lagged arrays
    series_lag_values_start = np.where(
        series_lag_times_ == molecule[0])[0].item() + 1

    # NOTE(review): statistic computation is delegated to _get_dfc_for_t.
    dfc_series = _get_dfc_for_t(molecule_range, series_lag_values_start,
                                series_diff, series_lag_values)

    dfc_series = pd.Series(dfc_series, index=molecule)

    return dfc_series
Example #23
0
    def __init__(self,
                 prices: dict,
                 trading_signal: Series,
                 initialcash: float = 1000):
        """
        Parameters
        -----------
        prices :  instrument price
        trading_signal : capital to invest (long+,short-) or number of shares
        initialcash : float = 1000 starting cash

        """

        # first thing to do is to clean up the signal, removing nans and duplicate entries or exits
        # NOTE(review): only diff() happens here; NaNs are filled further
        # below, so the comment above overstates this line.
        self.trades = trading_signal.diff()

        # now create internal data structure
        # keep only the last ~20% of the price history (plus a 9-row offset)
        split = int((len(prices) - 9) * 0.8)
        self.prices = pd.DataFrame.from_dict(
            prices, orient="index").iloc[split + 9:, :]
        self.data = pd.DataFrame(
            index=self.prices.index,
            columns=['prices', 'shares', 'value', 'cash', 'pnl'])
        self.data['prices'] = self.prices

        # signal changes are treated as the share position; NaNs become flat
        self.data['shares'] = self.trades.fillna(0)
        self.data['value'] = self.data['shares'] * self.data['prices'].fillna(
            0)

        delta = self.data['shares'].diff()  # shares bought sold

        # cash evolves by the cost of each position change;
        # pnl = cash + position value - starting cash
        self.data['cash'] = (
            -delta * self.data['prices']).fillna(0).cumsum() + initialcash
        self.data['pnl'] = (self.data['cash'] + self.data['value'] -
                            initialcash).fillna(0)
Example #24
0
def __clean_artifacts(data: pd.Series, threshold=0.2) -> pd.Series:
    """
    Cleans obviously illegal IBI values (artefacts) from a list

    Parameters
    ----------
    data : pd.Series
        the IBI list
    threshold : float, optional
        the maximum relative deviation between subsequent intervals, by default 0.2

    Returns
    -------
    pd.Series
        the cleaned IBIs
    """
    # FIX: operate on a copy so the caller's series is no longer mutated in
    # place (the original dropped rows from the argument itself).
    data = data.copy()

    # Statistical artifact detection: drop intervals that deviate more than
    # `threshold` (relative) from the previous interval.
    deviation = data.diff().abs()
    outliers = deviation > threshold * data
    if outliers.any():
        data = data.drop(data[outliers].index)

    # Physiological bounds: drop by bpm > 240 or bpm < 30 (IBI in ms,
    # 60000/250 = 240 bpm, 60000/2000 = 30 bpm).
    out_of_range = (data < 250) | (data > 2000)
    if out_of_range.any():
        data = data.drop(data[out_of_range].index)

    return data.dropna()  # just to be sure
Example #25
0
    def adjacent_to_irregular(df_resampled: pd.Series, samp_freq: int):
        '''Convert type-2A series data (after resampling) into type-2B event data.
        Applies only to hypnogram result series.
        : df_resampled: pandas Series holding a continuous run of labels, with a datetime index
        : samp_freq: sampling frequency, an integer, e.g. 10, 30
        : return: a dataframe with one row per event and four columns:
        evt_start/stop = event start time and end time;
        evt_info: event duration in seconds;
        evt_value: event label, as defined in the dictionary
        '''

        # sample period in seconds, rounded to 5 decimals
        fs = np.round(float(1 / samp_freq), 5)

        # run-length encode the labels: [label, run duration in seconds]
        grp = np.array([[int(k), float(len(list(g)) / samp_freq)]
                        for k, g in (groupby(df_resampled))])

        # timestamps where the label changes (start of each run)
        change_point = np.array(
            df_resampled.diff()[lambda x: x != 0].index.tolist())

        df_event = pd.DataFrame({
            'evt_start':
            change_point,
            'evt_stop':
            np.array([
                s + pd.Timedelta(f"{i}S") - pd.Timedelta(f"{fs}S")
                for s, i in zip(change_point, grp[:, 1])
            ]),
            'evt_info':
            grp[:, 1],
            'evt_value':
            grp[:, 0],
        })

        return df_event
Example #26
0
    def test_timedelta_ops(self):
        """Legacy check: reductions on a diffed timedelta Series (GH4984)."""
        _skip_if_numpy_not_friendly()

        # GH4984
        # make sure ops return timedeltas
        s = Series([Timestamp('20130101') + timedelta(seconds=i*i) for i in range(10) ])
        td = s.diff()

        result = td.mean()[0]
        # TODO This should have returned a scalar to begin with. Hack for now.
        expected = to_timedelta(timedelta(seconds=9))
        tm.assert_almost_equal(result, expected)

        result = td.quantile(.1)
        # This properly returned a scalar.
        expected = to_timedelta('00:00:02.6')
        tm.assert_almost_equal(result, expected)

        result = td.median()[0]
        # TODO This should have returned a scalar to begin with. Hack for now.
        expected = to_timedelta('00:00:08')
        tm.assert_almost_equal(result, expected)

        # GH 6462
        # consistency in returned values for sum
        result = td.sum()[0]
        expected = to_timedelta('00:01:21')
        tm.assert_almost_equal(result, expected)
Example #27
0
    def test_timedelta_ops(self):
        """Reductions on a diffed timedelta Series return Timedelta (GH4984)."""
        # GH4984
        # make sure ops return Timedelta
        s = Series([
            Timestamp('20130101') + timedelta(seconds=i * i) for i in range(10)
        ])
        td = s.diff()

        result = td.mean()
        expected = to_timedelta(timedelta(seconds=9))
        self.assertEqual(result, expected)

        result = td.quantile(.1)
        expected = Timedelta(np.timedelta64(2600, 'ms'))
        self.assertEqual(result, expected)

        result = td.median()
        expected = to_timedelta('00:00:08')
        self.assertEqual(result, expected)

        # GH 6462
        # consistency in returned values for sum
        result = td.sum()
        expected = to_timedelta('00:01:21')
        tm.assert_almost_equal(result, expected)
        # NOTE(review): redundant with the assert_almost_equal just above.
        self.assertEqual(result, expected)
Example #28
0
def unsigned_differences(series: Series, amount: int = None,
                         **kwargs) -> "tuple[Series, Series]":
    """Unsigned Differences
    Returns two Series, an unsigned positive and unsigned negative series based
    on the differences of the original series. The positive series are only the
    increases and the negative series is only the decreases.

    Default Example:
    series   = Series([3, 2, 2, 1, 1, 5, 6, 6, 7, 5, 3]) and returns
    positive = Series([0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0])
    negative = Series([0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1])

    Keyword argument ``asint`` casts both result series to int.
    """
    # FIX: return annotation previously claimed a single Series even though
    # two Series are returned.
    amount = int(amount) if amount is not None else 1
    negative = series.diff(amount)
    negative.fillna(0, inplace=True)
    positive = negative.copy()

    # turn the signed differences into 0/1 indicator series
    positive[positive <= 0] = 0
    positive[positive > 0] = 1

    negative[negative >= 0] = 0
    negative[negative < 0] = 1

    if kwargs.pop("asint", False):
        positive = positive.astype(int)
        negative = negative.astype(int)

    return positive, negative
Example #29
0
def psychological_line(price: Series, period: int) -> Series:
    """Psychological Line.

    Fraction of "up" days — days whose price is above the previous day's —
    within the last ``period`` observations.  Values range from 0 to 1 and
    can be used to gauge overheating or depression of the market.

    For example, a 10-day psychological line on closing prices is
    ``psychological_line(close, 10)`` and equals
    (number of up days in the last 10 days) / 10.

    :param price: price series, e.g. open, high, low or close
    :param period: rolling-window length
    :return: rolling share of up days (NaN until ``period`` samples exist)
    """
    gained = np.where(price.diff(1).gt(0), 1, 0)
    rising_days = Series(gained).rolling(window=period,
                                         min_periods=period).sum()
    return rising_days.divide(period)
Example #30
0
def import_bid_ask_data():
    """Compute per-stock effective spreads from tick HDF5 files and pickle them.

    NOTE(review): hard-coded Windows paths; os/time/h5py/pandas/numpy are
    imported elsewhere in this file.
    """
    rootdir='I:/tickhdf_stk'
    savedir='F:/data/xccdata/bid_ask'
    #rootdir = '/Users/harbes/data/xccdata/bid_ask'
    # rootdir = 'F:/data/xccdata/bid_ask'
    li_ = [i for i in os.listdir(rootdir) if not i.endswith('_') and not i.endswith('.h5')]  # list all directories and files in the folder
    os.mkdir(savedir + '/effective_spread_')  # create the output folder
    now0 = time.time()
    for i in li_[150:]:  # extra care needed on Mac # Series & np.array: one day of data takes about 12s
        # path = rootdir + '/' + i
        f = h5py.File(rootdir+ '/' + i, 'r')
        effective_spread = Series(np.nan, index=np.array(f['stk']))
        for stk in f['stk']:  # ['603611']:#['000031']:# ['000504']
            bid = np.array(f['stk'][stk]['bidPrc_1'])  # Series(f['stk'][stk]['bidPrc_1']) #
            ask = np.array(f['stk'][stk]['askPrc_1'])  # Series(f['stk'][stk]['askPrc_1'])#
            prc = np.array(f['stk'][stk]['lastPrc'])  # Series(f['stk'][stk]['lastPrc']) #
            volume = Series(f['stk'][stk]['volume'])  # np.array(f['stk'][stk]['volume'])[(bid>0) & (ask>0)] #
            # cumulative volume -> per-tick volume; first tick keeps its level
            volume = volume.diff(1).fillna(volume[0])
            # DataFrame({'bid': bid, 'ask': ask, 'prc': prc, 'volume': volume})#, 'trend':trend})
            tmp = np.sum((volume * prc)[(bid > 0) & (ask > 0)])
            effective_spread[stk] = 0 if tmp == 0 else 2 * np.sum(
                (np.abs(2 * prc / (bid + ask) - 1) * volume * prc)[(bid > 0) & (ask > 0)]) / tmp
        # effective_spread[effective_spread <= 0] = np.nan # could also be set after aggregating all the data
        effective_spread.to_pickle(savedir + '/effective_spread_/' + i)
    print(time.time() - now0)
Example #31
0
    def test_timedelta_ops(self):
        """Legacy check: reductions on a diffed timedelta Series (GH4984)."""
        # GH4984
        # make sure ops return timedeltas
        s = Series([
            Timestamp('20130101') + timedelta(seconds=i * i) for i in range(10)
        ])
        td = s.diff()

        result = td.mean()[0]
        # TODO This should have returned a scalar to begin with. Hack for now.
        expected = to_timedelta(timedelta(seconds=9))
        tm.assert_almost_equal(result, expected)

        result = td.quantile(.1)
        # This properly returned a scalar.
        expected = np.timedelta64(2599999999, 'ns')
        tm.assert_almost_equal(result, expected)

        result = td.median()[0]
        # TODO This should have returned a scalar to begin with. Hack for now.
        expected = to_timedelta('00:00:08')
        tm.assert_almost_equal(result, expected)

        # GH 6462
        # consistency in returned values for sum
        result = td.sum()[0]
        expected = to_timedelta('00:01:21')
        tm.assert_almost_equal(result, expected)
Example #32
0
def robust_daily_vol_given_price(price: pd.Series, **kwargs):
    """Estimate robust daily volatility from a price series.

    Resamples *price* to daily prices, takes first differences to obtain
    daily returns, and delegates the volatility estimate (with any extra
    keyword options) to ``robust_vol_calc``.
    """
    daily_price = prices_to_daily_prices(price)
    return robust_vol_calc(daily_price.diff(), **kwargs)
Example #33
0
    def _gradient(self, data: pd.Series, periods: int = 1) -> pd.Series:
        dt = data.index.to_series().diff().dt.total_seconds()
        if dt.min() < 1e-15:
            raise ValueError("Input must be monotonic increasing")

        gradient = data.diff(periods=periods) / dt
        return gradient
def plot_time(time: pandas.Series):
    """Fit a gamma PDF to observed time intervals and plot it.

    Estimates a probability density for the gaps between consecutive
    timestamps (in this simulation the interval distribution is shared by
    all sensors and rooms) and overlays a histogram of the measured data.

    https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.rv_continuous.fit.html
    """
    seconds_between = time.diff().dropna().dt.total_seconds()

    shape, loc, scale = gamma.fit(seconds_between)

    # Arbitrary time-interval range to evaluate the fitted PDF over.
    t_axis = np.arange(0.01, 5, 0.01)
    density = gamma.pdf(t_axis, shape, loc=loc, scale=scale)

    axes = plt.figure().gca()
    axes.plot(t_axis, density)
    axes.set_xlabel("Time Interval (seconds)")
    axes.set_ylabel("Probability")
    axes.set_title("Time interval observed")

    # Add the measured data to the plot.
    axes.hist(seconds_between, bins=100)
 def consecutive_wins_losses(self):
     '''
     Calculates the positive and negative runs in the trade series.

     Returns a tuple ``(positive_runs, negative_runs)`` of Series holding
     the length of each consecutive winning / losing streak, in the order
     the streaks occur (trades sorted by exit).
     '''
     trade_df = self.as_dataframe().sort_values(by = 'exit')
     win_loss = sign(trade_df.base_return)
     # Create series which has just 1's and 0's
     # Zero-padding both ends guarantees every run has a detectable
     # start (0 -> 1 transition) and end (1 -> 0 transition).
     positive = Series(hstack(([0], ((win_loss > 0) * 1).values, [0])))
     negative = Series(hstack(([0], ((win_loss < 0) * 1).values, [0])))
     # diff() > 0 marks the first index of a run; diff() < 0 marks the index
     # just past its last element, so end - start equals the run length.
     pos_starts = positive.where(positive.diff() > 0)
     pos_starts = Series(pos_starts.dropna().index.tolist())
     pos_ends = positive.where(positive.diff() < 0)
     pos_ends = Series(pos_ends.dropna().index.tolist())
     positive_runs = pos_ends - pos_starts
     neg_starts = negative.where(negative.diff() > 0)
     neg_starts = Series(neg_starts.dropna().index.tolist())
     neg_ends = negative.where(negative.diff() < 0)
     neg_ends = Series(neg_ends.dropna().index.tolist())
     negative_runs = neg_ends - neg_starts
     return (positive_runs, negative_runs)
Example #36
0
    def test_diff(self):
        """Series.diff: int dtype, negative/zero periods, datetime and
        timedelta dtypes, and tz-aware datetimes (old unittest-style API).
        """
        # Just run the function
        self.ts.diff()

        # int dtype
        a = 10000000000000000
        b = a + 1
        s = Series([a, b])

        rs = s.diff()
        self.assertEqual(rs[1], 1)

        # neg n
        rs = self.ts.diff(-1)
        xp = self.ts - self.ts.shift(-1)
        assert_series_equal(rs, xp)

        # 0
        rs = self.ts.diff(0)
        xp = self.ts - self.ts
        assert_series_equal(rs, xp)

        # datetime diff (GH3100)
        s = Series(date_range('20130102', periods=5))
        rs = s - s.shift(1)
        xp = s.diff()
        assert_series_equal(rs, xp)

        # timedelta diff
        nrs = rs - rs.shift(1)
        nxp = xp.diff()
        assert_series_equal(nrs, nxp)

        # with tz
        s = Series(
            date_range('2000-01-01 09:00:00', periods=5,
                       tz='US/Eastern'), name='foo')
        result = s.diff()
        assert_series_equal(result, Series(
            TimedeltaIndex(['NaT'] + ['1 days'] * 4), name='foo'))
Example #37
0
    def test_timedelta_fillna(self):
        """fillna / ffill / bfill on a timedelta64 Series built from
        Timestamp diffs; integer fill values are interpreted as seconds.

        NOTE(review): uses the long-removed ``pandas.tslib`` module, so this
        only runs on an old pandas version.
        """
        # GH 3371
        s = Series([Timestamp("20130101"), Timestamp("20130101"), Timestamp("20130102"), Timestamp("20130103 9:01:01")])
        td = s.diff()

        # reg fillna
        result = td.fillna(0)
        expected = Series([timedelta(0), timedelta(0), timedelta(1), timedelta(days=1, seconds=9 * 3600 + 60 + 1)])
        assert_series_equal(result, expected)

        # interpreted as seconds
        result = td.fillna(1)
        expected = Series(
            [timedelta(seconds=1), timedelta(0), timedelta(1), timedelta(days=1, seconds=9 * 3600 + 60 + 1)]
        )
        assert_series_equal(result, expected)

        result = td.fillna(timedelta(days=1, seconds=1))
        expected = Series(
            [timedelta(days=1, seconds=1), timedelta(0), timedelta(1), timedelta(days=1, seconds=9 * 3600 + 60 + 1)]
        )
        assert_series_equal(result, expected)

        # np.timedelta64 of 1e9 ns == 1 second
        result = td.fillna(np.timedelta64(int(1e9)))
        expected = Series(
            [timedelta(seconds=1), timedelta(0), timedelta(1), timedelta(days=1, seconds=9 * 3600 + 60 + 1)]
        )
        assert_series_equal(result, expected)

        from pandas import tslib

        # NaT fill leaves the missing entry missing but keeps m8[ns] dtype
        result = td.fillna(tslib.NaT)
        expected = Series(
            [tslib.NaT, timedelta(0), timedelta(1), timedelta(days=1, seconds=9 * 3600 + 60 + 1)], dtype="m8[ns]"
        )
        assert_series_equal(result, expected)

        # ffill
        td[2] = np.nan
        result = td.ffill()
        expected = td.fillna(0)
        expected[0] = np.nan
        assert_series_equal(result, expected)

        # bfill
        td[2] = np.nan
        result = td.bfill()
        expected = td.fillna(0)
        expected[2] = timedelta(days=1, seconds=9 * 3600 + 60 + 1)
        assert_series_equal(result, expected)
Example #38
0
    def test_timedelta_ops(self):
        """Reductions on a timedelta Series return Timedelta scalars, the
        frame-level reductions agree, and non-sensical ops raise TypeError.
        """
        # GH4984
        # make sure ops return Timedelta
        s = Series([Timestamp("20130101") + timedelta(seconds=i * i) for i in range(10)])
        td = s.diff()

        result = td.mean()
        expected = to_timedelta(timedelta(seconds=9))
        self.assertEqual(result, expected)

        # frame-level mean must agree with the Series result
        result = td.to_frame().mean()
        self.assertEqual(result[0], expected)

        result = td.quantile(0.1)
        expected = Timedelta(np.timedelta64(2600, "ms"))
        self.assertEqual(result, expected)

        result = td.median()
        expected = to_timedelta("00:00:08")
        self.assertEqual(result, expected)

        result = td.to_frame().median()
        self.assertEqual(result[0], expected)

        # GH 6462
        # consistency in returned values for sum
        result = td.sum()
        expected = to_timedelta("00:01:21")
        self.assertEqual(result, expected)

        result = td.to_frame().sum()
        self.assertEqual(result[0], expected)

        # std
        result = td.std()
        expected = to_timedelta(Series(td.dropna().values).std())
        self.assertEqual(result, expected)

        result = td.to_frame().std()
        self.assertEqual(result[0], expected)

        # invalid ops
        for op in ["skew", "kurt", "sem", "var", "prod"]:
            self.assertRaises(TypeError, lambda: getattr(td, op)())
Example #39
0
    def test_timedelta_ops(self):
        """mean/quantile/median on a timedelta Series built from Timestamp diffs.

        NOTE(review): compares the reduction results with
        ``tm.assert_series_equal`` — presumably these reductions returned
        Series-like objects in the pandas version this targeted.
        """
        _skip_if_numpy_not_friendly()

        # GH4984
        # make sure ops return timedeltas
        s = Series([Timestamp('20130101') + timedelta(seconds=i*i) for i in range(10) ])
        td = s.diff()

        result = td.mean()
        expected = to_timedelta(timedelta(seconds=9))
        tm.assert_series_equal(result, expected)

        result = td.quantile(.1)
        expected = to_timedelta('00:00:02.6')
        tm.assert_series_equal(result, expected)

        result = td.median()
        expected = to_timedelta('00:00:08')
        tm.assert_series_equal(result, expected)
Example #40
0
    def test_timedelta_ops(self):
        """mean/quantile/median/sum on a timedelta Series return Timedelta
        scalars (old unittest-style API).
        """
        # GH4984
        # make sure ops return Timedelta
        s = Series([Timestamp('20130101') + timedelta(seconds=i*i) for i in range(10) ])
        td = s.diff()

        result = td.mean()
        expected = to_timedelta(timedelta(seconds=9))
        self.assertEqual(result, expected)

        result = td.quantile(.1)
        expected = Timedelta(np.timedelta64(2600,'ms'))
        self.assertEqual(result, expected)

        result = td.median()
        expected = to_timedelta('00:00:08')
        self.assertEqual(result, expected)

        # GH 6462
        # consistency in returned values for sum
        # NOTE(review): the two assertions below are redundant — both compare
        # the same pair of values.
        result = td.sum()
        expected = to_timedelta('00:01:21')
        tm.assert_almost_equal(result, expected)
        self.assertEqual(result, expected)
Example #41
0
K   128.09496
L   113.08406
M   131.04049
N   114.04293
P   97.05276
Q   128.05858
R   156.10111
S   87.03203
T   101.04768
V   99.06841
W   186.07931
Y   163.06333'''

import pandas as pd
from pandas import DataFrame, Series

import numpy as np

# Parse the monoisotopic mass table: one "<residue> <mass>" pair per line.
# Use a context manager so the file handle is always closed.
with open('monoisotopic_mass_table.txt') as table_file:
    t = [i.rstrip().split() for i in table_file]
# BUG FIX: the original passed dtype=float to the DataFrame constructor,
# which tries to coerce the string residue column to float and raises on
# modern pandas; convert only the mass column instead.
mass_table = DataFrame(t, columns=['residue', 'mass'])

residue = np.array(mass_table.residue)
mass = np.array(mass_table.mass, dtype=float)

# The prefix spectrum: consecutive differences give the residue masses.
with open('rosalind_spec.txt') as spec_file:
    spec = Series(spec_file.readlines(), dtype=float)
increment = np.array(spec.diff()[1:])

# Match each mass increment to the first residue within 1e-4 Da.
peptide = ''.join([residue[np.where(abs(mass - i) < 0.0001)[0][0]] for i in increment])

with open('rosalind_spec_sub.txt', 'wt') as out_file:
    out_file.write(peptide)
Example #42
0
def turnover(series: pd.Series):
    """Annualised turnover of a position series.

    Absolute day-over-day change, relative to the rolling 3-month mean
    absolute level, scaled to a yearly rate via ``system.n_bday_in_year``.
    """
    abs_change = series.diff().abs()
    rolling_level = series.abs().rolling(window=system.n_bday_in_3m).mean()
    return abs_change / rolling_level * system.n_bday_in_year
class MySeries:
    """Thin adapter around ``pandas.Series``.

    Wraps a Series and re-exposes a subset of its API — plus the legacy
    module-level ``pd.rolling_*`` functions — so that every operation
    returns another ``MySeries`` instead of a raw ``Series``.

    NOTE(review): relies on the removed ``pd.rolling_*`` module functions
    and on deprecated Series methods (``nonzero``, ``clip_lower``,
    ``clip_upper``), so this class requires an old pandas version.
    """

    def __init__(self, *args, **kwargs):
        # Build the wrapped Series and mirror its values/index for direct access.
        self.x = Series(*args, **kwargs)
        self.values = self.x.values
        self.index = self.x.index

    # --- legacy module-level rolling_* wrappers (pre-pandas-0.18 API) ---

    def rolling_mean(self, *args, **kwargs):
        return MySeries(pd.rolling_mean(self.x, *args, **kwargs))

    def rolling_count(self, *args, **kwargs):
        return MySeries(pd.rolling_count(self.x, *args, **kwargs))

    def rolling_sum(self, *args, **kwargs):
        return MySeries(pd.rolling_sum(self.x, *args, **kwargs))

    def rolling_median(self, *args, **kwargs):
        return MySeries(pd.rolling_median(self.x, *args, **kwargs))
        
    def rolling_min(self, *args, **kwargs):
        return MySeries(pd.rolling_min(self.x, *args, **kwargs))

    def rolling_max(self, *args, **kwargs):
        return MySeries(pd.rolling_max(self.x, *args, **kwargs))

    def rolling_std(self, *args, **kwargs):
        return MySeries(pd.rolling_std(self.x, *args, **kwargs))

    def rolling_var(self, *args, **kwargs):
        return MySeries(pd.rolling_var(self.x, *args, **kwargs))

    def rolling_skew(self, *args, **kwargs):
        return MySeries(pd.rolling_skew(self.x, *args, **kwargs))

    def rolling_kurtosis(self, *args, **kwargs):
        return MySeries(pd.rolling_kurtosis(self.x, *args, **kwargs))

    def rolling_window(self, *args, **kwargs):
        return MySeries(pd.rolling_window(self.x, *args, **kwargs))

    # --- straight delegation to the wrapped Series' own methods ---

    def cumprod(self, *args, **kwargs):
        return MySeries(self.x.cumprod(*args, **kwargs))

    def cumsum(self, *args, **kwargs):
        return MySeries(self.x.cumsum(*args, **kwargs))

    def diff(self, *args, **kwargs):
        return MySeries(self.x.diff(*args, **kwargs))

    def div(self, *args, **kwargs):
        return MySeries(self.x.div(*args, **kwargs))

    def mul(self, *args, **kwargs):
        return MySeries(self.x.mul(*args, **kwargs))

    def add(self, *args, **kwargs):
        return MySeries(self.x.add(*args, **kwargs))

    def dropna(self, *args, **kwargs):
        return MySeries(self.x.dropna(*args, **kwargs))
    
    def fillna(self, *args, **kwargs):
        return MySeries(self.x.fillna(*args, **kwargs))

    def floordiv(self, *args, **kwargs):
        return MySeries(self.x.floordiv(*args, **kwargs))

    def mod(self, *args, **kwargs):
        return MySeries(self.x.mod(*args, **kwargs))

    def nlargest(self, *args, **kwargs):
        return MySeries(self.x.nlargest(*args, **kwargs))

    def nonzero(self, *args, **kwargs):
        return MySeries(self.x.nonzero(*args, **kwargs))

    def nsmallest(self, *args, **kwargs):
        return MySeries(self.x.nsmallest(*args, **kwargs))

    def pow(self, *args, **kwargs):
        return MySeries(self.x.pow(*args, **kwargs))

    def rank(self, *args, **kwargs):
        return MySeries(self.x.rank(*args, **kwargs))

    def round(self, *args, **kwargs):
        return MySeries(self.x.round(*args, **kwargs))

    def shift(self, *args, **kwargs):
        return MySeries(self.x.shift(*args, **kwargs))

    def sub(self, *args, **kwargs):
        return MySeries(self.x.sub(*args, **kwargs))

    def abs(self, *args, **kwargs):
        return MySeries(self.x.abs(*args, **kwargs))

    def clip(self, *args, **kwargs):
        return MySeries(self.x.clip(*args, **kwargs))

    def clip_lower(self, *args, **kwargs):
        return MySeries(self.x.clip_lower(*args, **kwargs))

    def clip_upper(self, *args, **kwargs):
        return MySeries(self.x.clip_upper(*args, **kwargs))
    
    def interpolate(self, *args, **kwargs):
        return MySeries(self.x.interpolate(*args, **kwargs))

    def resample(self, *args, **kwargs):
        return MySeries(self.x.resample(*args, **kwargs))
        
    def replace(self, *args, **kwargs):
        return MySeries(self.x.replace(*args, **kwargs))