def _detect(self, data: pd.Series) -> pd.Series: if self._direction == "both": return np.abs(data.diff()) > self._max_diff elif self._direction == "positive": return data.diff() > self._max_diff else: return data.diff() < -self._max_diff
def test_diff_tz(self):
    """Series.diff for negative/zero periods, datetime input (GH#3100) and
    tz-aware datetime input."""
    # Combined datetime diff, normal diff and boolean diff test
    ts = tm.makeTimeSeries(name="ts")
    ts.diff()

    # neg n
    result = ts.diff(-1)
    expected = ts - ts.shift(-1)
    tm.assert_series_equal(result, expected)

    # 0
    result = ts.diff(0)
    expected = ts - ts
    tm.assert_series_equal(result, expected)

    # datetime diff (GH#3100)
    s = Series(date_range("20130102", periods=5))
    result = s.diff()
    expected = s - s.shift(1)
    tm.assert_series_equal(result, expected)

    # timedelta diff
    result = result - result.shift(1)  # previous result
    expected = expected.diff()  # previously expected
    tm.assert_series_equal(result, expected)

    # with tz: diff of tz-aware datetimes yields timedeltas
    s = Series(
        date_range("2000-01-01 09:00:00", periods=5, tz="US/Eastern"),
        name="foo")
    result = s.diff()
    expected = Series(TimedeltaIndex(["NaT"] + ["1 days"] * 4), name="foo")
    tm.assert_series_equal(result, expected)
class Timeseries(object):
    """ASV benchmark: timestamp comparisons and diff on million-row Series,
    naive and tz-aware (parameterized by ``tz``)."""

    goal_time = 0.2
    params = [None, 'US/Eastern']
    param_names = ['tz']

    def setup(self, tz):
        # One million minutely timestamps; self.ts is the midpoint scalar.
        self.N = 10**6
        self.halfway = ((self.N // 2) - 1)
        self.s = Series(date_range('20010101', periods=self.N, freq='T',
                                   tz=tz))
        self.ts = self.s[self.halfway]
        # Secondly-frequency series for the diff benchmarks.
        self.s2 = Series(date_range('20010101', periods=self.N, freq='s',
                                    tz=tz))

    def time_series_timestamp_compare(self, tz):
        # Series <= scalar Timestamp
        self.s <= self.ts

    def time_timestamp_series_compare(self, tz):
        # scalar Timestamp >= Series
        self.ts >= self.s

    def time_timestamp_ops_diff(self, tz):
        self.s2.diff()

    def time_timestamp_ops_diff_with_shift(self, tz):
        # diff expressed as subtraction with shift()
        self.s - self.s.shift()
def test_timedelta_ops(self):
    """Timedelta Series reductions return Timedelta scalars; invalid
    reductions raise with a matching message; median skips NaT."""
    # GH#4984
    # make sure ops return Timedelta
    s = Series([
        Timestamp('20130101') + timedelta(seconds=i * i) for i in range(10)
    ])
    td = s.diff()

    result = td.mean()
    expected = to_timedelta(timedelta(seconds=9))
    assert result == expected

    result = td.to_frame().mean()
    assert result[0] == expected

    result = td.quantile(.1)
    expected = Timedelta(np.timedelta64(2600, 'ms'))
    assert result == expected

    result = td.median()
    expected = to_timedelta('00:00:09')
    assert result == expected

    result = td.to_frame().median()
    assert result[0] == expected

    # GH#6462
    # consistency in returned values for sum
    result = td.sum()
    expected = to_timedelta('00:01:21')
    assert result == expected

    result = td.to_frame().sum()
    assert result[0] == expected

    # std
    result = td.std()
    expected = to_timedelta(Series(td.dropna().values).std())
    assert result == expected

    result = td.to_frame().std()
    assert result[0] == expected

    # invalid ops
    for op in ['skew', 'kurt', 'sem', 'prod']:
        msg = "reduction operation '{}' not allowed for this dtype"
        with pytest.raises(TypeError, match=msg.format(op)):
            getattr(td, op)()

    # GH#10040
    # make sure NaT is properly handled by median()
    s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07')])
    assert s.diff().median() == timedelta(days=4)

    s = Series([
        Timestamp('2015-02-03'), Timestamp('2015-02-07'),
        Timestamp('2015-02-15')
    ])
    assert s.diff().median() == timedelta(days=6)
def calculate_test_positivity(positive_tests: pd.Series,
                              negative_tests: pd.Series,
                              smooth: int = 7,
                              lag_lookback: int = 7) -> pd.Series:
    """Calculates positive test rate.

    Args:
        positive_tests: Number of cumulative positive tests.
        negative_tests: Number of cumulative negative tests.
        smooth: NOTE(review): currently unused - smoothing is delegated to
            series_utils defaults; confirm before removing.
        lag_lookback: Number of trailing samples checked for stale negatives.

    Returns:
        Positive test rate, or an empty float series when positives exist
        but all recent negative data is missing.
    """
    daily_positives = positive_tests.diff()
    daily_negatives = negative_tests.diff()

    smoothed_positives = series_utils.smooth_with_rolling_average(
        daily_positives)
    smoothed_negatives = series_utils.smooth_with_rolling_average(
        daily_negatives, include_trailing_zeros=False)

    # Guard: positives reported recently but no recent negative data at all.
    recent_positives = smoothed_positives[-lag_lookback:]
    recent_negatives = smoothed_negatives[-lag_lookback:]
    if any(recent_positives) and recent_negatives.isna().all():
        return pd.Series([], dtype="float64")

    return smoothed_positives / (smoothed_negatives + smoothed_positives)
def volume_ratio(price: Series, volume: Series, period: int) -> Series:
    """Volume Ratio.

    Compares up-day volume to down-day volume over ``period`` bars; volume
    on unchanged days is split half to each side.

    :param price: price series used to decide up/down days (e.g. close)
    :param volume: volume series
    :param period: rolling window length
    :return: rolling sum of up-volume divided by rolling sum of down-volume
    """
    change = price.diff(1)
    up = np.where(change.gt(0), volume, 0)
    down = np.where(change.lt(0), volume, 0)
    # BUG FIX: the original used Series.equals(0), which compares the whole
    # Series *object* to 0 (always False), so unchanged days contributed
    # nothing; element-wise eq(0) is what is intended here.
    maintain = np.where(change.eq(0), volume.mul(0.5), 0)
    up = up + maintain
    down = down + maintain
    sum_up = Series(up).rolling(window=period, min_periods=period).sum()
    sum_down = Series(down).rolling(window=period, min_periods=period).sum()
    return sum_up.div(sum_down)
def calc_glider_vert_velocity(time, depth):
    """
    Calculate glider vertical velocity in cm/s

    Parameters
    ----------
    time : np.array [datetime64]
        glider time dimension
    depth : np.array [float]
        depth (m) or pressure (dbar) if depth not avail

    Returns
    -------
    velocity : np.array
        vertical velocity in cm/s
    """
    from numpy import array
    from pandas import Series

    # Seconds elapsed since the first sample (datetime64[ns] -> float ns).
    nanoseconds = array(time).astype("datetime64[ns]").astype(float)
    seconds = Series((nanoseconds - nanoseconds.min()) / 1e9)

    # Metres (or dbar) converted to centimetres.
    depth_cm = Series(array(depth).astype(float) * 100)

    # Finite-difference velocity in cm/s.
    return depth_cm.diff() / seconds.diff()
def _segment_until(signal1: pd.Series, signal2: pd.Series, s: float, t: float,
                   z_max: float, out: pd.Series = None):
    """Evaluate one "until"-style segment of two piecewise-linear signals
    on the interval [s, t].

    Both signals are interpolated onto their index extended with the segment
    endpoints; the combination of partial "eventually" and segment "and"/"or"
    results depends on the sign of signal1's forward slope at ``s``.

    NOTE(review): exact semantics depend on the _compute_* helpers defined
    elsewhere in this module.  ``dy`` is computed but never used -- confirm
    whether that is intentional.
    """
    z = pd.Series()
    # Interpolate signal1 onto its index extended with s and t.
    x = signal1.reindex(signal1.index.union([s, t])).interpolate(
        'values', limit_direction='both')
    # Forward difference: -diff(-1) is x[i+1] - x[i], NaN tail filled with 0.
    dx = -signal1.diff(-1).fillna(0)
    y = signal2.reindex(signal2.index.union([s, t])).interpolate(
        'values', limit_direction='both')
    dy = -signal2.diff(-1).fillna(0)
    if dx[s] <= 0:
        # Non-increasing signal1 at s: and/eventually, then or with the
        # z_max-capped endpoint value of x.
        z1 = _compute_segment_and(x, y, s, t)
        z2 = _compute_partial_eventually(z1, s, t)
        i = pd.Series()
        i[s] = min(z_max, x[t])
        z = _compute_segment_or(i, z2, s, t)
    else:
        # Increasing signal1 at s: eventually on signal2 first, then
        # combine with x capped by a constant z_max series.
        z1 = _compute_partial_eventually(y, s, t)
        z2 = _compute_segment_and(x, z1, s, t)
        z3 = pd.Series()
        z3[s] = z_max
        z1 = _compute_segment_and(x, z3, s, t)
        z = _compute_segment_or(z1, z2, s, t)
    z.sort_index(inplace=True)
    # Optionally mirror the result into the caller-supplied series.
    if out is not None and isinstance(out, pd.Series):
        out.update(z)
    return z
class Timeseries(object):
    """ASV benchmark: timestamp comparisons and diff on million-row Series,
    naive and tz-aware (parameterized by ``tz``)."""

    goal_time = 0.2
    params = [None, 'US/Eastern']
    param_names = ['tz']

    def setup(self, tz):
        # One million minutely timestamps; self.ts is the midpoint scalar.
        N = 10**6
        halfway = (N // 2) - 1
        self.s = Series(date_range('20010101', periods=N, freq='T', tz=tz))
        self.ts = self.s[halfway]
        # Secondly-frequency series for the diff benchmarks.
        self.s2 = Series(date_range('20010101', periods=N, freq='s', tz=tz))

    def time_series_timestamp_compare(self, tz):
        # Series <= scalar Timestamp
        self.s <= self.ts

    def time_timestamp_series_compare(self, tz):
        # scalar Timestamp >= Series
        self.ts >= self.s

    def time_timestamp_ops_diff(self, tz):
        self.s2.diff()

    def time_timestamp_ops_diff_with_shift(self, tz):
        # diff expressed as subtraction with shift()
        self.s - self.s.shift()
def test_timedelta_ops(self):
    """Timedelta Series reductions return Timedelta; invalid reductions
    raise TypeError; median skips NaT."""
    # GH4984
    # make sure ops return Timedelta
    s = Series([
        Timestamp('20130101') + timedelta(seconds=i * i) for i in range(10)
    ])
    td = s.diff()

    result = td.mean()
    expected = to_timedelta(timedelta(seconds=9))
    self.assertEqual(result, expected)

    result = td.to_frame().mean()
    self.assertEqual(result[0], expected)

    result = td.quantile(.1)
    expected = Timedelta(np.timedelta64(2600, 'ms'))
    self.assertEqual(result, expected)

    result = td.median()
    expected = to_timedelta('00:00:09')
    self.assertEqual(result, expected)

    result = td.to_frame().median()
    self.assertEqual(result[0], expected)

    # GH 6462
    # consistency in returned values for sum
    result = td.sum()
    expected = to_timedelta('00:01:21')
    self.assertEqual(result, expected)

    result = td.to_frame().sum()
    self.assertEqual(result[0], expected)

    # std
    result = td.std()
    expected = to_timedelta(Series(td.dropna().values).std())
    self.assertEqual(result, expected)

    result = td.to_frame().std()
    self.assertEqual(result[0], expected)

    # invalid ops
    # note: getattr(td, op) is the bound method; pytest.raises calls it
    for op in ['skew', 'kurt', 'sem', 'prod']:
        pytest.raises(TypeError, getattr(td, op))

    # GH 10040
    # make sure NaT is properly handled by median()
    s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07')])
    self.assertEqual(s.diff().median(), timedelta(days=4))

    s = Series([
        Timestamp('2015-02-03'), Timestamp('2015-02-07'),
        Timestamp('2015-02-15')
    ])
    self.assertEqual(s.diff().median(), timedelta(days=6))
def test_timedelta_ops(self):
    """Timedelta Series reductions return Timedelta; invalid reductions
    raise with a matching message; median skips NaT."""
    # GH#4984
    # make sure ops return Timedelta
    s = Series([Timestamp('20130101') + timedelta(seconds=i * i)
                for i in range(10)])
    td = s.diff()

    result = td.mean()
    expected = to_timedelta(timedelta(seconds=9))
    assert result == expected

    result = td.to_frame().mean()
    assert result[0] == expected

    result = td.quantile(.1)
    expected = Timedelta(np.timedelta64(2600, 'ms'))
    assert result == expected

    result = td.median()
    expected = to_timedelta('00:00:09')
    assert result == expected

    result = td.to_frame().median()
    assert result[0] == expected

    # GH#6462
    # consistency in returned values for sum
    result = td.sum()
    expected = to_timedelta('00:01:21')
    assert result == expected

    result = td.to_frame().sum()
    assert result[0] == expected

    # std
    result = td.std()
    expected = to_timedelta(Series(td.dropna().values).std())
    assert result == expected

    result = td.to_frame().std()
    assert result[0] == expected

    # invalid ops
    for op in ['skew', 'kurt', 'sem', 'prod']:
        msg = "reduction operation '{}' not allowed for this dtype"
        with pytest.raises(TypeError, match=msg.format(op)):
            getattr(td, op)()

    # GH#10040
    # make sure NaT is properly handled by median()
    s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07')])
    assert s.diff().median() == timedelta(days=4)

    s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07'),
                Timestamp('2015-02-15')])
    assert s.diff().median() == timedelta(days=6)
def series_differentiation(series: Series, order: int) -> Series:
    """Difference a time series ``order`` times (useful to detrend a series).

    Generalizes the original first/second-order support to any positive
    order by applying ``Series.diff`` repeatedly.

    :param series: input time series
    :param order: number of times to apply first differencing (>= 1)
    :return: the differenced series
    :raises ValueError: if ``order`` is less than 1
    """
    if order < 1:
        # The original printed a message and implicitly returned None,
        # silently propagating None to callers; fail loudly instead.
        raise ValueError("order must be a positive integer")
    result = series
    for _ in range(order):
        result = result.diff()
    return result
def test_timedelta_ops(self):
    """Timedelta Series reductions return Timedelta; invalid reductions
    raise TypeError; median skips NaT."""
    # GH4984
    # make sure ops return Timedelta
    s = Series([Timestamp('20130101') + timedelta(seconds=i * i)
                for i in range(10)])
    td = s.diff()

    result = td.mean()
    expected = to_timedelta(timedelta(seconds=9))
    self.assertEqual(result, expected)

    result = td.to_frame().mean()
    self.assertEqual(result[0], expected)

    result = td.quantile(.1)
    expected = Timedelta(np.timedelta64(2600, 'ms'))
    self.assertEqual(result, expected)

    result = td.median()
    expected = to_timedelta('00:00:09')
    self.assertEqual(result, expected)

    result = td.to_frame().median()
    self.assertEqual(result[0], expected)

    # GH 6462
    # consistency in returned values for sum
    result = td.sum()
    expected = to_timedelta('00:01:21')
    self.assertEqual(result, expected)

    result = td.to_frame().sum()
    self.assertEqual(result[0], expected)

    # std
    result = td.std()
    expected = to_timedelta(Series(td.dropna().values).std())
    self.assertEqual(result, expected)

    result = td.to_frame().std()
    self.assertEqual(result[0], expected)

    # invalid ops
    # note: getattr(td, op) is the bound method; assertRaises calls it
    for op in ['skew', 'kurt', 'sem', 'prod']:
        self.assertRaises(TypeError, getattr(td, op))

    # GH 10040
    # make sure NaT is properly handled by median()
    s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07')])
    self.assertEqual(s.diff().median(), timedelta(days=4))

    s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07'),
                Timestamp('2015-02-15')])
    self.assertEqual(s.diff().median(), timedelta(days=6))
def test_timedelta_ops(self):
    """Timedelta Series reductions return Timedelta scalars; median skips
    NaT."""
    # GH#4984
    # make sure ops return Timedelta
    s = Series([
        Timestamp("20130101") + timedelta(seconds=i * i) for i in range(10)
    ])
    td = s.diff()

    result = td.mean()
    expected = to_timedelta(timedelta(seconds=9))
    assert result == expected

    result = td.to_frame().mean()
    assert result[0] == expected

    result = td.quantile(0.1)
    expected = Timedelta(np.timedelta64(2600, "ms"))
    assert result == expected

    result = td.median()
    expected = to_timedelta("00:00:09")
    assert result == expected

    result = td.to_frame().median()
    assert result[0] == expected

    # GH#6462
    # consistency in returned values for sum
    result = td.sum()
    expected = to_timedelta("00:01:21")
    assert result == expected

    result = td.to_frame().sum()
    assert result[0] == expected

    # std
    result = td.std()
    expected = to_timedelta(Series(td.dropna().values).std())
    assert result == expected

    result = td.to_frame().std()
    assert result[0] == expected

    # GH#10040
    # make sure NaT is properly handled by median()
    s = Series([Timestamp("2015-02-03"), Timestamp("2015-02-07")])
    assert s.diff().median() == timedelta(days=4)

    s = Series([
        Timestamp("2015-02-03"), Timestamp("2015-02-07"),
        Timestamp("2015-02-15")
    ])
    assert s.diff().median() == timedelta(days=6)
def get_bvc_buy_volume(close: pd.Series, volume: pd.Series,
                       window: int = 20) -> pd.Series:
    """Estimate BVC (bulk volume classification) buy volume.

    :param close: (pd.Series): series of close prices
    :param volume: (pd.Series): series of bar volumes
    :param window: (int); window for std estimation uses in BVC calculation
    :return: (pd.Series) estimated buy volume
    """
    # Standardize the price change by its rolling volatility, then map it
    # through the standard normal CDF to get the buy fraction of volume.
    price_change = close.diff()
    standardized = price_change / price_change.rolling(window=window).std()
    return volume * norm.cdf(standardized)
def get_bvc_buy_volume(close: pd.Series, volume: pd.Series,
                       window: int = 20) -> pd.Series:
    """
    Calculates the BVC buy volume

    :param close: (pd.Series): Close prices
    :param volume: (pd.Series): Bar volumes
    :param window: (int): Window for std estimation uses in BVC calculation
    :return: (pd.Series) BVC buy volume
    """
    delta = close.diff()
    zscore = delta / delta.rolling(window=window).std()
    # Element-wise .apply(norm.cdf) avoids the warning norm.cdf emits for a
    # Series containing NaNs.
    return volume * zscore.apply(norm.cdf)
def adx(high: pd.Series, low: pd.Series, smooth: int = 14, di_len: int = 14):
    """Average Directional Index with its +DI / -DI components.

    Returns a dict with keys 'adx', 'pos' and 'neg'.
    """
    up_move = high.diff()
    down_move = low.diff()
    up_abs = up_move.abs()
    down_abs = down_move.abs()

    # Directional movement: keep only the dominant move on each bar.
    # NOTE(review): the (down_move < 0) zeroing mirrors the original code;
    # classic -DM usually *keeps* bars where the low moved down -- confirm
    # this is the intended convention before changing it.
    pos_dm = up_move.abs()
    pos_dm[(up_abs <= down_abs) | (up_move < 0)] = 0.0
    neg_dm = down_move.abs()
    neg_dm[(down_abs <= up_abs) | (down_move < 0)] = 0.0

    atr = average_true_range(high, low, di_len)
    pos_di = modified_moving_average(pos_dm, smooth) * 100.0 / atr
    neg_di = modified_moving_average(neg_dm, smooth) * 100.0 / atr
    dx = (pos_di - neg_di).abs() * 100.0 / (pos_di + neg_di)
    return {'adx': modified_moving_average(dx, smooth),
            'pos': pos_di,
            'neg': neg_di}
def calc_td(series: Series, direction: str, show_all: bool):
    """TD Sequential count over a price series.

    Counts consecutive bars where the price is above ("up") or below the
    value four bars earlier; presumably true_sequence_count returns the
    length of the trailing run of True values -- confirm against its
    definition.

    :param series: price series
    :param direction: "up" counts rising setups, anything else falling ones
    :param show_all: keep every nonzero count if True, else only counts 6-9
    """
    four_bar_change = series.diff(4)
    if direction == "up":
        td_bool = four_bar_change > 0
    else:
        td_bool = four_bar_change < 0
    streaks = td_bool.rolling(13, min_periods=0).apply(true_sequence_count)
    td_num = Series(npWhere(td_bool, streaks, 0))
    if show_all:
        return td_num.mask(td_num == 0)
    return td_num.mask(~td_num.between(6, 9))
def rsin(x: pd.Series, window: int, ewm: bool, groupfreq: AnyStr = '') -> np.array:
    """Calculate normalized relative strength index.

    Averages gains and losses separately, then returns
    (avg_gain - avg_loss) / (avg_gain + avg_loss), which lies in [-1, 1].
    """
    delta = x.diff()
    gains = np.maximum(delta, 0)
    losses = -np.minimum(delta, 0)
    avg_gain = TechAnalysis.moving_avg(gains, window, ewm, groupfreq)
    avg_loss = TechAnalysis.moving_avg(losses, window, ewm, groupfreq)
    return (avg_gain - avg_loss) / (avg_gain + avg_loss)
def pdi(price_high: Series, price_low: Series, price_close: Series,
        period: int, moving_average: MovingAverage) -> Series:
    """Plus Directional Indicator (+DI / PDI).

    Measures the proportion of the true range attributable to upward
    movement; values lie between 0 and 1.

    e.g. pdi(high, low, close, 14, ema) is the 14-period +DI smoothed with
    an exponential moving average.

    :param price_high: high prices
    :param price_low: low prices
    :param price_close: close prices
    :param period: smoothing period
    :param moving_average: moving-average kind (sma, ema, ewma, wma)
    :return: smoothed +DM divided by smoothed true range
    :raises ValueError: if ``moving_average`` is not a supported kind
    """

    def _smooth(values: Series) -> Series:
        # Shared dispatch for both smoothing steps; the original repeated
        # this if/elif chain twice and left the result unbound (NameError)
        # for an unknown kind -- raise an explicit error instead.
        if moving_average == MovingAverage.sma:
            return sma(values, period)
        if moving_average == MovingAverage.ema:
            return ema(values, period)
        if moving_average == MovingAverage.ewma:
            return ewma(values, period)
        if moving_average == MovingAverage.wma:
            return wma(values, period)
        raise ValueError(
            "unsupported moving average: {}".format(moving_average))

    up_move = price_high.diff(1)
    # +DM: up moves that exceed the corresponding down move, else 0.
    pdm = np.where((up_move > 0) & (up_move > price_low.shift(1) - price_low),
                   up_move, 0)
    pdmn = _smooth(Series(pdm))
    trn = _smooth(_tr(price_high, price_low, price_close))
    return pdmn.divide(trn)
def robust_daily_vol_given_price(price: pd.Series, **kwargs):
    """Robust daily volatility of a price series.

    Resamples to business-day frequency (forward-filling gaps), takes daily
    differences, and delegates to robust_vol_calc (kwargs pass through).
    """
    daily_price = price.resample("1B").ffill()
    daily_returns = daily_price.diff()
    return robust_vol_calc(daily_returns, **kwargs)
def get_chow_type_stat(series: pd.Series, min_length: int = 20) -> pd.Series:
    """
    Multithread implementation of Chow-Type Dickey-Fuller Test, p.251-252

    :param series: (pd.Series) series to test
    :param min_length: (int) minimum sample length used to estimate statistics
    :return: (pd.Series) of Chow-Type Dickey-Fuller Test statistics
    """
    # Candidate break dates: drop the first and last min_length observations.
    molecule = series.index[min_length:series.shape[0] - min_length].values
    molecule_range = np.arange(0, len(molecule))

    diff_values = series.diff().dropna().values
    lagged = series.shift(1).dropna()
    lag_values = lagged.values
    # Position (offset by one) of the first candidate date in the lag series.
    start = np.where(lagged.index.values == molecule[0])[0].item() + 1

    stats = _get_dfc_for_t(molecule_range, start, diff_values, lag_values)
    return pd.Series(stats, index=molecule)
def __init__(self, prices: dict, trading_signal: Series, initialcash: float = 1000): """ Parameters ----------- prices : instrument price trading_signal : capital to invest (long+,short-) or number of shares initialcash : float = 1000 starting cash """ # first thing to do is to clean up the signal, removing nans and duplicate entries or exits self.trades = trading_signal.diff() # now create internal data structure split = int((len(prices) - 9) * 0.8) self.prices = pd.DataFrame.from_dict( prices, orient="index").iloc[split + 9:, :] self.data = pd.DataFrame( index=self.prices.index, columns=['prices', 'shares', 'value', 'cash', 'pnl']) self.data['prices'] = self.prices self.data['shares'] = self.trades.fillna(0) self.data['value'] = self.data['shares'] * self.data['prices'].fillna( 0) delta = self.data['shares'].diff() # shares bought sold self.data['cash'] = ( -delta * self.data['prices']).fillna(0).cumsum() + initialcash self.data['pnl'] = (self.data['cash'] + self.data['value'] - initialcash).fillna(0)
def __clean_artifacts(data: pd.Series, threshold=0.2) -> pd.Series: """ Cleans obviously illegal IBI values (artefacts) from a list Parameters ---------- data : pd.Series the IBI list threshold : float, optional the maximum relative deviation between subsequent intervals, by default 0.2 Returns ------- pd.Series the cleaned IBIs """ # Artifact detection - Statistical # for index in trange(data.shape[0]): # # Remove RR intervals that differ more than 20% from the previous one # if np.abs(data.iloc[index] - data.iloc[index - 1]) > 0.2 * data.iloc[index]: # data.iloc[index] = np.nan # efficiency instead of loop ;-) diff = data.diff().abs() drop_indices = diff > threshold * data if drop_indices.any(): data.drop(data[drop_indices].index, inplace=True) drop_indices = (data < 250) | (data > 2000) if drop_indices.any(): data.drop(data[drop_indices].index, inplace=True) # drop by bpm > 240 or bpm < 30 data.dropna(inplace=True) # just to be sure return data
def adjacent_to_irregular(df_resampled: pd.Series, samp_freq: int):
    '''Convert type-2A series data (after resampling) into type-2B event data.

    Only applicable to Hypnogram result series.

    :param df_resampled: pandas Series holding a continuous run of labels,
        with a datetime index
    :param samp_freq: sampling frequency, an integer, e.g. 10, 30
    :return: a DataFrame with one row per event and 4 columns:
        evt_start/evt_stop = start/end timestamps; evt_info: event duration
        in seconds; evt_value: event label as defined in the dictionary
    '''
    # Sample period in seconds, rounded to 5 decimals.
    fs = np.round(float(1 / samp_freq), 5)
    # Run-length encode the labels: [label, duration in seconds] per run.
    grp = np.array([[int(k), float(len(list(g)) / samp_freq)]
                    for k, g in (groupby(df_resampled))])
    # Timestamps where the label changes (first sample of each run).
    # NOTE(review): diff() leaves the first element NaN, which is != 0, so
    # the very first sample is included as a change point -- presumably
    # intended so the first run gets a start timestamp.
    change_point = np.array(
        df_resampled.diff()[lambda x: x != 0].index.tolist())
    df_event = pd.DataFrame({
        'evt_start': change_point,
        # End = start + duration - one sample period.
        'evt_stop': np.array([
            s + pd.Timedelta(f"{i}S") - pd.Timedelta(f"{fs}S")
            for s, i in zip(change_point, grp[:, 1])
        ]),
        'evt_info': grp[:, 1],
        'evt_value': grp[:, 0],
    })
    return df_event
def test_timedelta_ops(self): _skip_if_numpy_not_friendly() # GH4984 # make sure ops return timedeltas s = Series([Timestamp('20130101') + timedelta(seconds=i*i) for i in range(10) ]) td = s.diff() result = td.mean()[0] # TODO This should have returned a scalar to begin with. Hack for now. expected = to_timedelta(timedelta(seconds=9)) tm.assert_almost_equal(result, expected) result = td.quantile(.1) # This properly returned a scalar. expected = to_timedelta('00:00:02.6') tm.assert_almost_equal(result, expected) result = td.median()[0] # TODO This should have returned a scalar to begin with. Hack for now. expected = to_timedelta('00:00:08') tm.assert_almost_equal(result, expected) # GH 6462 # consistency in returned values for sum result = td.sum()[0] expected = to_timedelta('00:01:21') tm.assert_almost_equal(result, expected)
def test_timedelta_ops(self):
    """Timedelta Series reductions (mean/quantile/median/sum) return
    Timedelta scalars."""
    # GH4984
    # make sure ops return Timedelta
    s = Series([
        Timestamp('20130101') + timedelta(seconds=i * i) for i in range(10)
    ])
    td = s.diff()

    result = td.mean()
    expected = to_timedelta(timedelta(seconds=9))
    self.assertEqual(result, expected)

    result = td.quantile(.1)
    expected = Timedelta(np.timedelta64(2600, 'ms'))
    self.assertEqual(result, expected)

    result = td.median()
    expected = to_timedelta('00:00:08')
    self.assertEqual(result, expected)

    # GH 6462
    # consistency in returned values for sum
    result = td.sum()
    expected = to_timedelta('00:01:21')
    tm.assert_almost_equal(result, expected)
    self.assertEqual(result, expected)
def unsigned_differences(series: Series, amount: int = None, **kwargs) -> Series:
    """Unsigned Differences

    Returns two Series, an unsigned positive and unsigned negative series
    based on the differences of the original series. The positive series
    are only the increases and the negative series is only the decreases.

    Default Example:
    series = Series([3, 2, 2, 1, 1, 5, 6, 6, 7, 5, 3]) and returns
    postive  = Series([0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0])
    negative = Series([0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1])
    """
    step = 1 if amount is None else int(amount)
    delta = series.diff(step).fillna(0)
    # 1.0 where the series rose / fell over `step` periods, else 0.0.
    positive = (delta > 0).astype(float)
    negative = (delta < 0).astype(float)
    if kwargs.pop("asint", False):
        positive = positive.astype(int)
        negative = negative.astype(int)
    return positive, negative
def psychological_line(price: Series, period: int) -> Series:
    """Psychological Line.

    The fraction of the last ``period`` bars whose price rose versus the
    previous bar; ranges from 0 to 1 and gauges overbought/oversold mood.

    e.g. psychological_line(close, 10) =
    (number of up days among the last 10) / 10

    :param price: price series used, e.g. open, high, low or close
    :param period: lookback window length
    :return:
    """
    rose = np.where(price.diff(1).gt(0), 1, 0)
    up_days = Series(rose).rolling(window=period, min_periods=period).sum()
    return up_days.divide(period)
def import_bid_ask_data():
    """Compute per-stock daily effective spreads from tick-level bid/ask
    HDF5 files and pickle one result Series per trading day.

    Side effects: creates <savedir>/effective_spread_/ (fails if it already
    exists), writes one pickle per input file, prints elapsed time per day.
    """
    rootdir='I:/tickhdf_stk'
    savedir='F:/data/xccdata/bid_ask'
    #rootdir = '/Users/harbes/data/xccdata/bid_ask'
    # rootdir = 'F:/data/xccdata/bid_ask'
    # List the data files in the folder (skip names ending with '_' or '.h5').
    li_ = [i for i in os.listdir(rootdir) if not i.endswith('_') and not i.endswith('.h5')]
    os.mkdir(savedir + '/effective_spread_')  # create the output folder
    now0 = time.time()
    for i in li_[150:]:  # extra care needed on Mac
        # Series & np.array: one day of data takes roughly 12s
        # path = rootdir + '/' + i
        f = h5py.File(rootdir + '/' + i, 'r')
        effective_spread = Series(np.nan, index=np.array(f['stk']))
        for stk in f['stk']:  # ['603611']:#['000031']:# ['000504']
            bid = np.array(f['stk'][stk]['bidPrc_1'])  # Series(f['stk'][stk]['bidPrc_1']) #
            ask = np.array(f['stk'][stk]['askPrc_1'])  # Series(f['stk'][stk]['askPrc_1'])#
            prc = np.array(f['stk'][stk]['lastPrc'])  # Series(f['stk'][stk]['lastPrc']) #
            volume = Series(f['stk'][stk]['volume'])  # np.array(f['stk'][stk]['volume'])[(bid>0) & (ask>0)] #
            # Cumulative volume -> per-tick volume.
            volume = volume.diff(1).fillna(volume[0])
            # DataFrame({'bid': bid, 'ask': ask, 'prc': prc, 'volume': volume})#, 'trend':trend})
            # Dollar volume over ticks with valid (positive) quotes.
            tmp = np.sum((volume * prc)[(bid > 0) & (ask > 0)])
            # Dollar-volume-weighted relative effective spread:
            # 2 * |price / midquote - 1| over ticks with valid quotes.
            effective_spread[stk] = 0 if tmp == 0 else 2 * np.sum(
                (np.abs(2 * prc / (bid + ask) - 1) * volume * prc)[(bid > 0) & (ask > 0)]) / tmp
        # effective_spread[effective_spread <= 0] = np.nan  # could also set this after aggregating all data
        effective_spread.to_pickle(savedir + '/effective_spread_/' + i)
        print(time.time() - now0)
def test_timedelta_ops(self):
    """Legacy timedelta reduction behavior: mean/median/sum return indexed
    results, so element [0] is compared; quantile returns an ns scalar."""
    # GH4984
    # make sure ops return timedeltas
    s = Series([
        Timestamp('20130101') + timedelta(seconds=i * i) for i in range(10)
    ])
    td = s.diff()

    result = td.mean()[0]
    # TODO This should have returned a scalar to begin with. Hack for now.
    expected = to_timedelta(timedelta(seconds=9))
    tm.assert_almost_equal(result, expected)

    result = td.quantile(.1)
    # This properly returned a scalar.
    expected = np.timedelta64(2599999999, 'ns')
    tm.assert_almost_equal(result, expected)

    result = td.median()[0]
    # TODO This should have returned a scalar to begin with. Hack for now.
    expected = to_timedelta('00:00:08')
    tm.assert_almost_equal(result, expected)

    # GH 6462
    # consistency in returned values for sum
    result = td.sum()[0]
    expected = to_timedelta('00:01:21')
    tm.assert_almost_equal(result, expected)
def robust_daily_vol_given_price(price: pd.Series, **kwargs):
    """Robust daily volatility of a price series.

    Converts to daily prices, differences them, and delegates to
    robust_vol_calc (kwargs pass through).
    """
    daily_returns = prices_to_daily_prices(price).diff()
    return robust_vol_calc(daily_returns, **kwargs)
def _gradient(self, data: pd.Series, periods: int = 1) -> pd.Series: dt = data.index.to_series().diff().dt.total_seconds() if dt.min() < 1e-15: raise ValueError("Input must be monotonic increasing") gradient = data.diff(periods=periods) / dt return gradient
def plot_time(time: pandas.Series):
    """
    make a probability density function estimate based on the data in this
    simulation, time interval is same distribution for all sensors and rooms

    https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.rv_continuous.fit.html
    """
    # Inter-sample gaps in seconds (the leading NaT from diff is dropped).
    intervals = time.diff().dropna().dt.total_seconds()
    Nbin = 100

    # Fit a gamma distribution to the observed intervals.
    Fa, Floc, Fscale = gamma.fit(intervals)

    ti = np.arange(0.01, 5, 0.01)  # arbitrary time interval range to plot over
    pd = gamma.pdf(ti, Fa, loc=Floc, scale=Fscale)  # fitted density

    ax = plt.figure().gca()
    ax.plot(ti, pd)
    ax.set_xlabel("Time Interval (seconds)")
    ax.set_ylabel("Probability")
    ax.set_title("Time interval observed")
    # add the measured data to the plot
    ax.hist(intervals, bins=Nbin)
def consecutive_wins_losses(self):
    '''
    Calculates the positive and negative runs in the trade series.
    Returns a tuple (positive_runs, negative_runs) of Series holding the
    length of each winning / losing streak, in chronological (exit) order.
    '''
    trade_df = self.as_dataframe().sort_values(by = 'exit')
    win_loss = sign(trade_df.base_return)
    # Create series which has just 1's and 0's; pad both ends with 0 so a
    # streak touching the first or last trade still produces both a start
    # and an end marker below.
    positive = Series(hstack(([0], ((win_loss > 0) * 1).values, [0])))
    negative = Series(hstack(([0], ((win_loss < 0) * 1).values, [0])))
    # A 0->1 transition (diff > 0) marks the start of a streak; a 1->0
    # transition (diff < 0) marks the position just after it ends.  The
    # difference of the two positional indices is the streak length.
    pos_starts = positive.where(positive.diff() > 0)
    pos_starts = Series(pos_starts.dropna().index.tolist())
    pos_ends = positive.where(positive.diff() < 0)
    pos_ends = Series(pos_ends.dropna().index.tolist())
    positive_runs = pos_ends - pos_starts
    neg_starts = negative.where(negative.diff() > 0)
    neg_starts = Series(neg_starts.dropna().index.tolist())
    neg_ends = negative.where(negative.diff() < 0)
    neg_ends = Series(neg_ends.dropna().index.tolist())
    negative_runs = neg_ends - neg_starts
    return (positive_runs, negative_runs)
def test_diff(self):
    """Series.diff: large ints, negative/zero periods, datetime and
    tz-aware datetime input (GH3100)."""
    # Just run the function
    self.ts.diff()

    # int dtype
    a = 10000000000000000
    b = a + 1
    s = Series([a, b])

    rs = s.diff()
    self.assertEqual(rs[1], 1)

    # neg n
    rs = self.ts.diff(-1)
    xp = self.ts - self.ts.shift(-1)
    assert_series_equal(rs, xp)

    # 0
    rs = self.ts.diff(0)
    xp = self.ts - self.ts
    assert_series_equal(rs, xp)

    # datetime diff (GH3100)
    s = Series(date_range('20130102', periods=5))
    rs = s - s.shift(1)
    xp = s.diff()
    assert_series_equal(rs, xp)

    # timedelta diff
    nrs = rs - rs.shift(1)
    nxp = xp.diff()
    assert_series_equal(nrs, nxp)

    # with tz: diff of tz-aware datetimes yields timedeltas
    s = Series(
        date_range('2000-01-01 09:00:00', periods=5, tz='US/Eastern'),
        name='foo')
    result = s.diff()
    assert_series_equal(result, Series(
        TimedeltaIndex(['NaT'] + ['1 days'] * 4), name='foo'))
def test_timedelta_fillna(self):
    """Filling NaT in a timedelta Series with ints, timedeltas,
    np.timedelta64, NaT, and via ffill/bfill (GH 3371)."""
    # GH 3371
    s = Series([Timestamp("20130101"), Timestamp("20130101"),
                Timestamp("20130102"), Timestamp("20130103 9:01:01")])
    td = s.diff()

    # reg fillna
    result = td.fillna(0)
    expected = Series([timedelta(0), timedelta(0), timedelta(1),
                       timedelta(days=1, seconds=9 * 3600 + 60 + 1)])
    assert_series_equal(result, expected)

    # interpreted as seconds
    result = td.fillna(1)
    expected = Series(
        [timedelta(seconds=1), timedelta(0), timedelta(1),
         timedelta(days=1, seconds=9 * 3600 + 60 + 1)]
    )
    assert_series_equal(result, expected)

    result = td.fillna(timedelta(days=1, seconds=1))
    expected = Series(
        [timedelta(days=1, seconds=1), timedelta(0), timedelta(1),
         timedelta(days=1, seconds=9 * 3600 + 60 + 1)]
    )
    assert_series_equal(result, expected)

    result = td.fillna(np.timedelta64(int(1e9)))
    expected = Series(
        [timedelta(seconds=1), timedelta(0), timedelta(1),
         timedelta(days=1, seconds=9 * 3600 + 60 + 1)]
    )
    assert_series_equal(result, expected)

    from pandas import tslib
    result = td.fillna(tslib.NaT)
    expected = Series(
        [tslib.NaT, timedelta(0), timedelta(1),
         timedelta(days=1, seconds=9 * 3600 + 60 + 1)],
        dtype="m8[ns]"
    )
    assert_series_equal(result, expected)

    # ffill
    td[2] = np.nan
    result = td.ffill()
    expected = td.fillna(0)
    expected[0] = np.nan
    assert_series_equal(result, expected)

    # bfill
    td[2] = np.nan
    result = td.bfill()
    expected = td.fillna(0)
    expected[2] = timedelta(days=1, seconds=9 * 3600 + 60 + 1)
    assert_series_equal(result, expected)
def test_timedelta_ops(self):
    """Timedelta Series reductions return Timedelta; invalid reductions
    (including var here) raise TypeError."""
    # GH4984
    # make sure ops return Timedelta
    s = Series([Timestamp("20130101") + timedelta(seconds=i * i)
                for i in range(10)])
    td = s.diff()

    result = td.mean()
    expected = to_timedelta(timedelta(seconds=9))
    self.assertEqual(result, expected)

    result = td.to_frame().mean()
    self.assertEqual(result[0], expected)

    result = td.quantile(0.1)
    expected = Timedelta(np.timedelta64(2600, "ms"))
    self.assertEqual(result, expected)

    result = td.median()
    expected = to_timedelta("00:00:08")
    self.assertEqual(result, expected)

    result = td.to_frame().median()
    self.assertEqual(result[0], expected)

    # GH 6462
    # consistency in returned values for sum
    result = td.sum()
    expected = to_timedelta("00:01:21")
    self.assertEqual(result, expected)

    result = td.to_frame().sum()
    self.assertEqual(result[0], expected)

    # std
    result = td.std()
    expected = to_timedelta(Series(td.dropna().values).std())
    self.assertEqual(result, expected)

    result = td.to_frame().std()
    self.assertEqual(result[0], expected)

    # invalid ops
    for op in ["skew", "kurt", "sem", "var", "prod"]:
        self.assertRaises(TypeError, lambda: getattr(td, op)())
def test_timedelta_ops(self): _skip_if_numpy_not_friendly() # GH4984 # make sure ops return timedeltas s = Series([Timestamp('20130101') + timedelta(seconds=i*i) for i in range(10) ]) td = s.diff() result = td.mean() expected = to_timedelta(timedelta(seconds=9)) tm.assert_series_equal(result, expected) result = td.quantile(.1) expected = to_timedelta('00:00:02.6') tm.assert_series_equal(result, expected) result = td.median() expected = to_timedelta('00:00:08') tm.assert_series_equal(result, expected)
def test_timedelta_ops(self): # GH4984 # make sure ops return Timedelta s = Series([Timestamp('20130101') + timedelta(seconds=i*i) for i in range(10) ]) td = s.diff() result = td.mean() expected = to_timedelta(timedelta(seconds=9)) self.assertEqual(result, expected) result = td.quantile(.1) expected = Timedelta(np.timedelta64(2600,'ms')) self.assertEqual(result, expected) result = td.median() expected = to_timedelta('00:00:08') self.assertEqual(result, expected) # GH 6462 # consistency in returned values for sum result = td.sum() expected = to_timedelta('00:01:21') tm.assert_almost_equal(result, expected) self.assertEqual(result, expected)
K 128.09496 L 113.08406 M 131.04049 N 114.04293 P 97.05276 Q 128.05858 R 156.10111 S 87.03203 T 101.04768 V 99.06841 W 186.07931 Y 163.06333''' import pandas as pd; from pandas import DataFrame, Series import numpy as np t = [i.rstrip().split() for i in open('monoisotopic_mass_table.txt').readlines()] mass = DataFrame(t, columns=['residue', 'mass'], dtype=float) residue = np.array(mass.residue) mass = np.array(mass.mass) spec = Series(open('rosalind_spec.txt').readlines(), dtype=float) increment = np.array(spec.diff()[1:]) peptide = ''.join([residue[np.where(abs(mass - i) < 0.0001)[0][0]] for i in increment]) f = open('rosalind_spec_sub.txt', 'wt') f.write(peptide) f.close()
def turnover(series: pd.Series):
    """Annualised turnover ratio of ``series``.

    Daily absolute changes are scaled by the 3-month rolling mean of the
    series' absolute level, then annualised with the business-day count.
    """
    daily_change = series.diff().abs()
    avg_level = series.abs().rolling(window=system.n_bday_in_3m).mean()
    return (daily_change / avg_level) * system.n_bday_in_year
class MySeries:
    """Adapter wrapping ``pandas.Series`` so every supported operation
    returns another ``MySeries``, keeping chained calls on the wrapper type.

    NOTE(review): the module-level ``pd.rolling_*`` helpers used below were
    deprecated in pandas 0.18 and removed in 0.23, so this class targets an
    old pandas release — confirm the pinned version.
    """

    def __init__(self, *args, **kwargs):
        # Build the underlying Series and mirror the two attributes that
        # callers read directly on the wrapper.
        self.x = Series(*args, **kwargs)
        self.values = self.x.values
        self.index = self.x.index

    # --- rolling-window statistics (legacy top-level pandas API) ---

    def rolling_mean(self, *args, **kwargs):
        return MySeries(pd.rolling_mean(self.x, *args, **kwargs))

    def rolling_count(self, *args, **kwargs):
        return MySeries(pd.rolling_count(self.x, *args, **kwargs))

    def rolling_sum(self, *args, **kwargs):
        return MySeries(pd.rolling_sum(self.x, *args, **kwargs))

    def rolling_median(self, *args, **kwargs):
        return MySeries(pd.rolling_median(self.x, *args, **kwargs))

    def rolling_min(self, *args, **kwargs):
        return MySeries(pd.rolling_min(self.x, *args, **kwargs))

    def rolling_max(self, *args, **kwargs):
        return MySeries(pd.rolling_max(self.x, *args, **kwargs))

    def rolling_std(self, *args, **kwargs):
        return MySeries(pd.rolling_std(self.x, *args, **kwargs))

    def rolling_var(self, *args, **kwargs):
        return MySeries(pd.rolling_var(self.x, *args, **kwargs))

    def rolling_skew(self, *args, **kwargs):
        return MySeries(pd.rolling_skew(self.x, *args, **kwargs))

    def rolling_kurtosis(self, *args, **kwargs):
        # NOTE(review): pandas named this helper ``pd.rolling_kurt``;
        # ``pd.rolling_kurtosis`` does not appear in the pandas API —
        # confirm this path is ever exercised.
        return MySeries(pd.rolling_kurtosis(self.x, *args, **kwargs))

    def rolling_window(self, *args, **kwargs):
        return MySeries(pd.rolling_window(self.x, *args, **kwargs))

    # --- direct pass-throughs to Series methods, re-wrapped ---

    def cumprod(self, *args, **kwargs):
        return MySeries(self.x.cumprod(*args, **kwargs))

    def cumsum(self, *args, **kwargs):
        return MySeries(self.x.cumsum(*args, **kwargs))

    def diff(self, *args, **kwargs):
        return MySeries(self.x.diff(*args, **kwargs))

    def div(self, *args, **kwargs):
        return MySeries(self.x.div(*args, **kwargs))

    def mul(self, *args, **kwargs):
        return MySeries(self.x.mul(*args, **kwargs))

    def add(self, *args, **kwargs):
        return MySeries(self.x.add(*args, **kwargs))

    def dropna(self, *args, **kwargs):
        return MySeries(self.x.dropna(*args, **kwargs))

    def fillna(self, *args, **kwargs):
        return MySeries(self.x.fillna(*args, **kwargs))

    def floordiv(self, *args, **kwargs):
        return MySeries(self.x.floordiv(*args, **kwargs))

    def mod(self, *args, **kwargs):
        return MySeries(self.x.mod(*args, **kwargs))

    def nlargest(self, *args, **kwargs):
        return MySeries(self.x.nlargest(*args, **kwargs))

    def nonzero(self, *args, **kwargs):
        # NOTE(review): Series.nonzero returned index positions and was
        # removed in pandas 0.25 — another marker of the old API target.
        return MySeries(self.x.nonzero(*args, **kwargs))

    def nsmallest(self, *args, **kwargs):
        return MySeries(self.x.nsmallest(*args, **kwargs))

    def pow(self, *args, **kwargs):
        return MySeries(self.x.pow(*args, **kwargs))

    def rank(self, *args, **kwargs):
        return MySeries(self.x.rank(*args, **kwargs))

    def round(self, *args, **kwargs):
        return MySeries(self.x.round(*args, **kwargs))

    def shift(self, *args, **kwargs):
        return MySeries(self.x.shift(*args, **kwargs))

    def sub(self, *args, **kwargs):
        return MySeries(self.x.sub(*args, **kwargs))

    def abs(self, *args, **kwargs):
        return MySeries(self.x.abs(*args, **kwargs))

    def clip(self, *args, **kwargs):
        return MySeries(self.x.clip(*args, **kwargs))

    def clip_lower(self, *args, **kwargs):
        return MySeries(self.x.clip_lower(*args, **kwargs))

    def clip_upper(self, *args, **kwargs):
        return MySeries(self.x.clip_upper(*args, **kwargs))

    def interpolate(self, *args, **kwargs):
        return MySeries(self.x.interpolate(*args, **kwargs))

    def resample(self, *args, **kwargs):
        # NOTE(review): in modern pandas ``Series.resample`` returns a
        # Resampler object rather than data, so wrapping its result in a
        # new Series would fail — presumably fine on the old pandas this
        # targets; confirm.
        return MySeries(self.x.resample(*args, **kwargs))

    def replace(self, *args, **kwargs):
        return MySeries(self.x.replace(*args, **kwargs))