def get_options_for_today():
    """Build the option-cycle nodes up to and including the current cycle.

    Walks the expirations returned by ``historical_options()`` in order and
    emits one dict per cycle with the keys ``exp_date``, ``exp_date_str``
    (``%Y-%m-%d``), ``cycle_start`` and ``cycle_start_str`` (``%m-%d-%Y``).
    Iteration stops right after the first expiration that lies after "now",
    so the last node describes the current cycle.

    :return: list of dicts, one per option cycle
    """
    now = datetime.datetime.now()
    nodes = []
    prior_expiration = None

    for idx, org_exp_date_str in enumerate(historical_options()):
        log.debug(f'cycle={idx} expiration={org_exp_date_str}')
        expiration = datetime.datetime.strptime(org_exp_date_str, '%m-%d-%Y')

        if prior_expiration:
            # A cycle starts on the business day after the previous expiration.
            start = prior_expiration + pd_bday.BDay(1)
        else:
            # No earlier expiration known: go back 19 business days.
            start = expiration - pd_bday.BDay(19)

        nodes.append({
            'exp_date': expiration,
            'exp_date_str': expiration.strftime('%Y-%m-%d'),
            'cycle_start': start,
            'cycle_start_str': start.strftime('%m-%d-%Y')
        })

        if now < expiration:
            # This expiration is still ahead of us: it is the current cycle.
            break
        prior_expiration = expiration

    return nodes
def process_symbol(sec_name):
    """Collect the download window of every contract of ``sec_name`` and fetch it.

    Builds a frame of contract expirations (and inceptions when available)
    from the instrument metadata, records ``contract -> sec_name`` in
    ``METADATA``, computes a (start, end) window per contract and hands the
    result to ``get_all_intraday_data``.

    :param sec_name: security name passed to ``isecurity.factory``
    """
    print("Processing %s" % sec_name)
    instrument = isecurity.factory(sec_name)
    md_dict = {key: instrument.metadata[key] for key in instrument.metadata.keys()}

    # Create a frame from the expiry map, which will have contract names as the index
    df = pandas.Series(md_dict['expiry_map'])
    df = df.map(lambda x: pandas.Period(x, freq="B"))
    df = df.to_frame('expiration')
    df.index.name = 'contract'

    # Add a column for the inception dates
    # NOTE: Keeping this in case we want to write all contract prices as separate blobs in the future
    inceptions = md_dict.get('inception_map', None)
    if inceptions is not None:
        contract_inception = pandas.Series(inceptions)
        contract_inception = contract_inception.map(
            lambda x: pandas.Period(x, freq="B"))
        df.loc[:, 'inception'] = contract_inception
    else:
        df.loc[:, 'inception'] = None

    # Gather start/end stats for all relevant contracts
    contracts_info = OrderedDict()
    for contract_name in df.index:
        if contract_name in BAD_CONTRACTS:
            continue
        METADATA[contract_name] = sec_name

        expiry = df.loc[contract_name, 'expiration']
        end_p = indaux.apply_offset(expiry, offsets.BDay())
        if end_p <= DEEP_PAST:
            continue

        # NOTE: Use inception - 1 BDay if wanting to store all contract data in the future.
        # For now, we're just storing the last BDAYS_PER_CONTRACT business days of prices.
        start_p = max(
            indaux.apply_offset(expiry, -BDAYS_PER_CONTRACT * offsets.BDay()),
            DEEP_PAST)

        # Convert to timezone-aware timestamps and naive datetimes (for efficient use elsewhere)
        start_naive = start_p.to_timestamp()
        end_naive = end_p.to_timestamp()
        start_ts = start_naive.tz_localize(pytz.UTC)
        end_ts = end_naive.tz_localize(pytz.UTC)
        # Timestamp.to_datetime() is gone from current pandas; to_pydatetime()
        # is the long-standing equivalent.
        start_dt = start_naive.to_pydatetime()
        end_dt = end_naive.to_pydatetime()
        contracts_info[contract_name] = (start_ts, end_ts, start_dt, end_dt)

    get_all_intraday_data(instrument, contracts_info)
def t2_shift(date, index):
    """Compute the ex-dividend date for T-2 mode from the registry closing date.

    If the date is not present in the price index, the closest earlier date
    of the index is used. The result is the index entry ``T2`` positions
    before that one.

    If the date lies beyond the end of the price history, business-day
    arithmetic is used instead: a simplified approach that can be wrong
    around holidays.

    :param date: registry closing date
    :param index: sorted index of price dates
    :return: ex-dividend date
    :raises KeyError: if ``date`` is earlier than the first index entry
    """
    if date <= index[-1]:
        # Position of the last index entry <= date (forward-fill lookup).
        # ``Index.get_loc(..., method='ffill')`` was removed in pandas 2.0.
        position = index.searchsorted(date, side="right") - 1
        if position < 0:
            raise KeyError(date)
        return index[position - T2]
    # A weekend date is first replaced by a business day
    next_b_day = date + offsets.BDay()
    return next_b_day - (T2 + 1) * offsets.BDay()
def t2_shift(date: pd.Timestamp, index: pd.DatetimeIndex):
    """Compute the ex-dividend date for T-2 mode from the registry closing date.

    If the date is not present in the price index, the closest earlier date
    of the index is used; the result is the index entry one position before
    it.

    If the date lies beyond the end of the price history, business-day
    arithmetic is used instead: a simplified approach that can be wrong
    around holidays.

    :param date: registry closing date
    :param index: sorted index of price dates
    :return: ex-dividend date
    :raises KeyError: if ``date`` is earlier than the first index entry
    """
    if date <= index[-1]:
        # Position of the last index entry <= date (forward-fill lookup).
        # ``Index.get_loc(..., method="ffill")`` was removed in pandas 2.0.
        position = index.searchsorted(date, side="right") - 1
        if position < 0:
            raise KeyError(date)
        # NOTE: position 0 yields index[-1] (negative index wraps), as before.
        return index[position - 1]
    # Some registry dates fall on a weekend, so stepping back a single
    # business day is not enough: first move forward to the next business
    # day, then go two business days back.
    next_b_day = date + offsets.BDay()
    return next_b_day - 2 * offsets.BDay()
def _t2_shift(date: pd.Timestamp, index: pd.DatetimeIndex) -> pd.Timestamp:
    """Compute the ex-dividend date for T-2 mode from the registry closing date.

    If the date is not present in the price index, the closest earlier date
    of the index is used; the result is the index entry one position before
    it.

    If the date lies beyond the end of the price history, it is moved one
    business day forward and then two business days back. For weekend dates
    this is not the same as stepping one business day back.

    :param date: registry closing date
    :param index: sorted index of price dates
    :return: ex-dividend date
    :raises KeyError: if ``date`` is earlier than the first index entry
    """
    if date <= index[-1]:
        # Position of the last index entry <= date (forward-fill lookup).
        # ``Index.get_loc(..., method="ffill")`` was removed in pandas 2.0.
        position = index.searchsorted(date, side="right") - 1
        if position < 0:
            raise KeyError(date)
        # NOTE: position 0 yields index[-1] (negative index wraps), as before.
        return index[position - 1]
    next_b_day = date + offsets.BDay()
    return next_b_day - 2 * offsets.BDay()
def _construct_bt_dt_index(self):
    """Construct the t0 date index that runs from t0 to T.

    If the price index starts before the first weight date, the result is
    the price index sliced from the entry just before the first weight date.
    Otherwise one "artificial" datetime, one frequency step before the first
    price date, is prepended to the price index.

    Supported values of ``self.frequency`` for the artificial step are
    ``'B'``, ``'D'``, ``'min'``, ``'H'`` and ``'S'``.

    :return: DatetimeIndex of backtest dates
    :raises ValueError: if a step is needed and ``self.frequency`` is unsupported
    """
    price_index = self.price_date_index.copy()
    first_weight_date = self.trading_dt_index[0]

    if price_index[0] < first_weight_date:
        # Prices start before the first weight date: initialize on the price
        # date immediately preceding the first weight date.
        initialization_date_index = price_index.get_loc(first_weight_date) - 1
        return price_index[initialization_date_index:]

    # One step per supported frequency (replaces an if/elif chain that ended
    # in a leftover pdb breakpoint and an assert).
    step_by_frequency = {
        'B': time_offset.BDay,
        'D': time_offset.Day,
        'min': time_offset.Minute,
        'H': time_offset.Hour,
        'S': time_offset.Second,
    }
    freq = self.frequency
    if freq not in step_by_frequency:
        raise ValueError(
            'Unsupported frequency {!r}; expected one of {}'.format(
                freq, sorted(step_by_frequency)))
    initialization_date = price_index[0] - step_by_frequency[freq](1)

    # Prepend the artificial first datetime; interval matches the price frequency.
    return price_index.append(pd.DatetimeIndex([initialization_date])).sort_values()
def test_tshift(self):
    """Check ``tshift`` round trips on PeriodIndex and DatetimeIndex frames."""
    # --- PeriodIndex ---
    period_frame = tm.makePeriodFrame()
    moved = period_frame.tshift(1)
    restored = moved.tshift(-1)
    assert_frame_equal(restored, period_frame)

    # An explicit frequency (alias or offset object) matches the default shift.
    moved_by_alias = period_frame.tshift(freq='B')
    assert_frame_equal(moved, moved_by_alias)

    moved_by_offset = period_frame.tshift(freq=offsets.BDay())
    assert_frame_equal(moved, moved_by_offset)

    assertRaisesRegexp(ValueError, 'does not match',
                       period_frame.tshift, freq='M')

    # --- DatetimeIndex ---
    moved = self.tsframe.tshift(1)
    restored = moved.tshift(-1)
    assert_frame_equal(self.tsframe, restored)

    moved_by_index_freq = self.tsframe.tshift(freq=self.tsframe.index.freq)
    assert_frame_equal(moved, moved_by_index_freq)

    # Same data, but the index is rebuilt from a plain array.
    plain_index = Index(np.asarray(self.tsframe.index))
    inferred_ts = DataFrame(self.tsframe.values, plain_index,
                            columns=self.tsframe.columns)
    moved = inferred_ts.tshift(1)
    restored = moved.tshift(-1)
    assert_frame_equal(moved, self.tsframe.tshift(1))
    assert_frame_equal(restored, inferred_ts)

    # Irregular rows: tshift without an explicit freq must fail.
    no_freq = self.tsframe.iloc[[0, 5, 7], :]
    self.assertRaises(ValueError, no_freq.tshift)
def daily(universe=None):
    """Spreading Daily Run (send email once done).

    Runs the daily spreading process for the next business day: lists the
    traded spreads, logs additions/deletions, refreshes the risk indicators
    and finally emails a summary.

    :param universe: forwarded unchanged to the book and to ``daily_differences``
    """
    logger = logging.getLogger(__name__)
    date_format = '%Y-%m-%d'
    day = (dt.datetime.today() + off.BDay(1)).strftime(date_format)
    logger.info('Daily mode (spreading) for: {}!'.format(day))

    # Get spreading book
    oxe.init(books['spreading'].book)

    # Traded list for the run date
    traded = books['spreading'].daily_trade(universe, day)

    # Additions/deletions, rendered once and reused for log and email
    added, removed = oxu.daily_differences(universe, day)
    additions_line = 'Additions: {}'.format(added)
    logger.info(additions_line)
    deletions_line = 'Deletions: {}'.format(removed)
    logger.info(deletions_line)

    # Any spreads within 10 days of LTD?
    near_expiry = traded[traded['DaysTo'] <= 10]['Ticker'].tolist()

    # Calculate indicators (roll yield, stddevs, etc...)
    books['spreading'].update_risk(universe, day)

    # Compose and send the email
    subject = '{} - Spreading Daily Run'.format(
        dt.datetime.today().strftime(date_format))
    parts = [
        'Run for: {}\n\n'.format(day),
        'Daily trades:\n{}\n\n{}\n{}\n\n'.format(
            traded, additions_line, deletions_line),
    ]
    if near_expiry:
        parts.append(
            'WARNING: Within 10 days of FND/LTD: {}'.format(near_expiry))
    message = ''.join(parts)
    om.send_email(subject, message, '*****@*****.**', text_only=True)
def daily_trade(universe=None, date=None, days_back=90):
    """Return the currently traded spreads with their distance to the last date.

    :param universe: list of tickers; when omitted, the ``CTicker`` column of
        ``get_universe()`` is used
    :param date: reference date string (defaults to today, ``%Y-%m-%d``)
    :param days_back: int - rows are kept while ``DaysTo <= days_back + 3``
    :return: DataFrame with a fresh integer index and a ``DaysTo`` column
    """
    log = logging.getLogger(__name__)
    log.debug('Daily Trade list')

    if universe is None:
        universe = list(get_universe()['CTicker'])
    day = date if date is not None else dt.datetime.now().strftime('%Y-%m-%d')

    def active_chain(ticker):
        # Spread chain restricted to the active contracts (nac=4).
        chain = cc.FutureChain(ticker, i.FutureType.Spread)
        chain.initialize_contracts(cc.Status.Active, nac=4)
        return chain.chain

    combined = pd.concat([active_chain(ticker) for ticker in universe])

    def business_days_left(row):
        # Number of business-day stamps from ``day`` through LastDate.
        return len(pd.date_range(day, row['LastDate'], freq=o.BDay()))

    combined['DaysTo'] = combined.apply(business_days_left, axis=1)
    # 3 extra days of slack on top of days_back
    combined = combined[combined['DaysTo'] <= days_back + 3]
    return combined.reset_index(drop=True)
def test_asfreq2(self, frame_or_series): ts = frame_or_series( [0.0, 1.0, 2.0], index=DatetimeIndex( [ datetime(2009, 10, 30), datetime(2009, 11, 30), datetime(2009, 12, 31), ], freq="BM", ), ) daily_ts = ts.asfreq("B") monthly_ts = daily_ts.asfreq("BM") tm.assert_equal(monthly_ts, ts) daily_ts = ts.asfreq("B", method="pad") monthly_ts = daily_ts.asfreq("BM") tm.assert_equal(monthly_ts, ts) daily_ts = ts.asfreq(offsets.BDay()) monthly_ts = daily_ts.asfreq(offsets.BMonthEnd()) tm.assert_equal(monthly_ts, ts) result = ts[:0].asfreq("M") assert len(result) == 0 assert result is not ts if frame_or_series is Series: daily_ts = ts.asfreq("D", fill_value=-1) result = daily_ts.value_counts().sort_index() expected = Series([60, 1, 1, 1], index=[-1.0, 2.0, 1.0, 0.0]).sort_index() tm.assert_series_equal(result, expected)
def _move_back_if_holidays(self, workday_candidate):
    """Shift holiday entries back one business day, in place, until none remain.

    :param workday_candidate: mutable collection of dates supporting boolean
        mask assignment (e.g. a pandas Series); it is modified in place
    :return: None
    """
    while True:
        # Flag every candidate that is listed in the holiday calendar.
        holiday_mask = [
            candidate in self.holidays_calendar
            for candidate in workday_candidate
        ]
        workday_candidate[holiday_mask] -= offsets.BDay(1)
        # Entries that were moved may have landed on another holiday, so
        # only stop once a pass flagged nothing.
        if not any(holiday_mask):
            break
def get_tax_date(self, transactions_date: Series) -> Series:
    """
    Derive tax dates: one business day before each transaction date, then
    adjusted through ``_move_back_if_holidays``.

    :param transactions_date: pandas column of dates
    :return: pandas column of tax dates
    """
    tax_dates = transactions_date - offsets.BDay(1)
    # The helper's return value is ignored; presumably it adjusts the
    # series in place - confirm against its definition.
    self._move_back_if_holidays(tax_dates)
    return tax_dates
def test_time_rule_series(raw, series):
    """Rolling ``apply`` on a business-day resample must match ``np.mean``.

    The last rolling value is compared with the mean of the untouched source
    values that fall inside the same 25-business-day window. The applied
    function ``f`` is defined elsewhere in the module.
    """
    win = 25
    minp = 10
    ser = series[::2].resample("B").mean()
    series_result = ser.rolling(window=win, min_periods=minp).apply(f, raw=raw)

    last_date = series_result.index[-1]
    # The window covers ``win`` business days ending at last_date.
    prev_date = last_date - (win - 1) * offsets.BDay()
    trunc_series = series[::2].truncate(prev_date, last_date)

    # ``.iloc[-1]``: plain ``[-1]`` on a DatetimeIndex relies on the
    # positional fallback that pandas deprecated.
    tm.assert_almost_equal(series_result.iloc[-1], np.mean(trunc_series))
def get_start_end(self, extra_days=False):
    """Get Start and End dates for the chain.

    Start is the first index entry of the first contract in ``self.data``;
    End is the ``LastDate`` of the last row of ``self.chain``.

    :param extra_days: bool - Pad the range with 10 business days before the
        start and 1 after the end (used for Reference Creation)
    :return: tuple of str - Start and End formatted as ``%Y-%m-%d``
    :raises ChainError: if anything goes wrong while reading the dates
    """
    date_format = '%Y-%m-%d'
    try:
        contracts = list(self.data.keys())
        first_contract = contracts[0]
        start = self.data[first_contract].index[0].to_pydatetime()
        end = dt.datetime.strptime(self.chain.iloc[-1]['LastDate'], date_format)
        if extra_days:
            start = start - o.BDay(10)
            end = end + o.BDay(1)
        self.log.info('First Contract: {} - Last Contract: {}'.format(
            first_contract, contracts[-1]))
        return start.strftime(date_format), end.strftime(date_format)
    except Exception:
        raise ChainError('Problem while accessing start/end!')
def test_time_rule_series(series, compare_func, roll_func, kwargs, minp): win = 25 ser = series[::2].resample("B").mean() series_result = getattr(ser.rolling(window=win, min_periods=minp), roll_func)(**kwargs) last_date = series_result.index[-1] prev_date = last_date - 24 * offsets.BDay() trunc_series = series[::2].truncate(prev_date, last_date) tm.assert_almost_equal(series_result[-1], compare_func(trunc_series))
def test_time_rule_series(series, q):
    """Rolling ``quantile(q)`` on a business-day resample must match the reference.

    The reference is ``scoreatpercentile(..., per=q)`` evaluated on the raw
    source values inside the same 25-business-day window.
    """
    compare_func = partial(scoreatpercentile, per=q)
    win = 25
    ser = series[::2].resample("B").mean()
    series_result = ser.rolling(window=win, min_periods=10).quantile(q)

    last_date = series_result.index[-1]
    # The window covers ``win`` business days ending at last_date.
    prev_date = last_date - (win - 1) * offsets.BDay()
    trunc_series = series[::2].truncate(prev_date, last_date)

    # ``.iloc[-1]``: plain ``[-1]`` on a DatetimeIndex relies on the
    # positional fallback that pandas deprecated.
    tm.assert_almost_equal(series_result.iloc[-1], compare_func(trunc_series))
def get_table(date, stem, future_type, status, we_trade=True, nac=4, ncb=1, data_download=False):
    """Generate a table of all the contracts available for a market and the associated last date.

    :param date: str - Date reference for the active or expired contracts
    :param stem: str - Market Stem
    :param future_type: Enum FutureType - Type of contract
    :param status: Enum Status - Status needed for the chain
    :param we_trade: bool - Whether or not the contract is considered as traded
    :param nac: int - Number of active contracts to get (depending on the markets)
    :param ncb: int - Number of contracts in between (not for outrights)
    :param data_download: bool - Option for data download (as we need to use LTD for completeness)
    :return: A dataframe of all the available tickers with the last date
    :raises ChainError: if the last-date cell of a contract row is not a string
    """
    # Column of the contract table that holds the "last date" to use
    ld_rule = 'LTD' if data_download else oci.get(stem, 'Reference')
    ct_df = oci.ctrmth(stem, we_trade)

    if future_type == oci.FutureType.Outright:
        min_ctrmth = 20000000
    else:
        min_ctrmth = 20070000

    rows = []
    active = 0
    for _, row in ct_df[ct_df['CtrMth'] > min_ctrmth].iterrows():
        for ticker in generate_contracts(stem, row['CtrMth'], future_type, ct_df, ncb):
            last_date = row[ld_rule]
            if not isinstance(last_date, str):
                raise ChainError('Problem in CtrMth table for: {}!'.format(ticker))

            # ActivePlus keeps a contract for 5 more business days after its
            # last date (to continue downloading data for expired contracts).
            if status == Status.ActivePlus:
                padded = dt.datetime.strptime(last_date, '%Y-%m-%d') + o.BDay(5)
                end = padded.strftime('%Y-%m-%d')
            else:
                end = last_date

            if status in (Status.Active, Status.ActiveLive, Status.ActivePlus):
                if date > end or active >= nac * ncb:
                    continue
                # Expired contracts kept by ActivePlus do not count as active.
                if date < last_date:
                    active += 1
            elif status == Status.Expired and date < last_date:
                # First non-expired contract of this row: stop with this row.
                break

            rows.append(co.OrderedDict({'Ticker': ticker, 'LastDate': last_date}))
    return pd.DataFrame(rows)
def test_time_rule_series(series, sp_func, roll_func): import scipy.stats compare_func = partial(getattr(scipy.stats, sp_func), bias=False) win = 25 ser = series[::2].resample("B").mean() series_result = getattr(ser.rolling(window=win, min_periods=10), roll_func)() last_date = series_result.index[-1] prev_date = last_date - 24 * offsets.BDay() trunc_series = series[::2].truncate(prev_date, last_date) tm.assert_almost_equal(series_result[-1], compare_func(trunc_series))
def test_time_rule_frame(raw, frame, q):
    """Rolling ``quantile(q)`` on a resampled frame must match the reference.

    The last rolling row is compared column by column with
    ``scoreatpercentile(..., per=q)`` over the raw rows inside the same
    25-business-day window.
    """
    window = 25
    percentile_of = partial(scoreatpercentile, per=q)

    resampled = frame[::2].resample("B").mean()
    rolled = resampled.rolling(window=window, min_periods=10).quantile(q)

    window_end = rolled.index[-1]
    window_start = window_end - offsets.BDay(24)
    raw_window = frame[::2].truncate(window_start, window_end)

    tm.assert_series_equal(
        rolled.xs(window_end),
        raw_window.apply(percentile_of, raw=raw),
        check_names=False,
    )
def test_time_rule_frame(raw, frame):
    """Rolling ``apply`` on a resampled frame must match ``np.mean``.

    The last rolling row is compared column by column with the mean of the
    raw rows inside the same 25-business-day window. The applied function
    ``f`` is defined elsewhere in the module.
    """
    window, min_obs = 25, 10

    resampled = frame[::2].resample("B").mean()
    rolled = resampled.rolling(window=window, min_periods=min_obs).apply(f, raw=raw)

    window_end = rolled.index[-1]
    window_start = window_end - offsets.BDay(24)
    raw_window = frame[::2].truncate(window_start, window_end)

    tm.assert_series_equal(
        rolled.xs(window_end),
        raw_window.apply(np.mean, raw=raw),
        check_names=False,
    )
def test_time_rule_frame(raw, frame, compare_func, roll_func, kwargs, minp): win = 25 frm = frame[::2].resample("B").mean() frame_result = getattr(frm.rolling(window=win, min_periods=minp), roll_func)(**kwargs) last_date = frame_result.index[-1] prev_date = last_date - 24 * offsets.BDay() trunc_frame = frame[::2].truncate(prev_date, last_date) tm.assert_series_equal( frame_result.xs(last_date), trunc_frame.apply(compare_func, raw=raw), check_names=False, )
def test_time_rule_frame(raw, frame, sp_func, roll_func): import scipy.stats compare_func = partial(getattr(scipy.stats, sp_func), bias=False) win = 25 frm = frame[::2].resample("B").mean() frame_result = getattr(frm.rolling(window=win, min_periods=10), roll_func)() last_date = frame_result.index[-1] prev_date = last_date - 24 * offsets.BDay() trunc_frame = frame[::2].truncate(prev_date, last_date) tm.assert_series_equal( frame_result.xs(last_date), trunc_frame.apply(compare_func, raw=raw), check_names=False, )
def test_start_end_fields(self, ts): assert ts.is_year_start assert ts.is_quarter_start assert ts.is_month_start assert not ts.is_year_end assert not ts.is_month_end assert not ts.is_month_end freq = offsets.BDay() ts._set_freq(freq) # 2016-01-01 is a Friday, so is year/quarter/month start with this freq msg = "Timestamp.freq is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): assert ts.is_year_start assert ts.is_quarter_start assert ts.is_month_start assert not ts.is_year_end assert not ts.is_month_end assert not ts.is_month_end
def test_tshift(self):
    """Check ``tshift`` round trips on PeriodIndex and DatetimeIndex frames."""
    # --- PeriodIndex ---
    period_frame = tm.makePeriodFrame()
    moved = period_frame.tshift(1)
    restored = moved.tshift(-1)
    assert_frame_equal(restored, period_frame)

    # An explicit frequency (alias or offset object) matches the default shift.
    moved_by_alias = period_frame.tshift(freq="B")
    assert_frame_equal(moved, moved_by_alias)

    moved_by_offset = period_frame.tshift(freq=offsets.BDay())
    assert_frame_equal(moved, moved_by_offset)

    with pytest.raises(ValueError, match="does not match"):
        period_frame.tshift(freq="M")

    # --- DatetimeIndex ---
    moved = self.tsframe.tshift(1)
    restored = moved.tshift(-1)
    assert_frame_equal(self.tsframe, restored)

    moved_by_index_freq = self.tsframe.tshift(freq=self.tsframe.index.freq)
    assert_frame_equal(moved, moved_by_index_freq)

    # Same data, but the index is rebuilt from a plain array.
    plain_index = Index(np.asarray(self.tsframe.index))
    inferred_ts = DataFrame(
        self.tsframe.values,
        plain_index,
        columns=self.tsframe.columns,
    )
    moved = inferred_ts.tshift(1)
    restored = moved.tshift(-1)
    assert_frame_equal(moved, self.tsframe.tshift(1))
    assert_frame_equal(restored, inferred_ts)

    # Irregular rows: tshift without an explicit freq must fail.
    no_freq = self.tsframe.iloc[[0, 5, 7], :]
    msg = "Freq was not given and was not set in the index"
    with pytest.raises(ValueError, match=msg):
        no_freq.tshift()
def download_list(status, symbols=None, interval='daily', override_last=False):
    """Download all data for a specific list as defined in the list module.

    For every ticker the stored data is looked up first. The download then
    restarts 3 business days before the last stored entry (or from
    1900-01-01 when nothing is stored) and runs up to today. Tickers whose
    last stored entry is already today are skipped unless ``override_last``.

    :param status: Enum Status - Active/ActivePlus/All/Expired Status
    :param symbols: List of symbols to download (Stem should be provided) -
        If not provided, will go through the universe
    :param interval: String data interval, possible values: 'minute', 'daily' or both
    :param override_last: bool - To force re-downloading data for current day
    """
    log = logging.getLogger(__name__)
    log.info('Download {}'.format(interval))
    date_format = '%Y-%m-%d'

    if symbols is None:
        symbols = oci.json_db

    for market in symbols:
        log.debug('Download data for {}'.format(market))
        tickers_df = ocl.generate_tickers_df(market, status)

        for _, row in tickers_df.iterrows():
            ticker = row['Ticker']
            stored = odu.get_market_df(ticker)

            if stored is None:
                # Nothing on disk yet: download the full history.
                last_date = '1900-01-01'
                start = '1900-01-01'
            else:
                last_date = dt.datetime.strftime(stored.index[-1], date_format)
                start = dt.datetime.strftime(
                    stored.index[-1] - o.BDay(3), date_format)

            up_to_date = last_date == dt.datetime.today().strftime(date_format)
            if up_to_date and not override_last:
                log.info(
                    'Do not download {} as it has already been downloaded'.
                    format(ticker))
                continue

            # Download Data & Save to the file
            fresh = get_ohlcv_data(
                ticker, interval, start,
                dt.datetime.today().strftime(date_format))
            if fresh is not None:
                odu.save_market_df(ticker, fresh)
def daily_differences(universe=None, date=None):
    """Daily differences in the symbols. Tracks additions and deletions (new symbol and expired).

    Compares the traded list of ``date`` with the one of the previous
    business day.

    :param universe: list: Not necessary, if not specified will use Excel.
    :param date: If specified (``%Y-%m-%d``), returns the differences for that
        date (today is set to default)
    :return: tuple of lists - (additions, deletions)
    """
    log = logging.getLogger(__name__)
    day = dt.datetime.today() if date is None else dt.datetime.strptime(date, '%Y-%m-%d')
    to = oxs.daily_trade(universe, day.strftime('%Y-%m-%d'))
    ye = oxs.daily_trade(universe, (day - o.BDay(1)).strftime('%Y-%m-%d'))

    # The old messages were written as 'Today''s ...': adjacent string
    # literals are concatenated, so the apostrophe was silently dropped.
    sto = set(to['Ticker'])
    log.debug("Today's Markets: {}".format(sto))
    sye = set(ye['Ticker'])
    log.debug("Yesterday's Markets: {}".format(sye))

    # Adds - Dels
    adds = list(sto.difference(sye))
    dels = list(sye.difference(sto))
    log.info('Additions (to check): {}'.format(adds))
    log.info('Deletions (to check): {}'.format(dels))
    return adds, dels
def momentum(df, h=252):
    """Return the percentage change of ``df`` over ``h`` business days.

    NOTE: the index of the passed-in ``df`` is converted to datetimes in
    place, so the caller's object is modified.

    :param df: pandas object whose index can be parsed by ``pd.to_datetime``
    :param h: int - horizon in business days
    :return: result of ``df.pct_change`` with a ``BDay(h)`` frequency
    """
    df.index = pd.to_datetime(df.index)
    horizon = offsets.BDay(h)
    return df.pct_change(freq=horizon)
def test_shift(self):
    """Exercise ``DataFrame.shift``: naive, by offset, PeriodIndex and along columns."""
    # naive shift, forwards and backwards: index is kept, columns agree
    # with shifting the corresponding Series
    for periods in (5, -5):
        moved_frame = self.tsframe.shift(periods)
        self.assert_index_equal(moved_frame.index, self.tsframe.index)

        moved_series = self.tsframe['A'].shift(periods)
        assert_series_equal(moved_frame['A'], moved_series)

    # shift by 0
    same = self.tsframe.shift(0)
    assert_frame_equal(same, self.tsframe)

    # shift by DateOffset
    moved_by_offset = self.tsframe.shift(5, freq=offsets.BDay())
    self.assertEqual(len(moved_by_offset), len(self.tsframe))

    moved_by_alias = self.tsframe.shift(5, freq='B')
    assert_frame_equal(moved_by_offset, moved_by_alias)

    first_date = self.tsframe.index[0]
    first_date_moved = first_date + offsets.BDay(5)
    assert_series_equal(self.tsframe.xs(first_date),
                        moved_by_offset.xs(first_date_moved),
                        check_names=False)

    # shift int frame (only checks that it does not raise)
    int_shifted = self.intframe.shift(1)  # noqa

    # Shifting with PeriodIndex
    period_frame = tm.makePeriodFrame()
    moved = period_frame.shift(1)
    restored = moved.shift(-1)
    self.assert_index_equal(moved.index, period_frame.index)
    self.assert_index_equal(restored.index, period_frame.index)
    tm.assert_numpy_array_equal(restored.iloc[:, 0].valid().values,
                                period_frame.iloc[:-1, 0].values)

    moved_alias = period_frame.shift(1, 'B')
    moved_offset = period_frame.shift(1, offsets.BDay())
    assert_frame_equal(moved_alias, moved_offset)
    assert_frame_equal(period_frame, moved_alias.shift(-1, 'B'))

    assertRaisesRegexp(ValueError, 'does not match PeriodIndex freq',
                       period_frame.shift, freq='D')

    # shift other axis (GH 6371), then the same through the axis name
    for axis in (1, 'columns'):
        df = DataFrame(np.random.rand(10, 5))
        nan_column = DataFrame(np.nan, index=df.index, columns=[0])
        expected = pd.concat([nan_column, df.iloc[:, 0:-1]],
                             ignore_index=True, axis=1)
        result = df.shift(1, axis=axis)
        assert_frame_equal(result, expected)
def test_shift(self):
    """Exercise ``DataFrame.shift``: naive, by offset, PeriodIndex and along columns."""
    # naive shift, forwards and backwards: index is kept, columns agree
    # with shifting the corresponding Series
    for periods in (5, -5):
        moved_frame = self.tsframe.shift(periods)
        tm.assert_index_equal(moved_frame.index, self.tsframe.index)

        moved_series = self.tsframe["A"].shift(periods)
        assert_series_equal(moved_frame["A"], moved_series)

    # shift by 0
    same = self.tsframe.shift(0)
    assert_frame_equal(same, self.tsframe)

    # shift by DateOffset
    moved_by_offset = self.tsframe.shift(5, freq=offsets.BDay())
    assert len(moved_by_offset) == len(self.tsframe)

    moved_by_alias = self.tsframe.shift(5, freq="B")
    assert_frame_equal(moved_by_offset, moved_by_alias)

    first_date = self.tsframe.index[0]
    first_date_moved = first_date + offsets.BDay(5)
    assert_series_equal(
        self.tsframe.xs(first_date),
        moved_by_offset.xs(first_date_moved),
        check_names=False,
    )

    # shift int frame (only checks that it does not raise)
    int_shifted = self.intframe.shift(1)  # noqa

    # Shifting with PeriodIndex
    period_frame = tm.makePeriodFrame()
    moved = period_frame.shift(1)
    restored = moved.shift(-1)
    tm.assert_index_equal(moved.index, period_frame.index)
    tm.assert_index_equal(restored.index, period_frame.index)
    tm.assert_numpy_array_equal(
        restored.iloc[:, 0].dropna().values,
        period_frame.iloc[:-1, 0].values,
    )

    moved_alias = period_frame.shift(1, "B")
    moved_offset = period_frame.shift(1, offsets.BDay())
    assert_frame_equal(moved_alias, moved_offset)
    assert_frame_equal(period_frame, moved_alias.shift(-1, "B"))

    msg = "does not match PeriodIndex freq"
    with pytest.raises(ValueError, match=msg):
        period_frame.shift(freq="D")

    # shift other axis (GH 6371), then the same through the axis name
    for axis in (1, "columns"):
        df = DataFrame(np.random.rand(10, 5))
        nan_column = DataFrame(np.nan, index=df.index, columns=[0])
        expected = pd.concat(
            [nan_column, df.iloc[:, 0:-1]],
            ignore_index=True,
            axis=1,
        )
        result = df.shift(1, axis=axis)
        assert_frame_equal(result, expected)
def _check_moment_func(
    static_comp,
    name,
    raw,
    has_min_periods=True,
    has_center=True,
    has_time_rule=True,
    fill_value=None,
    zero_min_periods_equal=True,
    series=None,
    frame=None,
    **kwargs,
):
    """Run the common battery of checks for the rolling method ``name``.

    :param static_comp: reference function computing the statistic on a
        whole window (Series or array in, scalar out)
    :param name: name of the ``Rolling`` method under test
    :param raw: forwarded as ``raw=`` to ``DataFrame.apply`` and, when
        ``name == "apply"``, to the rolling call as well
    :param has_min_periods: whether explicit non-zero ``min_periods`` values
        are exercised (otherwise ``min_periods=0`` is used where one is needed)
    :param has_center: whether ``center=True`` is checked
    :param has_time_rule: whether the business-day resample check is run
    :param fill_value: if not None, NaNs of the expected centered results are
        filled with it before comparing
    :param zero_min_periods_equal: whether ``min_periods=0`` must give the
        same result as ``min_periods=1``
    :param series: Series fixture (DatetimeIndex, at least 50 rows)
    :param frame: DataFrame fixture (DatetimeIndex, at least 50 rows)
    :param kwargs: extra keyword arguments for the rolling method
    """
    # inject raw
    if name == "apply":
        kwargs = copy.copy(kwargs)
        kwargs["raw"] = raw

    def get_result(obj, window, min_periods=None, center=False):
        r = obj.rolling(window=window, min_periods=min_periods, center=center)
        return getattr(r, name)(**kwargs)

    def minp_or_zero(value):
        # Methods without min_periods support are always run with 0.
        return value if has_min_periods else 0

    series_result = get_result(series, window=50)
    assert isinstance(series_result, Series)
    tm.assert_almost_equal(series_result.iloc[-1], static_comp(series.iloc[-50:]))

    frame_result = get_result(frame, window=50)
    assert isinstance(frame_result, DataFrame)
    tm.assert_series_equal(
        frame_result.iloc[-1, :],
        frame.iloc[-50:, :].apply(static_comp, axis=0, raw=raw),
        check_names=False,
    )

    # check time_rule works
    if has_time_rule:
        win = 25
        minp = minp_or_zero(10)

        ser = series[::2].resample("B").mean()
        frm = frame[::2].resample("B").mean()

        series_result = get_result(ser, window=win, min_periods=minp)
        frame_result = get_result(frm, window=win, min_periods=minp)

        last_date = series_result.index[-1]
        prev_date = last_date - (win - 1) * offsets.BDay()

        trunc_series = series[::2].truncate(prev_date, last_date)
        trunc_frame = frame[::2].truncate(prev_date, last_date)

        # .iloc: plain [-1] on a DatetimeIndex uses the deprecated
        # positional fallback.
        tm.assert_almost_equal(series_result.iloc[-1], static_comp(trunc_series))

        tm.assert_series_equal(
            frame_result.xs(last_date),
            trunc_frame.apply(static_comp, raw=raw),
            check_names=False,
        )

    # excluding NaNs correctly
    obj = Series(randn(50))
    obj.iloc[:10] = np.nan
    obj.iloc[-10:] = np.nan
    if has_min_periods:
        result = get_result(obj, 50, min_periods=30)
        tm.assert_almost_equal(result.iloc[-1], static_comp(obj.iloc[10:-10]))

        # min_periods is working correctly
        result = get_result(obj, 20, min_periods=15)
        assert isna(result.iloc[23])
        assert not isna(result.iloc[24])

        assert not isna(result.iloc[-6])
        assert isna(result.iloc[-5])

        obj2 = Series(randn(20))
        result = get_result(obj2, 10, min_periods=5)
        assert isna(result.iloc[3])
        assert notna(result.iloc[4])

        if zero_min_periods_equal:
            # min_periods=0 may be equivalent to min_periods=1
            result0 = get_result(obj, 20, min_periods=0)
            result1 = get_result(obj, 20, min_periods=1)
            tm.assert_almost_equal(result0, result1)
    else:
        result = get_result(obj, 50)
        tm.assert_almost_equal(result.iloc[-1], static_comp(obj.iloc[10:-10]))

    # window larger than series length (#7297)
    if has_min_periods:
        oversized_minps = (0, len(series) - 1, len(series))
    else:
        oversized_minps = (0,)
    for minp in oversized_minps:
        result = get_result(series, len(series) + 1, min_periods=minp)
        expected = get_result(series, len(series), min_periods=minp)
        nan_mask = isna(result)
        tm.assert_series_equal(nan_mask, isna(expected))

        nan_mask = ~nan_mask
        tm.assert_almost_equal(result[nan_mask], expected[nan_mask])

    # check center=True
    if has_center:
        center_minp = minp_or_zero(15)
        result = get_result(obj, 20, min_periods=center_minp, center=True)
        # A centered window equals a trailing window on the series padded
        # with NaNs, read 9 rows later.
        padded = pd.concat([obj, Series([np.nan] * 9)])
        expected = (
            get_result(padded, 20, min_periods=center_minp)
            .iloc[9:]
            .reset_index(drop=True)
        )
        tm.assert_series_equal(result, expected)

        # shifter index
        s = [f"x{x:d}" for x in range(12)]
        shift_minp = minp_or_zero(10)

        series_xp = (
            get_result(
                series.reindex(list(series.index) + s),
                window=25,
                min_periods=shift_minp,
            )
            .shift(-12)
            .reindex(series.index)
        )
        frame_xp = (
            get_result(
                frame.reindex(list(frame.index) + s),
                window=25,
                min_periods=shift_minp,
            )
            .shift(-12)
            .reindex(frame.index)
        )

        series_rs = get_result(series, window=25, min_periods=shift_minp, center=True)
        frame_rs = get_result(frame, window=25, min_periods=shift_minp, center=True)

        if fill_value is not None:
            series_xp = series_xp.fillna(fill_value)
            frame_xp = frame_xp.fillna(fill_value)
        tm.assert_series_equal(series_xp, series_rs)
        tm.assert_frame_equal(frame_xp, frame_rs)