def test_date_range_lower_freq():
    """Check '3D' is rejected by ``date_range`` and reachable via ``convert_freq``."""
    nyse = mcal.get_calendar("NYSE")
    first = pd.Timestamp('2017-09-05 20:00', tz='UTC')
    last = pd.Timestamp('2017-10-23 20:00', tz='UTC')
    schedule = nyse.schedule(first, last)

    # A frequency lower than 1D must raise with this exact message.
    with pytest.raises(ValueError) as e:
        mcal.date_range(schedule, frequency='3D')
    assert e.exconly() == "ValueError: Frequency must be 1D or higher frequency."

    # Instead: build the daily range, then convert it to each lower frequency
    # and compare against a plain pandas range at that frequency.
    daily = mcal.date_range(schedule, frequency='1D')
    for freq in ('3D', '1W'):
        actual = mcal.convert_freq(daily, freq)
        expected = pd.date_range('2017-09-05 20:00',
                                 '2017-10-23 20:00',
                                 freq=freq,
                                 tz='UTC')
        assert_index_equal(actual, expected)
def get_start_bia(self, st, s):
    """Move the start date back until enough daily points are available.

    Begins at ``s.start_dt_utc`` and steps back one calendar day at a time
    until the '1d' date range built from ``st.cal`` between the candidate
    start and ``s.start_dt_utc`` holds at least ``st.bia`` entries.

    :param st: object providing ``cal`` (a market calendar) and ``bia``
    :param s: object providing ``start_dt_utc``
    :return: the (possibly earlier) start date
    """
    start = s.start_dt_utc
    sessions = mcal.date_range(st.cal.schedule(start, start), '1d')
    while len(sessions) < st.bia:
        # Not enough points yet: widen the window by one calendar day.
        start -= dt.timedelta(days=1)
        widened = st.cal.schedule(start, s.start_dt_utc)
        sessions = mcal.date_range(widened, '1d')
    return start
def __init__(self, start=None, end=None):
    """Index the minute-data files and load or build the minute date range.

    :param start: first day of the range; must expose ``.year`` when given
    :param end: last day of the range; must expose ``.year`` when given

    When both ``start`` and ``end`` are truthy, the NYSE 1-minute range is
    generated, converted to the ``NY`` timezone, stored on the instance and
    pickled to ``dates_<start.year>_<end.year>.pkl``. Otherwise the
    pre-computed ``dates_2004_2020.pkl`` file is loaded.
    """
    import os  # local import: only used for portable file-name extraction

    minute_files = glob.glob('/home/dewe/samgame/datasets/minute/*')
    # Key every file by the part of its file name before the first '_'.
    # basename() handles '/' as well as '\\' separators; splitting on a
    # backslash alone left the whole directory prefix in the key on POSIX.
    self.sym_dict = {
        os.path.basename(path).split('_')[0]: path
        for path in minute_files
    }

    if start and end:
        nyse = mcal.get_calendar('NYSE')
        schedule = nyse.schedule(start_date=start, end_date=end)
        full_date_range = mcal.date_range(
            schedule, frequency='1min').tz_convert(NY)
        self.full_date_range = full_date_range
        cache_path = (
            f'/home/dewe/samgame/datasets/dates_{start.year}_{end.year}.pkl')
        with open(cache_path, 'wb') as pkl:
            pickle.dump(full_date_range, pkl)
    else:
        # NOTE(review): pickle.load can execute arbitrary code; this is only
        # acceptable while the cache file is produced locally by this class.
        with open('/home/dewe/samgame/datasets/dates_2004_2020.pkl',
                  'rb') as pkl:
            self.full_date_range = pickle.load(pkl)

    self.all_syms = list(self.sym_dict.keys())
    self.live_data = {}
    self.tech_indicators = None
    self.done = False
def returns_sharpe(cls, df, cal, s):
    """Compute annualised return, total return and annualised Sharpe ratio.

    :param df: value series, indexable by the labels of ``df.index``
    :param cal: market calendar; only used when ``s.instr`` is not 'crypto'
    :param s: settings exposing ``instr``, ``freq``, ``freq_seconds()`` and
        ``risk_free_rate``
    :return: tuple ``(ar, tr, asr)`` - annualised return in percent, total
        return in percent, annualised Sharpe ratio
    """
    stamps = list(df.index)
    # Simple return between every pair of consecutive observations.
    returns = [
        df[nxt] / df[cur] - 1 for cur, nxt in zip(stamps, stamps[1:])
    ]

    if s.instr == 'crypto':
        # Number of points per 24-hour day at the configured frequency.
        trading_pts_day = 24 * 3600 / s.freq_seconds()
    else:
        # Count the points of the first day in the data on the calendar.
        first = df.index[0]
        day_schedule = cal.schedule(first, first)
        day_points = mcal.date_range(day_schedule,
                                     s.freq,
                                     dtype='datetime64',
                                     closed='left')
        trading_pts_day = len(day_points)

    mean = np.mean(returns)
    std = np.std(returns)
    # Annualise using 252 days per year.
    ann_mean = (1 + mean)**(trading_pts_day * 252) - 1
    ann_std = std * np.sqrt(trading_pts_day * 252)

    asr = (ann_mean - s.risk_free_rate) / ann_std
    ar = ann_mean * 100
    tr = (np.prod([1 + r for r in returns]) - 1) * 100
    return ar, tr, asr
def create_market_cal(start, end):
    """Return the NYSE trading days between ``start`` and ``end``.

    :param start: first date of the window, passed to ``nyse.schedule``
    :param end: last date of the window, passed to ``nyse.schedule``
    :return: list of timezone-naive timestamps, one per trading day, with
        the hour set to 0
    """
    nyse = mcal.get_calendar('NYSE')
    # Use this function's own arguments. The body previously read
    # ``stocks_start`` / ``stocks_end``, which are not parameters here and
    # would either raise NameError or silently pick up unrelated globals.
    schedule = nyse.schedule(start, end)
    market_cal = mcal.date_range(schedule, frequency='1D')
    market_cal = market_cal.tz_localize(None)
    return [day.replace(hour=0) for day in market_cal]
def query_prices(ticker, session, db, portal, start_date=None, end_date=None):
    """Return stored prices for ``ticker``, fetching and storing missing days.

    :param ticker: security ticker to look up
    :param session: database session used for queries and inserts
    :param db: connection handed to ``pd.read_sql``
    :param portal: data source exposing ``fetch_price``
    :param start_date: start of the window (normalised by
        ``datetime_modification``)
    :param end_date: end of the window (normalised by
        ``datetime_modification``)
    :return: DataFrame of prices, or ``None`` when the security is unknown
        and could not be inserted
    """
    start_date, end_date = datetime_modification(start_date, end_date)

    # First, check if security is present
    result = check_for_security(ticker, session, db)
    if len(result) == 0:
        inserted = insert_missing_security(ticker, session, portal, db)
        if not inserted:
            return
        # ``result`` is still the empty lookup from before the insert, so
        # reading its id would raise IndexError. Look the security up again.
        # NOTE(review): assumes check_for_security sees the fresh row.
        result = check_for_security(ticker, session, db)
        if len(result) == 0:
            return
    security_id = result['id'].values[0]

    # Now, find which prices we have (if any)
    query = session.query(models.SecurityPrice).outerjoin(
        models.Security).filter(models.Security.ticker == ticker).statement
    existing_data = pd.read_sql(query, db)

    if len(existing_data) > 0:
        existing_data.sort_values(by='date', inplace=True)
        existing_data.reset_index(inplace=True, drop=True)

        # Get trading days
        nyse = mcal.get_calendar('NYSE')
        early = nyse.schedule(start_date=start_date, end_date=end_date)

        # Identify missing days from existing data (accounting for gaps)
        days_needed = pd.to_datetime(mcal.date_range(early, frequency='1D'))
        days_needed = [day.date() for day in days_needed]
        missing_dates = np.setdiff1d(days_needed, existing_data['date'])

        if len(missing_dates) > 0:
            # Fetch the whole span from the first to the last missing day.
            missing_data = portal.fetch_price(ticker,
                                              start_date=missing_dates[0],
                                              end_date=missing_dates[-1])
            missing_data['security_id'] = security_id
            missing_data.reset_index(inplace=True)
            missing_data.drop('label', inplace=True, axis=1)
            insert_prices(missing_data, session)

            # Reload so the result includes the rows just inserted.
            existing_data = pd.read_sql(query, db)
            existing_data.sort_values(by='date', inplace=True)

        in_window = ((existing_data['date'] >= start_date) &
                     (existing_data['date'] <= end_date))
        return existing_data[in_window]
    else:
        data = portal.fetch_price(ticker,
                                  start_date=start_date,
                                  end_date=end_date)
        data.reset_index(inplace=True)
        data['security_id'] = security_id
        # axis=1 drops the 'label' column, matching the branch above.
        # Without it pandas looks for a row labelled 'label'.
        data.drop('label', inplace=True, axis=1)
        insert_prices(data, session)
        return data
def get_yearly_trading_calendar(year, cal='LSE'):
    """Return the daily trading date range of ``year`` for one calendar.

    Uses pandas-market-calendars; the original notes list 'NYSE', 'LSE',
    'CME', 'EUREX' and 'TSX' as calendar names.

    :param year: calendar year; it is converted with ``str()``
    :param cal: calendar name passed to ``mcal.get_calendar``
    :return: result of ``mcal.date_range`` at '1D' from 01-01 to 12-31
    """
    calendar = mcal.get_calendar(cal)
    first_day = str(year) + '-01-01'
    last_day = str(year) + '-12-31'
    schedule = calendar.schedule(start_date=first_day, end_date=last_day)
    return mcal.date_range(schedule, frequency='1D')
def set_index(self, start, end):
    """Reindex ``self.data`` onto the NYSE trading days in ``start``..``end``.

    Also stores the index as ``self.date_idx``, the length of the
    'datetime' dimension as ``self.total_length`` and the month/week start
    lookups in ``self.starts``.
    """
    nyse = mcal.get_calendar('NYSE')
    schedule = nyse.schedule(start_date=start, end_date=end)
    sessions = mcal.date_range(schedule, frequency='1d')
    # Round-trip through daily periods to get one timestamp per day.
    idx = sessions.to_period('1d').to_timestamp()

    self.data = self.data.reindex(datetime=idx)
    self.date_idx = idx
    self.total_length = self.data.sizes['datetime']

    self.starts = {}
    self.starts['month'] = get_month_starts(self.date_idx)
    self.starts['week'] = get_week_starts(self.date_idx)
def get_recent_trading_days(delta: int = 10, current: datetime = None):
    """Return the recent trading days as formatted strings.

    :param delta: number of calendar days to look back from ``current``
    :param current: end of the window; defaults to now in the timezone of
        ``cboe_calendar``
    :return: the ``DATE_FORMAT``-formatted days of the window for which
        ``is_business_day`` is truthy
    """
    if current is None:
        current = datetime.datetime.now(tz=cboe_calendar.tz)
    window_start = current - datetime.timedelta(days=delta)

    recent = cboe_calendar.schedule(
        start_date=window_start.strftime(DATE_FORMAT),
        end_date=current.strftime(DATE_FORMAT))
    days = market_cal.date_range(recent, frequency='1D').strftime(DATE_FORMAT)

    # Original author's note: "just worked, not good".
    keep = [is_business_day(day, recent) for day in days]
    return days[keep]
def __init__(self, start_date):
    """Store the start date, today's date and a ten-year NYSE day list.

    :param start_date: value converted with ``self.get_date_obj``
    """
    self._start_date = self.get_date_obj(start_date)
    self._today = self.get_date_obj(datetime.now())
    # Tickers flagged as problematic (original note: "these tickers cause
    # problems in python because they are reserved words").
    self._special_symbols = ['PRN', 'CON']

    nyse = mcal.get_calendar('NYSE')
    # Window of 365 * 5 days on either side of today.
    window_start = datetime.now().date() - timedelta(days=365 * 5)
    window_end = datetime.now().date() + timedelta(days=365 * 5)
    schedule = nyse.schedule(start_date=window_start, end_date=window_end)
    sessions = mcal.date_range(schedule, frequency='1D')
    self._nyse_schedule = pd.Series([stamp.date() for stamp in sessions])

    self._no_options = self.get_no_options_list()
def get_daterange(self):
    """
    Generate the daily trading datetimes of ``self.exchange``.

    The window runs from ``self.start`` to ``self.end``.

    Returns
    -------
    pandas.DatetimeIndex
        a range of dates
    """
    calendar = tcal.get_calendar(self.exchange)
    schedule = calendar.schedule(start_date=self.start, end_date=self.end)
    daily = tcal.date_range(schedule, frequency='1D')
    return daily
def __init__(self, path, symbols, interval=INTERVALS['1_DAY']):
    """Load one CSV per symbol into a single xarray structure.

    :param path: directory containing ``<symbol>.csv`` files
    :param symbols: list of symbols; symbols whose file cannot be read are
        removed from this list in place
    :param interval: forwarded to ``XArrayDataFeed.__init__``
    """
    XArrayDataFeed.__init__(self, interval)
    self.path = path
    self.symbols = symbols
    self.hard_start = dt.date(1995, 1, 1)
    self.hard_stop = dt.date(2050, 1, 1)

    # Common index: every NYSE trading day between the hard limits.
    nyse = mcal.get_calendar('NYSE')
    schedule = nyse.schedule(start_date=self.hard_start,
                             end_date=self.hard_stop)
    date_idx = mcal.date_range(
        schedule, frequency='1d').to_period('1d').to_timestamp()

    missing = []
    arrays = []
    loaded = []
    for symbol in tqdm(self.symbols):
        csv_path = os.path.join(self.path, '%s.csv' % symbol)
        try:
            frame = pd.read_csv(csv_path,
                                header=None,
                                names=[
                                    'open', 'high', 'low', 'close',
                                    'adj_close', 'volume', 'c7', 'c8'
                                ],
                                index_col=0,
                                parse_dates=True,
                                infer_datetime_format=True)
            # Align every symbol on the shared trading-day index.
            frame = frame.reindex(date_idx, fill_value=None)
            arrays.append(xr.DataArray(frame, dims=['datetime', 'fields']))
            loaded.append(symbol)
        except IOError:
            missing.append(symbol)

    # Drop the symbols without data only after the loop has finished.
    for symbol in missing:
        self.symbols.remove(symbol)

    self.data = xr.concat(arrays,
                          dim=pd.Index(loaded).set_names('assets'))
def get_time_df(st, start, end, s):
    """
    Build a one-column frame of the trading timestamps in ``start``..``end``.

    The timestamps come from the schedule of ``st.cal`` expanded at
    frequency ``s.freq``; only those within [start, end] are kept.
    :return: dataframe with a single 'Timestamp' column
    """
    schedule = st.cal.schedule(start, end)
    points = mcal.date_range(schedule, s.freq)

    df = pd.DataFrame(points)
    df.columns = ['Timestamp']

    # Keep only rows inside the closed interval [start, end].
    in_window = (df['Timestamp'] >= start) & (end >= df['Timestamp'])
    return df.loc[in_window]
def get_six_months_data_for_one_stock(
    api,
    symbol,
    interval="1Min",
    start="2020-04-01T09:30:00-04:00",
    end="2020-10-23T16:00:00-04:00",
):
    """
    Fetch bars for one symbol and align them on the NASDAQ trading grid.

    Original author's caveats: only valid for summer time, only valid for
    1min so far, only valid for NASDAQ.

    :param api: client exposing ``get_barset``
    :param symbol: symbol to request; also the key read from the result
    :param interval: bar timeframe, also used as the grid frequency
    :param start: ISO format str, ex: '2019-04-15T09:30:00-04:00'
    :param end: ISO format str, ex: '2019-04-15T16:00:00-04:00'
    :return: dataframe indexed by every trading timestamp in
        America/New_York, left-joined with the fetched bars
    """
    barset = api.get_barset(symbols=symbol,
                            timeframe=interval,
                            start=start,
                            end=end)
    tmp_df = barset[symbol].df
    # Shift every bar timestamp forward by one minute.
    tmp_df.index = tmp_df.index + datetime.timedelta(minutes=1)
    tmp_df["Time"] = tmp_df.index.time
    tmp_df["Date"] = tmp_df.index.date

    # Get the business dates and hours from the 3rd-party calendar package
    nasdaq = mcal.get_calendar("NASDAQ")
    schedule = nasdaq.schedule(start_date=start, end_date=end)
    grid = mcal.date_range(schedule, frequency=interval)
    business_datetime = grid.tz_convert("America/New_York")

    # Merge the fetched data onto the full datetime grid
    base_df = pd.DataFrame(index=business_datetime)
    return base_df.merge(tmp_df, how="left", left_index=True, right_index=True)
def create_market_cal(stocks_start, stocks_end):
    """
    List the NYSE trading days within the requested timeframe.

    pandas_market_calendars supplies the trading days, so there is no need
    to filter a plain pandas.date_range by hand. The NYSE calendar is used,
    and the timestamps are standardised (timezone removed, hour set to 0)
    to make them easy to join on later.
    :param stocks_start: start of the timeframe
    :param stocks_end: end of the timeframe
    :return: list of timezone-naive timestamps
    """
    calendar = mcal.get_calendar('NYSE')
    sessions = mcal.date_range(calendar.schedule(stocks_start, stocks_end),
                               frequency='1D')
    naive = sessions.tz_localize(None)
    return [stamp.replace(hour=0) for stamp in naive]
def remove_overnight(cls, values, strats, st_id, s):
    """Re-align ``values`` onto the trading timestamps of its own span.

    :param values: data indexed by timestamp, index named 'Timestamp'
        after ``reset_index`` (presumably - confirm against the caller)
    :param strats: mapping from strategy id to an object exposing ``cal``
    :param st_id: key into ``strats``
    :param s: settings exposing ``freq``
    :return: frame indexed by 'Timestamp', containing one row per trading
        timestamp between the first and last index value of ``values``
    """
    cal = strats[st_id].cal
    first, last = values.index[0], values.index[-1]

    schedule = cal.schedule(first, last)
    points = mcal.date_range(schedule,
                             s.freq,
                             dtype='datetime64',
                             closed='left')
    grid = pd.DataFrame(points)
    grid.columns = ['Timestamp']

    # Restrict the grid to the closed interval [first, last].
    in_window = (grid['Timestamp'] >= first) & (last >= grid['Timestamp'])
    grid = grid.loc[in_window]

    # Left join keeps every grid timestamp, with or without data.
    merged = grid.merge(values.reset_index(), how='left', on='Timestamp')
    return merged.set_index('Timestamp')
def get_trade_days(start_date: str = '-1y',
                   end_date: str = None,
                   market: str = 'SSE',
                   frequency: str = '1d'):
    """Get trade days for data cleaning.

    List of trading days, used for data cleaning.

    :param start_date: (optional) start date of the custom return,
        default is `-1y`. value: -nd -nw -nm -ny cyear or YYYY-MM-DD
    :param end_date: (optional) the end date of the results, default is
        `now`, resolved at call time.
    :param market: (optional) market name, default is `SSE`.
        value: NYSE NASDAQ SSE HKEX
    :param frequency: (optional) frequency of date, default is `1d`.
    :return: pd.DatetimeIndex
    """
    # A default of ``arrow.now()`` in the signature is evaluated only once,
    # when the function is defined, so a long-running process would keep
    # using that stale moment. Resolve "now" on every call instead.
    if end_date is None:
        end_date = arrow.now()

    begin = str2date(start_date).format('YYYY-MM-DD')
    end = arrow.get(end_date).format('YYYY-MM-DD')

    mkt = mcal.get_calendar(market)
    calendar = mkt.schedule(begin, end)
    return mcal.date_range(calendar, frequency)
def init():
    """Create the data directories and fill the date entries of ``CONFIG``.

    Creates ``DATA`` with its sub-directories, then stores the NYSE trading
    days from ``DATE`` to the same date ten years later, the regular
    expirations and the rate map in ``CONFIG``.
    """
    DATA.mkdir()
    for sub in ("options", "ohlc", "keystats", "analysis"):
        (DATA / sub).mkdir()

    min_date = DATE
    # Same month and day, year + 10 (the first four characters are the year).
    max_date = f"{int(DATE[:4])+10}" + DATE[4:]

    nyse = mcal.get_calendar('NYSE')
    schedule = nyse.schedule(start_date=min_date, end_date=max_date)
    trading_days = mcal.date_range(schedule, frequency="1D")

    # Keep only the first ten characters of each stringified timestamp.
    CONFIG['trading_days'] = [str(day)[:10] for day in trading_days]
    CONFIG['reg_expirations'] = calculate_regular_expiries(min_date, max_date)
    CONFIG['ratemap'] = _connector.get_ratemap()
def CreateFinanceCalendar(self):
    """Build ``self.finance_calendar`` from the NYSE trading days.

    The frame has a 'Date' column of 'YYYY-MM-DD' strings and is indexed by
    'Date_int', the same date as an int32 without dashes. ``self.start_date``
    and ``self.end_date`` are overwritten from the built calendar.
    """
    print("---Building calendar for the period from {0} to {1}".format(
        self.start_date, self.end_date))

    nyse = mcal.get_calendar('NYSE')
    schedule = nyse.schedule(start_date=self.start_date,
                             end_date=self.end_date)
    day_strings = mcal.date_range(
        schedule, frequency='1D').map(lambda t: t.strftime('%Y-%m-%d'))
    calendar_np = np.array(day_strings, dtype=object)

    self.finance_calendar = pd.DataFrame(data=calendar_np[0:],
                                         index=calendar_np[0:],
                                         columns=['Date'])
    # NOTE(review): min/max are taken over the whole DataFrame rather than
    # the 'Date' column - confirm the resulting type is what callers expect.
    self.start_date = np.min(self.finance_calendar)
    self.end_date = np.max(self.finance_calendar)

    # 'YYYY-MM-DD' -> 'YYYYMMDD' -> int32, used as the index.
    compact = self.finance_calendar['Date'].map(lambda x: x.replace("-", ""))
    self.finance_calendar['Date_int'] = compact.astype(np.dtype('int32'))
    self.finance_calendar.set_index('Date_int', inplace=True)

    print("===Calendar is created")
def update_data(self,
                current_date: datetime,
                ib: IB,
                use_latest_date,
                random_data_stddev=0):
    """
    Request the stock's daily bar data and return it.

    :param current_date: passed as ``last_date`` unless ``use_latest_date``
        is truthy
    :param ib: client exposing ``request_historical_data``
    :param use_latest_date: when truthy, ``last_date=None`` is requested
    :param random_data_stddev: when non-zero, normally distributed noise
        with standard deviation ``close * random_data_stddev`` is added to
        the 'close' column
    :return: the data returned by ``ib.request_historical_data``
    """
    # The former ``if 0:`` block that computed recent NYSE trading dates was
    # unreachable and its result unused, so it has been removed.
    print(
        f'Requesting historical data for stock {self.ticker} at most recent date.'
    )
    ib_data = ib.request_historical_data(
        self.ticker,
        21,
        last_date=None if use_latest_date else current_date)

    if random_data_stddev != 0:
        # Per-row noise scaled by the closing price.
        noise = np.random.normal(0, ib_data['close'] * random_data_stddev)
        ib_data['close'] += noise

    return ib_data
def __getitem__(self, date_range: slice) -> "Backtester":
    """Configure the backtest period from a slice and return ``self``.

    :param date_range: slice whose ``start`` is required and whose ``stop``
        is optional; either may be a date or a ``relativedelta`` that is
        added to today's date
    :raises BacktestRunError: if this backtester was already used
    :raises ValueError: if the slice has no start
    """
    if self._run_before:
        raise BacktestRunError(
            "Backtest has already run, build a new backtester to run again."
        )
    self._run_before = True

    if self.assume_nyse:
        self._calendar = "NYSE"

    if date_range.start is None:
        raise ValueError(
            "a date range without a start value is not allowed")
    start_date = date_range.start

    if date_range.stop is None:
        self._warn.append(
            "backtests with no end date can lead to non-replicable results"
        )
        # Default end: yesterday.
        end_date = datetime.date.today() - relativedelta(days=1)
    else:
        end_date = date_range.stop

    cal = mcal.get_calendar(self._calendar)

    # Relative bounds are anchored on today's date.
    if isinstance(start_date, relativedelta):
        start_date = datetime.date.today() + start_date
    if isinstance(end_date, relativedelta):
        end_date = datetime.date.today() + end_date

    sched = cal.schedule(start_date=start_date, end_date=end_date)
    self._schedule = sched

    sessions = mcal.date_range(sched, frequency="1D")
    self.datetimes = []
    self.dates = [stamp.date() for stamp in sessions]
    # Two entries per trading day: the open, then the close.
    for day in self.dates:
        session = sched.loc[day]
        self.datetimes.extend(
            [session["market_open"], session["market_close"]])

    if self._has_strategies:
        self._set_strategies(self._temp_strategies)

    return self
def test_date_range_minute():
    """Check the 1-minute ``date_range`` output against known sessions."""

    def check_membership(actual, tz, present, absent):
        # Every ``present`` stamp must be in the range, no ``absent`` one.
        for x in present:
            assert pd.Timestamp(x, tz=tz) in actual
        for x in absent:
            assert pd.Timestamp(x, tz=tz) not in actual

    # New Years Eve and weekend skipped
    cal = FakeCalendar(open_time=datetime.time(9, 0),
                       close_time=datetime.time(10, 30))
    schedule = cal.schedule('2015-12-31', '2016-01-06')
    actual = mcal.date_range(schedule, '1min', force_close=True)

    assert len(actual) == 4 * 90
    assert actual[0] == pd.Timestamp('2015-12-31 09:01', tz=cal.tz)
    assert actual[-1] == pd.Timestamp('2016-01-06 10:30', tz=cal.tz)
    check_membership(
        actual,
        cal.tz,
        present=[
            '2015-12-31 09:02', '2015-12-31 10:30', '2016-01-04 09:01',
            '2016-01-06 09:01'
        ],
        absent=[
            '2015-12-31 09:00', '2015-12-31 10:31', '2016-01-02 09:01',
            '2016-01-03 09:01', '2016-01-06 09:00'
        ])

    # July 3 is late open and early close
    cal = FakeCalendar(open_time=datetime.time(9, 0),
                       close_time=datetime.time(12, 0))
    schedule = cal.schedule('2012-07-02', '2012-07-04')
    actual = mcal.date_range(schedule, '1min')

    assert len(actual) == 375  # 2 days of 3 hours, and one day of 15 mins
    assert actual[0] == pd.Timestamp('2012-07-02 09:01', tz=cal.tz)
    assert actual[-1] == pd.Timestamp('2012-07-04 12:00', tz=cal.tz)
    check_membership(
        actual,
        cal.tz,
        present=[
            '2012-07-02 09:02', '2012-07-02 12:00', '2012-07-03 11:16',
            '2012-07-03 11:30', '2012-07-04 09:01'
        ],
        absent=[
            '2012-07-02 09:00', '2012-07-02 12:01', '2012-07-03 11:15',
            '2012-07-03 11:31', '2012-07-04 09:00'
        ])

    # Dec 13, 2016 is ad-hoc late open, include the open with closed=True,
    # Dec 14 is ad-hoc early close
    cal = FakeCalendar(open_time=datetime.time(9, 0),
                       close_time=datetime.time(12, 0))
    schedule = cal.schedule('2016-12-13', '2016-12-14')
    actual = mcal.date_range(schedule, '1min', closed=None)

    assert len(actual) == 41 + (61 + 60 + 40)
    assert actual[0] == pd.Timestamp('2016-12-13 11:20', tz=cal.tz)
    assert actual[-1] == pd.Timestamp('2016-12-14 11:40', tz=cal.tz)
    check_membership(
        actual,
        cal.tz,
        present=['2016-12-13 11:21', '2016-12-13 12:00', '2016-12-14 09:00'],
        absent=[
            '2016-12-13 11:19', '2016-12-13 12:01', '2016-12-14 08:59',
            '2016-12-14 11:41'
        ])
def run(self):
    """Run the daily trading loop, live or over the backtest period.

    In live mode the loop polls the current time and triggers each daily
    step once its hour/minute condition is met, sleeping between polls. In
    backtest mode every step runs once per trading day of the period and
    the loop ends after the last day.
    """
    sleep_time_short = 30  # seconds
    sleep_time_long = 10 * 60
    date_format = '%Y-%m-%d'

    daily_reset_done = True
    daily_data_updates_done = False
    daily_trades_done = False
    daily_position_updates_done = False

    if self.is_backtest:
        # Generate the trading dates within the backtest period.
        nyse_sched = nyse.schedule(
            start_date=self.backtest_start.strftime(date_format),
            end_date=self.backtest_end.strftime(date_format))
        backtest_dates = mcal.date_range(nyse_sched, frequency='1D')
        backtest_dates = [ts.to_pydatetime() for ts in backtest_dates]
        print(backtest_dates)
        backtest_day_num = 0

    while True:
        if not self.is_backtest:
            dt = datetime.now(tz=tz)
        else:
            if backtest_day_num == len(backtest_dates):
                break
            dt = backtest_dates[backtest_day_num]
            print(dt)

        # Sleep through non-trade days.
        current_date_str = dt.strftime(date_format)
        nyse_sched = nyse.schedule(
            start_date=(dt - timedelta(days=1)).strftime(date_format),
            end_date=current_date_str)
        if nyse_sched.empty:
            # Neither yesterday nor today is a session (e.g. a Sunday).
            # Indexing [-1] on the range of an empty schedule would crash
            # the loop, so treat this as a non-trade day.
            is_trade_day = False
        else:
            latest_trade_date = mcal.date_range(nyse_sched,
                                                frequency='1D')[-1]
            latest_trade_date_str = latest_trade_date.strftime(date_format)
            is_trade_day = current_date_str == latest_trade_date_str
        if not is_trade_day:
            # This isn't a trade day. Sleep a while.
            print(f'Not a trade day. Sleeping {sleep_time_long} seconds.')
            time.sleep(sleep_time_long)
            continue

        # At 12:00AM reset the update flags.
        if (not daily_reset_done and dt.hour == 0
                and dt.minute >= 0) or self.is_backtest:
            print(f'Resetting update flags at time {dt}.')
            daily_reset_done = True
            daily_data_updates_done = False
            daily_trades_done = False
            daily_position_updates_done = False

        # At 2:30PM update time series.
        if (not daily_data_updates_done and dt.hour == 14
                and dt.minute >= 30) or self.is_backtest:
            print(f'Updating data at time {dt}.')
            # When testing, use the current dt to get historical data.
            update_dt = dt
            self.update_data(update_dt, dt, self.ib)
            print(f'Finished updating data at time {dt}.')
            daily_data_updates_done = True

        # At 3:30PM place enter/exit orders.
        if (not daily_trades_done and dt.hour == 15
                and dt.minute >= 30) or self.is_backtest:
            print(f'Generating trades at time {dt}.')
            exit_trades, enter_trades = self.generate_trades(dt, self.ib)
            print(f'Finished generating trades at time {dt}.')
            daily_trades_done = True

        # At 4:30PM update trades.
        if (not daily_position_updates_done and dt.hour == 16
                and dt.minute >= 30) or self.is_backtest:
            print(f'Updating positions at time {dt}.')
            self.update_positions(dt, self.ib)
            print(f'Finished positions at time {dt}.')
            daily_position_updates_done = True
            # Allow the flags to be reset again at the next midnight.
            daily_reset_done = False

        if self.is_backtest:
            backtest_day_num += 1

        if not self.is_backtest:
            time.sleep(sleep_time_short)
import tarfile as tar
from tables import *
import pandas as pd
import sys, os

# Make the sibling "equities" directory importable before importing from it.
sys.path.append("../equities")
from calculations import calculate_trading_days

###################################################################################################

# Directory for newly downloaded data and the storage bucket handle.
NEWDIR = Path(f"{DIR}/data/new")
BUCKET = storage.Client().bucket(CONFIG["gcp_bucket_name"])

# NYSE trading days from 2019-01-01 to 2029-01-01, kept as the first ten
# characters of each stringified timestamp.
nyse = mcal.get_calendar('NYSE')
schedule = nyse.schedule(start_date="2019-01-01", end_date="2029-01-01")
TDAYS = mcal.date_range(schedule, frequency="1D").tolist()
TDAYS = [str(day)[:10] for day in TDAYS]

# When falsy, download_data creates the old/new/tar directories.
SUBSET = None

###################################################################################################

def download_data():
    # NOTE(review): only the beginning of this function is visible here; it
    # may continue beyond this excerpt (FOLDERS is not used in the visible
    # part).

    FOLDERS = ["equities", "treasuryrates", "instruments", "splits"]

    if not SUBSET:
        os.mkdir(f"{DIR}/data/old")
        os.mkdir(f"{DIR}/data/new")
        os.mkdir(f"{DIR}/data/tar")
def get_last_trading_day() -> datetime:
    """Return the date of the last NYSE session in the past five days.

    The window runs from five days before now up to now; the ``.date()`` of
    the final entry of the daily range is returned.
    """
    calendar = mcal.get_calendar('NYSE')
    window_start = datetime.now() - relativedelta(days=5)
    window_end = datetime.now()
    recent = calendar.schedule(start_date=window_start, end_date=window_end)
    sessions = mcal.date_range(recent, frequency='1D')
    return sessions[-1].date()
def test_date_range_permutations():
    """Check '1H' ranges for every ``closed`` / ``force_close`` combination."""
    # open_time = 9, close_time = 11.30, freq = "1H"
    cal = FakeCalendar(open_time=datetime.time(9),
                       close_time=datetime.time(11, 30))
    schedule = cal.schedule("2021-01-05", "2021-01-05")

    # UTC stamps named after the local times in the comments below.
    t0900 = "2021-01-05 01:00:00+00:00"
    t1000 = "2021-01-05 02:00:00+00:00"
    t1100 = "2021-01-05 03:00:00+00:00"
    t1130 = "2021-01-05 03:30:00+00:00"
    t1200 = "2021-01-05 04:00:00+00:00"

    # Each case: expected stamps, then the (closed, force_close) pairs that
    # must all produce them.
    cases = [
        # 9 10 11
        ([t0900, t1000, t1100],
         [("left", False), ("left", None), ("both", False), (None, False)]),
        # 9 10 11 11.30
        ([t0900, t1000, t1100, t1130],
         [("left", True), ("both", True), (None, True)]),
        # 10 11
        ([t1000, t1100], [("right", False)]),
        # 10 11 11.30
        ([t1000, t1100, t1130], [("right", True)]),
        # 10 11 12
        ([t1000, t1100, t1200], [("right", None)]),
        # 9 10 11 12
        ([t0900, t1000, t1100, t1200], [("both", None), (None, None)]),
    ]

    for stamps, combos in cases:
        expected = pd.DatetimeIndex(stamps, tz="UTC")
        for closed, force_close in combos:
            actual = mcal.date_range(schedule,
                                     "1H",
                                     closed=closed,
                                     force_close=force_close)
            assert_index_equal(actual, expected)
print("Just Updating the meta Data") data['_Source']['Meta Data'] = new_data['Meta Data'] with open(new_file_name, 'w') as fp: json.dump(data, fp) if new_file_name != old_file_name: os.remove(old_file_name) else: new_date_str = max_timestamp_new.strftime('%Y-%m-%d') old_date_str = max_timestamp_old.strftime('%Y-%m-%d') nyse = mcal.get_calendar('NYSE') date_range = nyse.schedule(start_date=old_date_str, end_date=new_date_str) datetime_index = mcal.date_range(date_range, frequency='1D') datetime_index = datetime_index.tz_convert(tm_zone.time_zone) fun_time_str = lambda x: x.strftime('%Y-%m-%d') time_str_list = list(map(fun_time_str, datetime_index.to_pydatetime())) data['_Source']['Meta Data'] = new_data['Meta Data'] for time_str in time_str_list: data['_Source']['Time Series (Daily)'][time_str] = new_data[ 'Time Series (Daily)'][time_str] with open(new_file_name, 'w') as fp: json.dump(data, fp) print('Writing the file is completed') os.remove(old_file_name)
def test_date_range_hour():
    """Check the '1H' ``date_range`` output against known sessions."""

    def utc_index(tz, stamps):
        # Localise each stamp in the calendar timezone, then express in UTC.
        return pd.DatetimeIndex(
            [pd.Timestamp(x, tz=tz).tz_convert('UTC') for x in stamps])

    cal = FakeCalendar(open_time=datetime.time(9, 0),
                       close_time=datetime.time(10, 30))

    # New Years Eve and weekend skipped
    expected = utc_index(cal.tz, [
        '2015-12-31 10:00', '2015-12-31 10:30', '2016-01-04 10:00',
        '2016-01-04 10:30', '2016-01-05 10:00', '2016-01-05 10:30',
        '2016-01-06 10:00', '2016-01-06 10:30'
    ])
    schedule = cal.schedule('2015-12-31', '2016-01-06')
    actual = mcal.date_range(schedule, '1H', force_close=True)
    assert_index_equal(actual, expected)

    # If force_close is False then the close is missing when it does not
    # fall on an even increment
    expected = utc_index(cal.tz, [
        '2015-12-31 10:00', '2016-01-04 10:00', '2016-01-05 10:00',
        '2016-01-06 10:00'
    ])
    schedule = cal.schedule('2015-12-31', '2016-01-06')
    actual = mcal.date_range(schedule, '1H', force_close=False)
    assert_index_equal(actual, expected)

    cal = FakeCalendar(open_time=datetime.time(9, 0),
                       close_time=datetime.time(12, 0))

    # July 3 is late open and early close
    expected = utc_index(cal.tz, [
        '2012-07-02 10:00', '2012-07-02 11:00', '2012-07-02 12:00',
        '2012-07-03 11:30', '2012-07-04 10:00', '2012-07-04 11:00',
        '2012-07-04 12:00'
    ])
    schedule = cal.schedule('2012-07-02', '2012-07-04')
    actual = mcal.date_range(schedule, '1H')
    assert_index_equal(actual, expected)

    # Dec 14, 2016 is adhoc early close
    expected = utc_index(cal.tz, [
        '2016-12-14 10:00', '2016-12-14 11:00', '2016-12-14 11:40',
        '2016-12-15 10:00', '2016-12-15 11:00', '2016-12-15 12:00'
    ])
    schedule = cal.schedule('2016-12-14', '2016-12-15')
    actual = mcal.date_range(schedule, '1H')
    assert_index_equal(actual, expected)

    # Dec 13, 2016 is adhoc late open, include the open with closed=True
    expected = utc_index(cal.tz, [
        '2016-12-13 11:20', '2016-12-13 12:00', '2016-12-14 09:00',
        '2016-12-14 10:00', '2016-12-14 11:00', '2016-12-14 11:40'
    ])
    schedule = cal.schedule('2016-12-13', '2016-12-14')
    actual = mcal.date_range(schedule, '1H', closed=None)
    assert_index_equal(actual, expected)
def test_date_range_daily():
    """Check the '1D' ``date_range`` output against known sessions."""

    def utc_index(tz, stamps):
        # Localise each stamp in the calendar timezone, then express in UTC.
        return pd.DatetimeIndex(
            [pd.Timestamp(x, tz=tz).tz_convert('UTC') for x in stamps])

    cal = FakeCalendar(open_time=datetime.time(9, 0),
                       close_time=datetime.time(12, 0))

    # If closed='right' and force_close False for daily then the result is
    # empty
    expected = pd.DatetimeIndex([], tz='UTC')
    schedule = cal.schedule('2015-12-31', '2016-01-06')
    actual = mcal.date_range(schedule,
                             '1D',
                             force_close=False,
                             closed='right')
    assert_index_equal(actual, expected)

    # New years is holiday
    expected = utc_index(cal.tz, [
        '2015-12-31 12:00', '2016-01-04 12:00', '2016-01-05 12:00',
        '2016-01-06 12:00'
    ])
    schedule = cal.schedule('2015-12-31', '2016-01-06')
    actual = mcal.date_range(schedule, '1D')
    assert_index_equal(actual, expected)

    # July 3 is early close
    expected = utc_index(
        cal.tz, ['2012-07-02 12:00', '2012-07-03 11:30', '2012-07-04 12:00'])
    schedule = cal.schedule('2012-07-02', '2012-07-04')
    actual = mcal.date_range(schedule, '1D')
    assert_index_equal(actual, expected)

    # Dec 14, 2016 is adhoc early close
    expected = utc_index(
        cal.tz, ['2016-12-13 12:00', '2016-12-14 11:40', '2016-12-15 12:00'])
    schedule = cal.schedule('2016-12-13', '2016-12-15')
    actual = mcal.date_range(schedule, '1D')
    assert_index_equal(actual, expected)

    # July 3 is late open
    expected = utc_index(
        cal.tz, ['2012-07-02 09:00', '2012-07-03 11:15', '2012-07-04 09:00'])
    schedule = cal.schedule('2012-07-02', '2012-07-04')
    actual = mcal.date_range(schedule, '1D', force_close=False, closed=None)
    assert_index_equal(actual, expected)

    # Dec 13, 2016 is adhoc late open
    expected = utc_index(cal.tz, [
        '2016-12-13 11:20', '2016-12-13 12:00', '2016-12-14 09:00',
        '2016-12-14 11:40', '2016-12-15 09:00', '2016-12-15 12:00'
    ])
    schedule = cal.schedule('2016-12-13', '2016-12-15')
    actual = mcal.date_range(schedule, '1D', force_close=True, closed=None)
    assert_index_equal(actual, expected)
def test_date_range_w_breaks():
    """Check ``date_range`` on a calendar whose sessions contain a break."""

    def check(actual, expected):
        # Same length, and every expected stamp is present.
        assert len(actual) == len(expected)
        for x in expected:
            assert pd.Timestamp(x) in actual

    cal = FakeBreakCalendar()
    schedule = cal.schedule('2016-12-28', '2016-12-28')

    check(mcal.date_range(schedule, '30min', closed=None), [
        '2016-12-28 14:30:00+00:00', '2016-12-28 15:00:00+00:00',
        '2016-12-28 16:00:00+00:00', '2016-12-28 16:30:00+00:00',
        '2016-12-28 17:00:00+00:00'
    ])

    check(mcal.date_range(schedule, '30min', closed='right'), [
        '2016-12-28 15:00:00+00:00', '2016-12-28 16:30:00+00:00',
        '2016-12-28 17:00:00+00:00'
    ])

    check(
        mcal.date_range(schedule, '30min', closed='left', force_close=False),
        [
            '2016-12-28 14:30:00+00:00', '2016-12-28 16:00:00+00:00',
            '2016-12-28 16:30:00+00:00'
        ])

    check(
        mcal.date_range(schedule, '30min', closed='left', force_close=True),
        [
            '2016-12-28 14:30:00+00:00', '2016-12-28 16:00:00+00:00',
            '2016-12-28 16:30:00+00:00', '2016-12-28 17:00:00+00:00'
        ])

    # when the open is the break start
    schedule = cal.schedule('2016-12-29', '2016-12-29')

    check(mcal.date_range(schedule, '15min', closed=None), [
        '2016-12-29 15:20:00+00:00', '2016-12-29 16:05:00+00:00',
        '2016-12-29 16:20:00+00:00', '2016-12-29 16:35:00+00:00',
        '2016-12-29 16:50:00+00:00', '2016-12-29 17:00:00+00:00'
    ])

    check(mcal.date_range(schedule, '15min', closed='right'), [
        '2016-12-29 16:05:00+00:00', '2016-12-29 16:20:00+00:00',
        '2016-12-29 16:35:00+00:00', '2016-12-29 16:50:00+00:00',
        '2016-12-29 17:00:00+00:00'
    ])

    # when the close is the break end
    schedule = cal.schedule('2016-12-30', '2016-12-30')

    # force close True
    check(
        mcal.date_range(schedule, '15min', closed=None, force_close=True),
        [
            '2016-12-30 14:30:00+00:00', '2016-12-30 14:45:00+00:00',
            '2016-12-30 15:00:00+00:00', '2016-12-30 15:40:00+00:00'
        ])

    # force close False
    check(
        mcal.date_range(schedule, '15min', closed=None, force_close=False),
        [
            '2016-12-30 14:30:00+00:00', '2016-12-30 14:45:00+00:00',
            '2016-12-30 15:00:00+00:00'
        ])

    check(
        mcal.date_range(schedule, '15min', closed='right',
                        force_close=False),
        ['2016-12-30 14:45:00+00:00', '2016-12-30 15:00:00+00:00'])