def _maybe_update_subscribers(self):
    """Fire bar callbacks for every subscription whose bar is due now."""
    time_step = self._clock.time_step
    if is_daily(bar_size=time_step):
        # A daily clock can only ever satisfy the daily subscription.
        self._update_subscribers(
            bar_size=time_step,
            callbacks=self._bars_callback_table[time_step],
        )
        return
    for bar_size, callbacks in self._bars_callback_table.items():
        if is_daily(bar_size=bar_size):
            # Daily subscribers on an intraday clock fire at end of day.
            due = self._clock.end_of_day
        else:
            # Intraday subscribers fire whenever the clock lands exactly
            # on a bar boundary for their bar size.
            epoch_seconds = self._clock.datetime.timestamp()
            due = epoch_seconds % bar_size.seconds == 0
        if due:
            self._update_subscribers(bar_size=bar_size, callbacks=callbacks)
def _validate(
    symbol: str,
    bar_size: timedelta,
    window: int,
    sma_offset: float,
    entry_n_shares: int,
    exit_start: Optional[time] = None,
    full_exit: Optional[time] = None,
):
    """Validate the strategy's constructor parameters.

    Raises
    ------
    ValueError
        If ``window`` or ``entry_n_shares`` is not a positive integer, or
        if the bar size is intraday but no exit times were provided.
    """
    # TODO: validate symbol
    # TODO: validate bar_size
    # Check the type BEFORE comparing against 0: the original order made
    # e.g. a string argument raise TypeError on the `> 0` comparison
    # instead of the intended ValueError.
    if not isinstance(window, int) or window <= 0:
        raise ValueError(
            f"The window parameter must be a positive integer."
            f" Got {window}."
        )
    if not isinstance(entry_n_shares, int) or entry_n_shares <= 0:
        raise ValueError(
            f"The entry_n_shares parameter must be a positive integer."
            f" Got {entry_n_shares}."
        )
    if not is_daily(bar_size=bar_size):
        # Intraday trading needs explicit exit scheduling.
        if exit_start is None or full_exit is None:
            raise ValueError(
                "When trading intraday, must provide a value for the"
                " exit_start and full_exit parameters."
            )
def download_data(
    self,
    symbol: str,
    start_date: date,
    end_date: date,
    bar_size: timedelta,
    **kwargs,
) -> pd.DataFrame:
    """Download historical bars for *symbol* from Yahoo Finance."""
    import pandas_datareader as pdr
    import yfinance as yf

    yf.pdr_override()
    interval = self._get_interval_str(interval=bar_size)
    data = pdr.data.get_data_yahoo(
        symbol,
        interval=interval,
        start=start_date,
        # Request one extra day so the end date itself is included.
        end=end_date + timedelta(days=1),
    )
    if len(data) == 0:
        return data
    data = self._format_data(data=data)
    slice_end = end_date
    if not is_daily(bar_size=bar_size):
        # Intraday timestamps on the end date fall after midnight of
        # end_date, so extend the slice bound by a day.
        slice_end = end_date + timedelta(days=1)
    return data.loc[start_date:slice_end]
def tick(self):
    """Advance the clock by one time step, pacing with wall-clock sleep."""
    real_time.sleep(self._time_per_tick)
    advance = (
        self._tick_daily
        if is_daily(bar_size=self._time_step)
        else self._tick_intraday
    )
    advance()
def download_bars_data(
    self,
    contract: AContract,
    start_date: date,
    end_date: date,
    bar_size: timedelta,
    rth: bool,
    **kwargs,
) -> pd.DataFrame:
    """Download historical bars for *contract* from Yahoo Finance."""
    # TODO: test rth
    import pandas_datareader as pdr
    import yfinance as yf

    self._validate_bar_size(bar_size=bar_size)
    yf.pdr_override()
    data = pdr.data.get_data_yahoo(
        contract.symbol,
        interval=self._get_interval_str(interval=bar_size),
        start=start_date,
        # Request one extra day so the end date itself is included.
        end=end_date + timedelta(days=1),
    )
    if len(data) == 0:
        return data
    data = self._format_data(data=data)
    slice_end = end_date
    if not is_daily(bar_size=bar_size):
        # Intraday timestamps on the end date fall after midnight of
        # end_date, so extend the slice bound by a day.
        slice_end = end_date + timedelta(days=1)
    return data.loc[start_date:slice_end]
def download_stock_data(
    self,
    symbol: str,
    request_date: date,
    bar_size: timedelta,
) -> pd.DataFrame:
    """Download one day of bars for *symbol* from the IEX batch endpoint."""
    self._validate_bar_size(bar_size=bar_size)
    if is_daily(bar_size=bar_size):
        request_type = "chart"
        range_params = {"chartByDay": True, "range": "date"}
    elif bar_size == timedelta(minutes=1):
        request_type = "intraday-prices"
        range_params = {"range": "1d"}
    else:
        raise ValueError(
            f"{type(self)} can only download historical data or"
            f" 1-minute bars. Got a bar size of {bar_size}.")
    params = {
        "token": self._api_token,
        **range_params,
        "types": [request_type],
        "exactDate": request_date.strftime(self._REQ_DATE_FORMAT),
    }
    response = requests.get(
        url=f"{self._base_url}/stock/{symbol.lower()}/batch",
        params=params,
    )
    payload = json.loads(response.text)
    return pd.DataFrame(data=payload[request_type])
def _get_cached_data(
    self,
    contract: AContract,
    start_date: date,
    end_date: date,
    bar_size: timedelta,
    schema_v: Optional[int],
    suffix: str,
) -> pd.DataFrame:
    """Load cached bar data for *contract* between the given dates.

    Returns an empty DataFrame when the cache folder does not exist or
    no cached files cover the range.
    """
    contract_type = self._get_con_type(contract=contract)
    folder_path = self.base_data_path / contract_type / contract.symbol / suffix
    if not folder_path.exists():
        return pd.DataFrame()
    self._validate_schema(folder_path=folder_path, schema_v=schema_v)
    file_names = hist_file_names(
        start_date=start_date,
        end_date=end_date,
        bar_size=bar_size,
    )
    # Collect per-file frames and concatenate once: DataFrame.append was
    # deprecated in pandas 1.4 and removed in 2.0, and appending in a
    # loop is quadratic.
    frames = []
    for file_name in file_names:
        file_path = folder_path / file_name
        if file_path.exists():
            frames.append(
                pd.read_csv(
                    file_path,
                    index_col="datetime",
                    parse_dates=True,
                )
            )
    data = pd.concat(frames) if frames else pd.DataFrame()
    if len(data) != 0 and is_daily(bar_size=bar_size):
        # Daily files may span more than the requested range; trim.
        data = data.loc[start_date:end_date]
    return data
def download_bars_data(
    self,
    contract: AContract,
    start_date: date,
    end_date: date,
    bar_size: timedelta,
    rth: bool,
    **kwargs,
):
    """Download bars for *contract*, one trading day at a time.

    Returns an (possibly empty) DataFrame formatted as daily or
    intraday data depending on the bar size.
    """
    # TODO: test rth
    dates = generate_trading_days(start_date=start_date, end_date=end_date)
    # Collect per-day frames and concatenate once: DataFrame.append was
    # deprecated in pandas 1.4 and removed in 2.0, and appending in a
    # loop is quadratic.
    frames = [
        self._conn.download_stock_data(
            symbol=contract.symbol,
            request_date=date_,
            bar_size=bar_size,
        )
        for date_ in dates
    ]
    data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    if len(data) != 0:
        if is_daily(bar_size):
            data = self._format_daily_data(data=data)
        else:
            data = self._format_intraday_data(data=data)
    return data
def _get_latest_time_entry(self, contract: AContract,
                           bar_size: timedelta) -> pd.Series:
    """Return the most recently completed bar for *contract*."""
    data = self._get_data(contract=contract, bar_size=bar_size)
    now = self.sim_clock.datetime
    if is_daily(bar_size=bar_size):
        return data.loc[pd.to_datetime(now.date())]
    # Intraday bars are indexed by open time: the bar that just
    # completed opened one bar_size ago.
    return data.loc[now - bar_size]
def download_data(
    self,
    symbol: str,
    start_date: date,
    end_date: date,
    bar_size: timedelta,
    **kwargs,
):
    """Download bars from IEX for each trading day in the date range.

    Raises
    ------
    ValueError
        If the bar size is neither daily nor 1 minute.
    """
    params = {"token": self._api_token}
    if is_daily(bar_size=bar_size):
        request_type = "chart"
        params["chartByDay"] = True
        params["range"] = "date"
    elif bar_size == timedelta(minutes=1):
        request_type = "intraday-prices"
        params["range"] = "1d"
    else:
        raise ValueError(
            f"{type(self)} can only download historical data or"
            f" 1-minute bars. Got a bar size of {bar_size}."
        )
    params["types"] = [request_type]
    url = f"{self._base_url}/stock/{symbol.lower()}/batch"
    dates = generate_trading_days(start_date=start_date, end_date=end_date)
    # Collect per-day frames and concatenate once: DataFrame.append was
    # deprecated in pandas 1.4 and removed in 2.0, and appending in a
    # loop is quadratic.
    frames = []
    for date_ in dates:
        params["exactDate"] = date_.strftime(self._REQ_DATE_FORMAT)
        r = requests.get(url=url, params=params)
        json_data = json.loads(r.text)
        frames.append(pd.DataFrame(data=json_data[request_type]))
    data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    if len(data) != 0:
        if is_daily(bar_size):
            data = self._format_daily_data(data=data)
        else:
            data = self._format_intraday_data(data=data)
    return data
def _get_latest_bar(self, symbol: str, bar_size: timedelta) -> pd.Series:
    """Return the most recently completed bar for *symbol*."""
    data = self._get_data(symbol=symbol, bar_size=bar_size)
    now = self._clock.datetime
    if is_daily(bar_size=bar_size):
        return data.loc[now.date()]
    # Intraday bars are indexed by open time: the bar that just
    # completed opened one bar_size ago.
    return data.loc[now - bar_size]
def _get_next_bar(self, symbol: str, bar_size: timedelta) -> pd.Series:
    """Return the next upcoming bar relative to the clock's current time."""
    data = self._get_data(symbol=symbol, bar_size=bar_size)
    dt = self._clock.datetime
    if not is_daily(bar_size=bar_size):
        return data.loc[dt]
    # Step forward a day at a time, skipping dates (weekends, holidays)
    # that are absent from the data index.
    dt += bar_size
    while dt.date() not in data.index:
        dt += bar_size
    return data.loc[dt.date()]
def _get_next_bar(
    self,
    contract: AContract,
    bar_size: timedelta,
) -> pd.Series:
    """Return the next upcoming bar relative to the sim clock's time."""
    data = self._get_data(contract=contract, bar_size=bar_size)
    dt = self.sim_clock.datetime
    if not is_daily(bar_size=bar_size):
        return data.loc[dt]
    # Step forward a day at a time, skipping dates (weekends, holidays)
    # that are absent from the data index.
    dt += bar_size
    while dt.date() not in data.index:
        dt += bar_size
    return data.loc[dt.date()]
def __init__(
    self,
    broker: ABroker,
    symbol: str,
    bar_size: timedelta,
    window: int,
    sma_offset: float,
    entry_n_shares: int,
    exit_start: Optional[time] = None,
    full_exit: Optional[time] = None,
    log: bool = False,
):
    """Initialize an SMA-band trading strategy.

    Parameters
    ----------
    broker : ABroker
        Broker used to place orders.
    symbol : str
        Symbol to trade.
    bar_size : datetime.timedelta
        Bar size to subscribe to; daily vs. intraday is derived from it.
    window : int
        Number of bars in the moving-average window (must be positive).
    sma_offset : float
        Offset applied around the SMA to form the upper/lower bands.
    entry_n_shares : int
        Shares to trade on entry (must be positive).
    exit_start : datetime.time, optional
        Time to start exiting positions; required when trading intraday.
    full_exit : datetime.time, optional
        Time of full exit; required when trading intraday.
    log : bool, default False
        Whether to log trades.

    Raises
    ------
    ValueError
        Propagated from ``_validate`` on invalid parameters.
    """
    # Validate before touching any state so a bad config never leaves a
    # half-initialized instance.
    self._validate(
        symbol=symbol,
        bar_size=bar_size,
        window=window,
        sma_offset=sma_offset,
        entry_n_shares=entry_n_shares,
        exit_start=exit_start,
        full_exit=full_exit,
    )
    # Static configuration.
    self._broker = broker
    self._symbol = symbol
    self._bar_size = bar_size
    self._sma_offset = sma_offset
    self._entry_n_shares = entry_n_shares
    self._daily = is_daily(bar_size=bar_size)
    self._exit_start = exit_start
    self._full_exit = full_exit
    self._log = log
    # Run-state flags.
    self._started = False
    self._halted = False
    # SMA state: fixed-size buffer of the last `window` prices (NaN until
    # filled) plus the derived average and bands.
    self._sma_buffer = np.full((window,), np.nan)
    self._sma = None
    self._upper = None
    self._lower = None
    self._bar_count = 0
    self._previous_price_state = None
    # Audit trail of executed actions.
    self.trades_log = pd.DataFrame(columns=["datetime", "action_code"])
def _cache_data(
    self,
    data: pd.DataFrame,
    contract: AContract,
    bar_size: timedelta,
    schema_v: Optional[int],
    suffix: str,
):
    """Persist bar data to the on-disk cache.

    Daily data is merged into a single ``daily.csv``; intraday data is
    written as one CSV per calendar day. Empty frames are ignored.
    """
    if len(data) == 0:
        return
    contract_type = self._get_con_type(contract=contract)
    folder_path = self.base_data_path / contract_type / contract.symbol / suffix
    # folder_path is a pathlib.Path; use its own methods rather than
    # mixing in os.path.
    if not folder_path.exists():
        folder_path.mkdir(parents=True)
    if schema_v:
        with open(folder_path / ".schema_v", "w") as f:
            f.write(str(schema_v))
    self._validate_schema(folder_path=folder_path, schema_v=schema_v)
    if is_daily(bar_size=bar_size):
        file_path = folder_path / "daily.csv"
        if file_path.exists():
            existing = pd.read_csv(
                file_path,
                index_col="datetime",
                parse_dates=True,
            )
            # pd.concat replaces DataFrame.append, which was deprecated
            # in pandas 1.4 and removed in 2.0.
            data = pd.concat([data, existing]).sort_index()
        data.to_csv(file_path, date_format=DATE_FORMAT)
    else:
        # One file per calendar day, named by date.
        for date_, group in data.groupby(pd.Grouper(freq="D")):
            if len(group) != 0:
                file_name = f"{date_.date().strftime(DATE_FORMAT)}.csv"
                group.to_csv(folder_path / file_name,
                             date_format=DATETIME_FORMAT)
def retrieve_bar_data(
    self,
    contract: AContract,
    bar_size: timedelta,
    start_date: Optional[date] = None,
    end_date: Optional[date] = None,
    cache_only: bool = False,
    cache_downloads: bool = True,
    rth: bool = True,
    allow_partial: bool = False,
) -> pd.DataFrame:
    """Retrieves the historical data.

    After loading available data from cache, any missing data is
    downloaded from the provider specified during initialization.
    Downloaded data is stored to the cache.

    Parameters
    ----------
    contract : AContract
    bar_size : datetime.timedelta
    start_date : datetime.date, optional, default None
    end_date : datetime.date, optional, default None
        If the end date is set to today's date, it will be adjusted to
        yesterday's date to avoid storing partial historical data.
    cache_only : bool, default False
        Prevents data-download on cache-miss.
    cache_downloads : bool, default True
        Whether to cache downloaded data.
    rth : bool, default True
        Restrict to regular trading hours.
    allow_partial : bool, default False
        Allows downloading of partial data for today's date. This
        partial data is never cached.

    Returns
    -------
    data : pd.DataFrame
        The requested historical data.
    """
    # NOTE(review): start_date/end_date default to None but are compared
    # and subtracted below — callers appear to always pass both; confirm.
    if end_date == date.today():
        if not allow_partial:
            # Drop today entirely so only complete days are returned.
            end_date -= timedelta(days=1)
            end_cache_date = end_date
        else:
            # Today's data is partial; return it but never cache it.
            end_cache_date = end_date - timedelta(days=1)
    else:
        end_cache_date = end_date
    if end_cache_date >= start_date:
        data = self._cache_handler.get_cached_bar_data(
            contract=contract,
            start_date=start_date,
            end_date=end_date,
            bar_size=bar_size,
            schema_v=AHistoricalProvider.BARS_SCHEMA_V,
        )
    else:
        data = pd.DataFrame()
    if not cache_only:
        date_ranges = self._get_missing_date_ranges(
            data=data,
            start_date=start_date,
            end_date=end_date,
        )
        # Accumulate downloaded ranges and concatenate once at the end:
        # DataFrame.append was deprecated in pandas 1.4, removed in 2.0.
        frames = [data]
        for date_range in date_ranges:
            # Always download the full session; RTH filtering is applied
            # locally below, so the cache holds full-session data.
            range_data = self._provider.download_bars_data(
                contract=contract,
                start_date=date_range[0],
                end_date=date_range[-1],
                bar_size=bar_size,
                rth=False,
            )
            frames.append(range_data)
            if cache_downloads:
                # Never cache bars newer than the cache cutoff (today's
                # partial data).
                range_data = range_data[
                    range_data.index <= pd.to_datetime(end_cache_date)
                ]
                self._cache_handler.cache_bar_data(
                    data=range_data,
                    contract=contract,
                    bar_size=bar_size,
                    schema_v=AHistoricalProvider.BARS_SCHEMA_V,
                )
        data = pd.concat(frames)
    if rth and not is_daily(bar_size=bar_size):
        # inclusive="left" keeps 09:30 and drops 16:00, matching the old
        # include_end=False keyword that was removed in pandas 2.0.
        data = data.between_time(
            start_time=time(9, 30),
            end_time=time(16),
            inclusive="left",
        )
    return data