def download_symbol(cls,
                    symbol: tp.Label,
                    start: tp.DatetimeLike = 0,
                    end: tp.DatetimeLike = 'now',
                    freq: tp.Union[None, str, pd.DateOffset] = None,
                    date_range_kwargs: tp.KwargsLike = None,
                    **kwargs) -> tp.SeriesFrame:
    """Download the symbol.

    Generates a datetime index and passes it to `SyntheticData.generate_symbol`
    to fill the Series/DataFrame with generated data."""
    if date_range_kwargs is None:
        date_range_kwargs = {}
    index = pd.date_range(
        start=to_tzaware_datetime(start, tz=get_utc_tz()),
        end=to_tzaware_datetime(end, tz=get_utc_tz()),
        freq=freq,
        **date_range_kwargs
    )
    if len(index) == 0:
        raise ValueError("Date range is empty")
    return cls.generate_symbol(symbol, index, **kwargs)
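# A minimal sketch (not part of the original module) of how the method above is meant to be
# used: subclass SyntheticData and implement generate_symbol(), which receives the datetime
# index built by download_symbol(). The class name, seed handling, and values below are
# illustrative assumptions, not the library's own implementation.
import numpy as np


class _RandomWalkData(SyntheticData):  # hypothetical subclass, for illustration only
    @classmethod
    def generate_symbol(cls, symbol: tp.Label, index: pd.Index, seed: tp.Optional[int] = None,
                        **kwargs) -> tp.SeriesFrame:
        # Fill the generated index with a simple Gaussian random walk
        rng = np.random.default_rng(seed)
        return pd.Series(rng.normal(0., 1., size=len(index)).cumsum(), index=index, name=symbol)

# Downloading would then go through the usual Data.download() entry point, e.g.
# _RandomWalkData.download('RAND', start='2021-01-01', end='2021-02-01', freq='1D').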
    keep='last',
    drop_redundant=True,
    ignore_default=True
),
array_wrapper=dict(
    column_only_select=False,
    group_select=True,
    freq=None,
    silence_warnings=False
),
datetime=dict(
    naive_tz=get_local_tz(),
    to_py_timezone=True
),
data=dict(
    tz_localize=get_utc_tz(),
    tz_convert=get_utc_tz(),
    missing_index='nan',
    missing_columns='raise',
    binance=Config(  # flex
        dict(
            api_key=None,
            api_secret=None
        )
    ),
    ccxt=Config(  # flex
        dict(
            enableRateLimit=True
        )
    ),
    stats=Config(),  # flex
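# A hedged sketch (assumption): the data defaults above can be overridden at runtime through
# vectorbt's settings object before downloading. The keys mirror the config above; the chosen
# values are illustrative placeholders.
def _example_override_data_defaults():  # hypothetical helper, for illustration only
    import vectorbt as vbt
    vbt.settings['data']['missing_index'] = 'drop'   # drop index rows missing in any symbol
    vbt.settings['data']['missing_columns'] = 'nan'  # fill missing columns with NaN
    vbt.settings['data']['ccxt']['enableRateLimit'] = True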
def download_symbol(cls,
                    symbol: str,
                    timeframe: str = '1d',
                    start: tp.DatetimeLike = 0,
                    end: tp.DatetimeLike = 'now UTC',
                    adjustment: tp.Optional[str] = 'all',
                    limit: int = 500,
                    exchange: tp.Optional[str] = 'CBSE',
                    **kwargs) -> tp.Frame:
    """Download the symbol.

    Args:
        symbol (str): Symbol.
        timeframe (str): Timeframe of data.

            Must be an integer followed by 'm' (minute), 'h' (hour), or 'd' (day), e.g. '15m'.
            See https://alpaca.markets/data.

            !!! note
                Data from the latest 15 minutes is not available with a free data plan.
        start (any): Start datetime. See `vectorbt.utils.datetime_.to_tzaware_datetime`.
        end (any): End datetime. See `vectorbt.utils.datetime_.to_tzaware_datetime`.
        adjustment (str): Corporate action adjustment for stocks.

            Allowed are `raw`, `split`, `dividend`, and `all`.
        limit (int): The maximum number of returned items.
        exchange (str): Exchange to retrieve crypto data from.

            Allowed are `FTX`, `ERSX`, and `CBSE`.

    For defaults, see `data.alpaca` in `vectorbt._settings.settings`.
    """
    from vectorbt._settings import settings
    from alpaca_trade_api.rest import TimeFrameUnit, TimeFrame, REST

    alpaca_cfg = settings['data']['alpaca']

    client_kwargs = dict()
    for k in get_func_kwargs(REST):
        if k in kwargs:
            client_kwargs[k] = kwargs.pop(k)
    client_kwargs = merge_dicts(alpaca_cfg, client_kwargs)
    client = REST(**client_kwargs)

    _timeframe_units = {'d': TimeFrameUnit.Day, 'h': TimeFrameUnit.Hour, 'm': TimeFrameUnit.Minute}
    if len(timeframe) < 2:
        raise ValueError("invalid timeframe")
    amount_str = timeframe[:-1]
    unit_str = timeframe[-1]
    if not amount_str.isnumeric() or unit_str not in _timeframe_units:
        raise ValueError("invalid timeframe")
    amount = int(amount_str)
    unit = _timeframe_units[unit_str]
    _timeframe = TimeFrame(amount, unit)

    start_ts = to_tzaware_datetime(start, tz=get_utc_tz()).isoformat()
    end_ts = to_tzaware_datetime(end, tz=get_utc_tz()).isoformat()

    def _is_crypto_symbol(symbol):
        return len(symbol) == 6 and "USD" in symbol

    if _is_crypto_symbol(symbol):
        df = client.get_crypto_bars(
            symbol=symbol,
            timeframe=_timeframe,
            start=start_ts,
            end=end_ts,
            limit=limit,
            exchanges=exchange
        ).df
    else:
        df = client.get_bars(
            symbol=symbol,
            timeframe=_timeframe,
            start=start_ts,
            end=end_ts,
            adjustment=adjustment,
            limit=limit
        ).df

    # Filter for OHLCV: remove extra columns
    df.drop(['trade_count', 'vwap'], axis=1, errors='ignore', inplace=True)
    # Capitalize the column names
    df.rename(columns={
        'open': 'Open',
        'high': 'High',
        'low': 'Low',
        'close': 'Close',
        'volume': 'Volume',
        'exchange': 'Exchange'
    }, inplace=True)

    df['Open'] = df['Open'].astype(float)
    df['High'] = df['High'].astype(float)
    df['Low'] = df['Low'].astype(float)
    df['Close'] = df['Close'].astype(float)
    df['Volume'] = df['Volume'].astype(float)

    return df
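# A hedged usage sketch (not from the original source): calling the Alpaca downloader through
# vectorbt's public Data.download() entry point. The symbol and dates are illustrative, and
# credentials are assumed to be configured beforehand in vbt.settings['data']['alpaca'].
def _example_download_alpaca():  # hypothetical helper, for illustration only
    import vectorbt as vbt
    data = vbt.AlpacaData.download(
        'AAPL',
        start='2021-01-01 UTC',
        end='2021-02-01 UTC',
        timeframe='1d'
    )
    return data.get('Close')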
def download_symbol(cls,
                    symbol: str,
                    exchange: tp.Union[str, "ExchangeT"] = 'binance',
                    config: tp.Optional[dict] = None,
                    timeframe: str = '1d',
                    start: tp.DatetimeLike = 0,
                    end: tp.DatetimeLike = 'now UTC',
                    delay: tp.Optional[float] = None,
                    limit: tp.Optional[int] = 500,
                    retries: int = 3,
                    show_progress: bool = True,
                    params: tp.Optional[dict] = None,
                    tqdm_kwargs: tp.KwargsLike = None) -> tp.Frame:
    """Download the symbol.

    Args:
        symbol (str): Symbol.
        exchange (str or object): Exchange identifier or an exchange object of type
            `ccxt.base.exchange.Exchange`.
        config (dict): Config passed to the exchange upon instantiation.

            Raises an exception if the exchange has already been instantiated.
        timeframe (str): Timeframe supported by the exchange.
        start (any): Start datetime. See `vectorbt.utils.datetime_.to_tzaware_datetime`.
        end (any): End datetime. See `vectorbt.utils.datetime_.to_tzaware_datetime`.
        delay (float): Time to sleep after each request (in milliseconds).

            !!! note
                Use only if `enableRateLimit` is not set.
        limit (int): The maximum number of returned items.
        retries (int): The number of retries on failure to fetch data.
        show_progress (bool): Whether to show the progress bar.
        params (dict): Exchange-specific key-value parameters.
        tqdm_kwargs (dict): Keyword arguments passed to `tqdm`.

    For defaults, see `data.ccxt` in `vectorbt._settings.settings`.
    """
    import ccxt
    from vectorbt._settings import settings

    ccxt_cfg = settings['data']['ccxt']

    if config is None:
        config = {}
    if tqdm_kwargs is None:
        tqdm_kwargs = {}
    if params is None:
        params = {}
    if isinstance(exchange, str):
        if not hasattr(ccxt, exchange):
            raise ValueError(f"Exchange {exchange} not found")
        # Resolve the config
        default_config = {}
        for k, v in ccxt_cfg.items():
            # Get general (not per-exchange) settings
            if k in ccxt.exchanges:
                continue
            default_config[k] = v
        if exchange in ccxt_cfg:
            default_config = merge_dicts(default_config, ccxt_cfg[exchange])
        config = merge_dicts(default_config, config)
        exchange = getattr(ccxt, exchange)(config)
    else:
        if len(config) > 0:
            raise ValueError("Cannot apply config after instantiation of the exchange")
    if not exchange.has['fetchOHLCV']:
        raise ValueError(f"Exchange {exchange} does not support OHLCV")
    if timeframe not in exchange.timeframes:
        raise ValueError(f"Exchange {exchange} does not support {timeframe} timeframe")
    if exchange.has['fetchOHLCV'] == 'emulated':
        warnings.warn("Using emulated OHLCV candles", stacklevel=2)

    def _retry(method):
        @wraps(method)
        def retry_method(*args, **kwargs):
            for i in range(retries):
                try:
                    return method(*args, **kwargs)
                except (ccxt.NetworkError, ccxt.ExchangeError) as e:
                    if i == retries - 1:
                        raise e
                    if delay is not None:
                        time.sleep(delay / 1000)

        return retry_method

    @_retry
    def _fetch(_since, _limit):
        return exchange.fetch_ohlcv(
            symbol,
            timeframe=timeframe,
            since=_since,
            limit=_limit,
            params=params
        )

    # Establish the timestamps
    start_ts = datetime_to_ms(to_tzaware_datetime(start, tz=get_utc_tz()))
    try:
        first_data = _fetch(0, 1)
        first_valid_ts = first_data[0][0]
        next_start_ts = start_ts = max(start_ts, first_valid_ts)
    except Exception:
        next_start_ts = start_ts
    end_ts = datetime_to_ms(to_tzaware_datetime(end, tz=get_utc_tz()))

    def _ts_to_str(ts):
        return str(pd.Timestamp(to_tzaware_datetime(ts, tz=get_utc_tz())))

    # Iteratively collect the data
    data: tp.List[list] = []
    with tqdm(disable=not show_progress, **tqdm_kwargs) as pbar:
        pbar.set_description(_ts_to_str(start_ts))
        while True:
            # Fetch the klines for the next interval
            next_data = _fetch(next_start_ts, limit)
            if len(data) > 0:
                next_data = list(filter(lambda d: next_start_ts < d[0] < end_ts, next_data))
            else:
                next_data = list(filter(lambda d: d[0] < end_ts, next_data))

            # Update the timestamps and the progress bar
            if not len(next_data):
                break
            data += next_data
            pbar.set_description("{} - {}".format(
                _ts_to_str(start_ts),
                _ts_to_str(next_data[-1][0])
            ))
            pbar.update(1)
            next_start_ts = next_data[-1][0]
            if delay is not None:
                time.sleep(delay / 1000)  # be kind to the API

    # Convert the data to a DataFrame
    df = pd.DataFrame(data, columns=[
        'Open time',
        'Open',
        'High',
        'Low',
        'Close',
        'Volume'
    ])
    df.index = pd.to_datetime(df['Open time'], unit='ms', utc=True)
    del df['Open time']
    df['Open'] = df['Open'].astype(float)
    df['High'] = df['High'].astype(float)
    df['Low'] = df['Low'].astype(float)
    df['Close'] = df['Close'].astype(float)
    df['Volume'] = df['Volume'].astype(float)

    return df
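# A hedged usage sketch (not from the original source): fetching OHLCV through the CCXT
# downloader via vectorbt's Data.download() entry point. The exchange, symbol, timeframe,
# and dates below are illustrative.
def _example_download_ccxt():  # hypothetical helper, for illustration only
    import vectorbt as vbt
    data = vbt.CCXTData.download(
        'BTC/USDT',
        exchange='binance',
        timeframe='1d',
        start='2021-01-01 UTC',
        end='2021-02-01 UTC'
    )
    return data.get('Close')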
def download_symbol(cls,
                    symbol: str,
                    client: tp.Optional["ClientT"] = None,
                    interval: str = '1d',
                    start: tp.DatetimeLike = 0,
                    end: tp.DatetimeLike = 'now UTC',
                    delay: tp.Optional[float] = 500,
                    limit: int = 500,
                    show_progress: bool = True,
                    tqdm_kwargs: tp.KwargsLike = None) -> tp.Frame:
    """Download the symbol.

    Args:
        symbol (str): Symbol.
        client (binance.client.Client): Binance client of type `binance.client.Client`.
        interval (str): Kline interval. See `binance.enums`.
        start (any): Start datetime. See `vectorbt.utils.datetime_.to_tzaware_datetime`.
        end (any): End datetime. See `vectorbt.utils.datetime_.to_tzaware_datetime`.
        delay (float): Time to sleep after each request (in milliseconds).
        limit (int): The maximum number of returned items.
        show_progress (bool): Whether to show the progress bar.
        tqdm_kwargs (dict): Keyword arguments passed to `tqdm`.

    For defaults, see `data.binance` in `vectorbt._settings.settings`.
    """
    if client is None:
        raise ValueError("client must be provided")
    if tqdm_kwargs is None:
        tqdm_kwargs = {}

    # Establish the timestamps
    start_ts = datetime_to_ms(to_tzaware_datetime(start, tz=get_utc_tz()))
    try:
        first_data = client.get_klines(
            symbol=symbol,
            interval=interval,
            limit=1,
            startTime=0,
            endTime=None
        )
        first_valid_ts = first_data[0][0]
        next_start_ts = start_ts = max(start_ts, first_valid_ts)
    except Exception:
        next_start_ts = start_ts
    end_ts = datetime_to_ms(to_tzaware_datetime(end, tz=get_utc_tz()))

    def _ts_to_str(ts: tp.DatetimeLike) -> str:
        return str(pd.Timestamp(to_tzaware_datetime(ts, tz=get_utc_tz())))

    # Iteratively collect the data
    data: tp.List[list] = []
    with tqdm(disable=not show_progress, **tqdm_kwargs) as pbar:
        pbar.set_description(_ts_to_str(start_ts))
        while True:
            # Fetch the klines for the next interval
            next_data = client.get_klines(
                symbol=symbol,
                interval=interval,
                limit=limit,
                startTime=next_start_ts,
                endTime=end_ts
            )
            if len(data) > 0:
                next_data = list(filter(lambda d: next_start_ts < d[0] < end_ts, next_data))
            else:
                next_data = list(filter(lambda d: d[0] < end_ts, next_data))

            # Update the timestamps and the progress bar
            if not len(next_data):
                break
            data += next_data
            pbar.set_description("{} - {}".format(
                _ts_to_str(start_ts),
                _ts_to_str(next_data[-1][0])
            ))
            pbar.update(1)
            next_start_ts = next_data[-1][0]
            if delay is not None:
                time.sleep(delay / 1000)  # be kind to the API

    # Convert the data to a DataFrame
    df = pd.DataFrame(data, columns=[
        'Open time',
        'Open',
        'High',
        'Low',
        'Close',
        'Volume',
        'Close time',
        'Quote volume',
        'Number of trades',
        'Taker base volume',
        'Taker quote volume',
        'Ignore'
    ])
    df.index = pd.to_datetime(df['Open time'], unit='ms', utc=True)
    del df['Open time']
    df['Open'] = df['Open'].astype(float)
    df['High'] = df['High'].astype(float)
    df['Low'] = df['Low'].astype(float)
    df['Close'] = df['Close'].astype(float)
    df['Volume'] = df['Volume'].astype(float)
    df['Close time'] = pd.to_datetime(df['Close time'], unit='ms', utc=True)
    df['Quote volume'] = df['Quote volume'].astype(float)
    df['Number of trades'] = df['Number of trades'].astype(int)
    df['Taker base volume'] = df['Taker base volume'].astype(float)
    df['Taker quote volume'] = df['Taker quote volume'].astype(float)
    del df['Ignore']

    return df
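# A hedged usage sketch (not from the original source): the method above expects an
# authenticated binance.client.Client; vectorbt's BinanceData.download() entry point is
# assumed to build one from the api_key/api_secret settings and forward it here. The
# credentials, symbol, and dates below are illustrative placeholders.
def _example_download_binance():  # hypothetical helper, for illustration only
    import vectorbt as vbt
    vbt.settings['data']['binance']['api_key'] = '<YOUR_API_KEY>'
    vbt.settings['data']['binance']['api_secret'] = '<YOUR_API_SECRET>'
    data = vbt.BinanceData.download(
        'BTCUSDT',
        start='2021-01-01 UTC',
        end='2021-02-01 UTC',
        interval='1d'
    )
    return data.get('Close')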