def test_pull_candles(self):
    """Sync bitfinex fUSD funding candles against a mocked exchange API.

    Verifies: (1) the full dataset lands in influx with the expected row
    count, (2) explicit start/end are forwarded to the exchange as ms
    query params, (3) with only start given, end defaults to now() (the
    frozen time).
    """
    exchange = "bitfinex"
    symbol = "fUSD"
    interval = "1m"
    start = 1590889920.0
    end = 1590891120.0
    db = "test_" + exchange
    influx = Candles(exchange, symbol, interval, create_if_missing=True)
    if [x for x in influx.client.get_list_database() if x["name"] == db]:
        # Safety net: never drop a non-test database by accident.
        assert db.startswith(
            "test_"
        ), "DB name doesn't start with 'test_'; aborting to avoid dropping the prod db..."
        logger.info("dropping existing database: %s" % db)
        influx.client.drop_database(db)
    influx.client.create_database(db)
    # FIX: use a context manager so the fixture file handle is closed
    # (was: open(...).read() leaking the handle).
    with open("tests/data/candles_fUSD.json", "r") as fh:
        funding_candles = fh.read()
    with freeze_time(arrow.get(end).datetime):
        # load all data in, insert in influx, verify rows count
        client = get_sync_candles_class(exchange=exchange, symbol=symbol, interval=interval, start=start)
        with mock() as m:
            m.register_uri(
                "GET",
                re.compile(r"api-pub.bitfinex.com\/v2.*"),
                text=funding_candles,
            )
            m.register_uri(ANY, re.compile(r"localhost:8086.*"), real_http=True)
            client.pull_data()
    assert len(influx.get("*")) == 290
    influx.client.query("DROP SERIES FROM /.*/")
    with freeze_time(arrow.get(end).datetime):
        # verify start/end dates are sent to exchange properly:
        client = get_sync_candles_class(exchange=exchange, symbol=symbol, interval=interval, start=start, end=end)
        with mock() as m:
            m.register_uri("GET", re.compile(r"api-pub.bitfinex.com\/v2.*"), text=funding_candles)
            m.register_uri(ANY, re.compile(r"localhost:8086.*"), real_http=True)
            client.pull_data()
        reqs = [x for x in m.request_history if x.hostname.startswith("api-pub.bitfinex")]
        first_exchange_req = reqs[0]
        last_exchange_req = reqs[1]
        # Exchange params are in ms; convert back to seconds to compare.
        assert arrow.get(float(first_exchange_req.qs["start"][0]) / 1000).timestamp == start
        assert arrow.get(float(last_exchange_req.qs["end"][0]) / 1000).timestamp == end
    start = 1590889920.0
    end = 1590890040.0
    influx.client.query("DROP SERIES FROM /.*/")
    with freeze_time(arrow.get(end).datetime):
        # with only start specified, verify end is now() - the frozen time
        client = get_sync_candles_class(exchange=exchange, symbol=symbol, interval=interval, start=start)
        with mock() as m:
            m.register_uri("GET", re.compile(r"api-pub.bitfinex.com\/v2.*"), text=funding_candles)
            m.register_uri(ANY, re.compile(r"localhost:8086.*"), real_http=True)
            client.pull_data()
        reqs = [x for x in m.request_history if x.hostname.startswith("api-pub.bitfinex")]
        first_exchange_req = reqs[0]
        last_exchange_req = reqs[1]
        assert arrow.get(float(first_exchange_req.qs["start"][0]) / 1000).timestamp == start
        assert arrow.get(float(last_exchange_req.qs["end"][0]) / 1000).timestamp == end
def test_get_percentile(self):
    """Exercise the percentile query, both open-ended and over a time range."""
    influx = Candles("bitfinex", "fUSD", "1m")
    window_start = 1590889920.0
    window_end = 1590891120.0
    # No range: whole series.
    assert influx.get_percentile("high", 95) > 0
    # Bounded range on a different field.
    assert influx.get_percentile("close", 95, start=window_start, end=window_end) > 0
def __init__(self, symbol, interval, start=None, end=None, host=None):
    """Record sync parameters, attach the influx-backed candle store,
    and build the exchange API client via the subclass hook.

    start/end are only assigned when truthy; otherwise the class-level
    defaults remain in effect.
    """
    self.candle_order = None  # populated later (e.g. by subclasses / sync code)
    self.symbol, self.interval = symbol, interval
    for attr, raw in (("start", start), ("end", end)):
        if raw:
            setattr(self, attr, arrow.get(raw).timestamp)
    self.influx_client = Candles(self.EXCHANGE, self.symbol, self.interval, create_if_missing=True, host=host)
    self.client = self.api_client()
def test_pull_candles(self):
    """Sync sfox BTCUSD candles against a mocked exchange API.

    Verifies the row count after a full sync, and that start/end are
    passed through to sfox in seconds (sfox takes seconds, not ms).
    """
    exchange = "sfox"
    symbol = "BTCUSD"
    interval = "1m"
    start = 1611594060
    end = 1611595781
    db = "test_" + exchange
    influx = Candles(exchange, symbol, interval, create_if_missing=True)
    if [x for x in influx.client.get_list_database() if x["name"] == db]:
        # Safety net: never drop a non-test database by accident.
        assert db.startswith(
            "test_"
        ), "DB name doesn't start with 'test_'; aborting to avoid dropping the prod db..."
        logger.info("dropping existing database: %s" % db)
        influx.client.drop_database(db)
    influx.client.create_database(db)
    # FIX: use a context manager so the fixture file handle is closed
    # (was: open(...).read() leaking the handle).
    with open("tests/data/candles_sfox_btcusd.json", "r") as fh:
        btc_candles = fh.read()
    with freeze_time(arrow.get(end).datetime):
        # load all data in, insert in influx, verify rows count
        client = get_sync_candles_class(exchange=exchange, symbol=symbol, interval=interval, start=start)
        with mock() as m:
            m.register_uri(
                "GET",
                re.compile(r"chartdata.sfox.com\/.*"),
                text=btc_candles,
            )
            m.register_uri(ANY, re.compile(r"localhost:8086.*"), real_http=True)
            client.pull_data()
    assert len(influx.get("*")) == 29
    influx.client.query("DROP SERIES FROM /.*/")
    with freeze_time(arrow.get(end).datetime):
        # verify start/end dates are sent to exchange properly:
        client = get_sync_candles_class(exchange=exchange, symbol=symbol, interval=interval, start=start, end=end)
        with mock() as m:
            m.register_uri("GET", re.compile(r"chartdata.sfox.com\/.*"), text=btc_candles)
            m.register_uri(ANY, re.compile(r"localhost:8086.*"), real_http=True)
            client.pull_data()
        reqs = [x for x in m.request_history if x.hostname.startswith("chartdata.sfox.com")]
        exchange_req = reqs[0]  # this only takes 1 query on sfox
        # NOTE: we aren't dividing by 1000 here, as it isn't being fetched from influx with
        # precision=ms. Exchange data is seconds.
        assert int(arrow.get(float(exchange_req.qs["starttime"][0])).timestamp) == start
        assert int(arrow.get(float(exchange_req.qs["endtime"][0])).timestamp) == end
def test_get_lowhigh(self):
    """Fetch low/high from data populated by the earlier sync tests,
    covering each combination of start/end being supplied or omitted."""
    influx = Candles("bitfinex", "fUSD", "1m")
    # No bounds at all.
    low, high = influx.get_lowhigh()
    assert low > 0.0 and high > 0.0
    # exercise code path for start/end dates specified in each combination
    for start, end in (
        (1590889920.0, None),
        (None, 1590891120.0),
        (1590889920, 1590891120.0),
    ):
        low, high = influx.get_lowhigh(start=start, end=end)
        assert low > 0.0 and high > 0.0
def test_sfox_get(self):
    """Exercise Candles.get() against a mocked influx endpoint, with every
    combination of the optional start/end arguments."""
    exchange = "sfox"
    symbol = "ETHUSD"
    interval = "1m"
    influx = Candles(exchange, symbol, interval)
    start = 1590889920.0
    end = 1590891120.0
    # FIX: close the fixture file via a context manager (handle was left open).
    with open("tests/data/lowhigh_candles.json", "r") as fh:
        canned_response = fh.read()
    with mock() as m:
        m.register_uri("GET", re.compile(r"localhost:8086"), text=canned_response)
        res = influx.get("*")
        assert res is not None
        res = influx.get("*", start=start)
        assert res is not None
        res = influx.get("*", end=end)
        assert res is not None
        res = influx.get("*", start=start, end=end)
        assert res is not None
class BaseSyncCandles(object):
    """
    Base class for syncing candles
    NOTE candles must come in oldest->newest order. Thanks.
    """

    BIN_SIZES = {"1m": 1, "1h": 60, "1d": 1440}  # interval name -> minutes per candle
    API_MAX_RECORDS = 10000  # per-request record cap assumed for the exchange API
    EXCHANGE = None  # subclasses must set the exchange name
    DEFAULT_SYNC_DAYS = 90  # lookback window when no start/end given
    start = end = client = None

    def __init__(self, symbol, interval, start=None, end=None, host=None):
        """
        :param symbol: exchange symbol, e.g. "fUSD" or "BTCUSD"
        :param interval: candle interval; must be a key of BIN_SIZES
        :param start: optional sync start (anything arrow.get() accepts); only set when truthy
        :param end: optional sync end (anything arrow.get() accepts); only set when truthy
        :param host: optional influxdb host, passed through to Candles
        """
        # Maps candle field name ("open", "ts", ...) -> index in a candle row;
        # must be populated before write_candles() is used.
        self.candle_order = None
        self.symbol = symbol
        self.interval = interval
        if start:
            self.start = arrow.get(start).timestamp
        if end:
            self.end = arrow.get(end).timestamp
        self.influx_client = Candles(self.EXCHANGE, self.symbol, self.interval, create_if_missing=True, host=host)
        self.client = self.api_client()

    def api_client(self):
        """Abstract Method: must be implemented in the child class, and populate self.client"""
        raise NotImplementedError

    def call_api(self):
        """Abstract Method: must be implemented in the child class"""
        raise NotImplementedError

    def get_earliest_latest_timestamps_in_db(self):
        """
        Returns (earliest,latest) timestamp in the database for the current symbol/interval,
        or 0 if there isn't one
        """
        query = "SELECT open,time FROM candles_{} WHERE symbol=$symbol".format(self.interval)
        params = {"symbol": self.symbol}
        latest = self.influx_client.query(query + " ORDER BY time DESC LIMIT 1", bind_params=params)
        earliest = self.influx_client.query(query + " ORDER BY time ASC LIMIT 1", bind_params=params)
        # NOTE: all candles are stored in influx as ms. So convert to s when retrieving, and later we
        # up-convert to ms or us if required by the exchange. When querying manually, run influx with:
        # `influx -precision=ms` /!\
        earliest = arrow.get(list(earliest)[0][0]["time"] / 1000).timestamp if earliest else 0
        latest = arrow.get(list(latest)[0][0]["time"] / 1000).timestamp if latest else 0
        return earliest, latest

    def get_iterations_for_range(self, batch_limit):
        """
        Given a start, end timestamp, return the incremental steps required to fill in data, for a
        specified batch_limit, along with a new start and end time.
        If the start is earlier than the earliest in the DB, it will return the first range so that the
        beginning of the gap is filled, and also return fetch_again_from_ts=latest so the caller knows
        to fetch again from there, to fill in the missing "latest to now" data as well.

        :returns: (number_of_batches, new_start, new_end, fetch_again_from_ts)
        """
        assert self.start and self.end, "Start and End times are not defined!"
        new_start = self.start
        new_end = self.end
        delta_mins = 0
        fetch_again_from_ts = False
        earliest, latest = self.get_earliest_latest_timestamps_in_db()
        if latest == 0:  # no existing data
            delta_mins = abs((self.end - self.start)) / 60
        elif self.start < earliest:
            delta_mins = abs(self.start - earliest) / 60  # only from start to earliest
            new_end = earliest
            fetch_again_from_ts = latest
        else:
            delta_mins = abs((latest - (self.end))) / 60
            new_start = latest
        data_to_fetch = math.ceil(delta_mins / self.BIN_SIZES[self.interval])
        return math.ceil(data_to_fetch / batch_limit), new_start, new_end, fetch_again_from_ts

    def write_candles(self, candles, extra_tags=None, timestamp_units="ms"):
        """
        Writes candle data to influxdb.

        :param candles: iterable of candle rows (lists, or dicts whose values are in candle_order order)
        :param extra_tags: extra influx tags merged into each point; may not override symbol/interval
        :param timestamp_units: "s" or "ms"; "s" timestamps are up-converted to ms before writing
        """
        out = []
        tags = {"symbol": self.symbol, "interval": self.interval}
        if extra_tags:
            assert "symbol" not in extra_tags, (
                "Not allowed to override symbol when you've already instantiated"
                " the class with a specific symbol."
            )
            assert "interval" not in extra_tags, (
                "Not allowed to override interval when you've already instantiated"
                " the class with a specific interval."
            )
            tags.update(extra_tags)
        for c in candles:
            if isinstance(c, dict):
                c = list(c.values())  # sfox API returns ordered dicts instead of a list
            _open = float(c[self.candle_order["open"]])
            _high = float(c[self.candle_order["high"]])
            _low = float(c[self.candle_order["low"]])
            _close = float(c[self.candle_order["close"]])
            _volume = float(c[self.candle_order["volume"]])
            assert _low <= _high, f"Low price must be <= the High price. Candle: {c}"
            assert _low <= _close, f"Low price must be <= the Close price. Candle: {c}"
            # FIX: message previously said "must be <=" although the check is ">=".
            assert _high >= _open, f"High price must be >= the Open price. Candle: {c}"
            if timestamp_units == "s":
                # write in ms, as that's how we query
                _time = int(c[self.candle_order["ts"]] * 1e3)
            else:
                _time = int(c[self.candle_order["ts"]])
            out.append(
                {
                    "measurement": "candles_" + self.interval,
                    "tags": tags,
                    "time": _time,
                    "fields": {
                        "open": _open,
                        "high": _high,
                        "low": _low,
                        "close": _close,
                        "volume": _volume,
                    },
                }
            )
        self.influx_client.write_points(out)

    @staticmethod
    def timestamp_ranges(start, end, steps):
        """
        Returns a list of timestamps for the (start,end) range, representing the steps needed to
        iterate through. You'll need to iterate pairwise... use a magic zip:
        >>> zip([1,2,3,4,5,6], [1,2,3,4,5,6][1:])
        [(1, 2), (2, 3), (3, 4), (4, 5), (5, 6)]
        """
        start = arrow.get(start)
        end = arrow.get(end)
        if not steps:
            # NOTE(review): this is a generator, so `return` here just ends iteration —
            # the (start, end) value is discarded by list() callers, yielding an empty list.
            return start, end
        diff = (end - start) / steps
        for i in range(steps):
            yield int((start + diff * i).timestamp)
        yield int(end.timestamp)

    def sync(self, endpoint, extra_params=None, extra_tags=None, start_format="start", end_format="end",
             timestamp_units="ms"):
        """
        Pulls data from the exchange, and assumes it takes params: limit, start, end
        extra_params: will be added to each exchange request
        extra_tags: added to each measurement written to influx
        """
        # FIX: was a mutable default argument (extra_params={}); None is the safe default.
        extra_params = {} if extra_params is None else extra_params
        assert not any(key in extra_params for key in ["limit", "start", "end"]), (
            "Cannot override the following params: limit, start, end"
        )
        if self.start and not self.end:
            self.end = int(datetime.datetime.now().timestamp())
        if not self.start and not self.end:
            now = datetime.datetime.now()
            self.start = int((now - datetime.timedelta(days=self.DEFAULT_SYNC_DAYS)).timestamp())
            self.end = int(now.timestamp())
        steps, start, end, fetch_again = self.get_iterations_for_range(self.API_MAX_RECORDS)
        if start > end:
            logger.debug(
                f"Nothing to sync, as we have already have {arrow.get(start).isoformat()} in the database, "
                f"and end date {arrow.get(end).isoformat()} was selected."
            )
            return
        logger.debug("Using the following time ranges to complete the sync: {} to {}".format(start, end))
        time_steps = list(self.timestamp_ranges(start, end, steps))
        self.do_fetch(
            time_steps,
            start,
            end,
            endpoint,
            extra_params,
            extra_tags,
            start_format=start_format,
            end_format=end_format,
            timestamp_units=timestamp_units,
        )
        # Date requested was before latest in the db, so the first fetch grabbed start->earliest_in_db,
        # and this one will grab latests_in_db->now.
        if fetch_again:
            logger.debug("Fetching again, this time from the latest in the db, to now()")
            self.start = fetch_again
            self.end = int(datetime.datetime.now().timestamp())
            steps, start, end, _ = self.get_iterations_for_range(self.API_MAX_RECORDS)
            time_steps = list(self.timestamp_ranges(start, end, steps))
            self.do_fetch(
                time_steps,
                start,
                end,
                endpoint,
                extra_params,
                extra_tags,
                start_format=start_format,
                end_format=end_format,
                timestamp_units=timestamp_units,
            )

    def do_fetch(
        self,
        time_steps,
        start,
        end,
        endpoint,
        extra_params,
        extra_tags,
        start_format="start",
        end_format="end",
        timestamp_units="ms",
    ):
        """Iterate pairwise over time_steps, calling the exchange API for each window
        and writing the returned candles to influx."""
        for start, end in zip(time_steps, time_steps[1:]):
            formatted_start = start  # formatted for exchange API calls
            formatted_end = end
            if timestamp_units == "ms":
                formatted_start *= 1e3
                formatted_end *= 1e3
            elif timestamp_units == "us":
                formatted_start *= 1e6
                formatted_end *= 1e6
            params = {"limit": self.API_MAX_RECORDS, start_format: int(formatted_start), end_format: int(formatted_end)}
            if extra_params:
                params.update(extra_params)
            logger.debug(
                "Pulling {} from {} for {} from {} to {}".format(
                    endpoint,
                    self.EXCHANGE,
                    self.symbol,
                    arrow.get(start).format("YYYY-MM-DD HH:mm:ss"),
                    arrow.get(end).format("YYYY-MM-DD HH:mm:ss"),
                )
            )
            res = self.call_api(endpoint, params)
            self.write_candles(res, extra_tags, timestamp_units)