async def loop(self):
    if 'start_flush' in self.config.redis and self.config.redis['start_flush']:
        LOG.info('Flushing cache')
        redis.Redis(self.config.redis['ip'], port=self.config.redis['port']).flushall()

    while True:
        start = time.time()
        delete = self.config.redis['del_after_read']
        r = redis.Redis(self.config.redis['ip'], port=self.config.redis['port'], decode_responses=True)
        for exchange in self.config.exchanges:
            for dtype in self.config.exchanges[exchange]:
                for pair in self.config.exchanges[exchange][dtype]:
                    key = f'{dtype}-{exchange}-{pair}'
                    store = Storage(self.config)
                    LOG.info('Reading %s', key)
                    data = r.xread({key: '0-0' if key not in self.last_id else self.last_id[key]})

                    if len(data) == 0:
                        LOG.info('No data for %s', key)
                        continue

                    agg = []
                    ids = []
                    for update_id, update in data[0][1]:
                        ids.append(update_id)
                        agg.append(update)
                    self.last_id[key] = ids[-1]

                    store.aggregate(agg)
                    store.write(exchange, dtype, pair, time.time())
                    if delete:
                        r.xdel(key, *ids)
                    LOG.info("Write Complete %s", key)

        total = time.time() - start
        interval = self.config.storage_interval - total
        if interval <= 0:
            LOG.warning("Storage operations currently take %.1f seconds, longer than the interval of %d",
                        total, self.config.storage_interval)
            interval = 0.5
        await asyncio.sleep(interval)
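
# A minimal, self-contained sketch of the Redis Streams pattern the loop above relies on:
# XREAD everything after a tracked last id, remember the newest id as the next cursor, and
# optionally XDEL what was consumed (the del_after_read behaviour). The helper name and the
# sample key/fields are illustrative assumptions; only the redis-py calls match the loop above.
import redis


def read_stream(r: redis.Redis, key: str, last_id: str = '0-0'):
    """Return (entries, newest_id) for a single stream, or ([], last_id) if it is empty."""
    data = r.xread({key: last_id})
    if not data:
        return [], last_id
    entries = data[0][1]             # [(id, {field: value}), ...] for the single key requested
    return entries, entries[-1][0]   # newest id becomes the cursor for the next read


if __name__ == '__main__':
    r = redis.Redis('localhost', port=6379, decode_responses=True)
    # key format mirrors the loop above: f'{dtype}-{exchange}-{pair}'
    r.xadd('trades-COINBASE-BTC-USD', {'price': '100.0', 'amount': '0.1'})
    entries, last_id = read_stream(r, 'trades-COINBASE-BTC-USD')
    print(len(entries), last_id)
    r.xdel('trades-COINBASE-BTC-USD', *[i for i, _ in entries])  # mirrors del_after_read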

async def loop(self):
    if self.config.cache == 'redis':
        cache = Redis(ip=self.config.redis['ip'],
                      port=self.config.redis['port'],
                      socket=self.config.redis.socket,
                      del_after_read=self.config.redis['del_after_read'],
                      flush=self.config.redis['start_flush'],
                      retention=self.config.redis.retention_time if 'retention_time' in self.config.redis else None)
    elif self.config.cache == 'kafka':
        cache = Kafka(self.config.kafka['ip'],
                      self.config.kafka['port'],
                      flush=self.config.kafka['start_flush'])

    while True:
        start = time.time()
        if 'exchanges' in self.config and self.config.exchanges:
            for exchange in self.config.exchanges:
                for dtype in self.config.exchanges[exchange]:
                    if dtype in {'retries'}:
                        continue
                    for pair in (self.config.exchanges[exchange][dtype]
                                 if 'symbols' not in self.config.exchanges[exchange][dtype]
                                 else self.config.exchanges[exchange][dtype]['symbols']):
                        store = Storage(self.config)
                        LOG.info('Reading %s-%s-%s', exchange, dtype, pair)
                        data = cache.read(exchange, dtype, pair)
                        if len(data) == 0:
                            LOG.info('No data for %s-%s-%s', exchange, dtype, pair)
                            continue

                        store.aggregate(data)
                        store.write(exchange, dtype, pair, time.time())

                        cache.delete(exchange, dtype, pair)
                        LOG.info('Write Complete %s-%s-%s', exchange, dtype, pair)

            total = time.time() - start
            interval = self.config.storage_interval - total
            if interval <= 0:
                LOG.warning("Storage operations currently take %.1f seconds, longer than the interval of %d",
                            total, self.config.storage_interval)
                interval = 0.5
            await asyncio.sleep(interval)
        else:
            await asyncio.sleep(30)
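
# The loop above only touches the cache through read() and delete(), so the Redis and Kafka
# backends can hide streams vs. topics behind one small interface. This ABC is an assumption
# about that contract for illustration; it is not the project's actual cache class.
from abc import ABC, abstractmethod


class CacheSketch(ABC):
    """Assumed minimal contract shared by the Redis and Kafka caches used above."""

    @abstractmethod
    def read(self, exchange: str, dtype: str, pair: str) -> list:
        """Return buffered updates for one exchange/data-type/pair, oldest first."""

    @abstractmethod
    def delete(self, exchange: str, dtype: str, pair: str) -> None:
        """Drop the entries returned by the preceding read()."""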

def _write_pair_blocking(self, exchange, dtype, pair, start, end, write_on_stop):
    LOG.info('Reading cache for %s-%s-%s', exchange, dtype, pair)
    store = Storage(self.config, parquet_buffer=self.parquet_buffer)
    data = self.cache.read(exchange, dtype, pair, start=start, end=end)
    if len(data) == 0:
        LOG.info('No data for %s-%s-%s', exchange, dtype, pair)
        return

    store.aggregate(data)

    retries = 0
    while (not self.terminating) or write_on_stop:
        if retries > self.config.storage_retries:
            LOG.error("Failed to write after %d retries", self.config.storage_retries)
            raise EngineWriteError

        try:
            # retrying this is ok, provided every
            # engine clears its internal buffer after writing successfully.
            LOG.info('Writing cached data to store for %s-%s-%s', exchange, dtype, pair)
            store.write(exchange, dtype, pair, time.time())
        except OSError as e:
            LOG.warning('Could not write %s-%s-%s. %s', exchange, dtype, pair, e)
            if write_on_stop:
                break
            if e.errno == 112:
                # Host is down
                retries += 1
                self.event.wait(self.config.storage_retry_wait)
            else:
                raise
        except EngineWriteError as e:
            LOG.warning('Could not write %s-%s-%s. %s', exchange, dtype, pair, e)
            if write_on_stop:
                break
            retries += 1
            self.event.wait(self.config.storage_retry_wait)
        else:
            break

    LOG.info('Deleting cached data for %s-%s-%s', exchange, dtype, pair)
    self.cache.delete(exchange, dtype, pair)
    LOG.info('Write Complete %s-%s-%s', exchange, dtype, pair)
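
# One way the blocking writer above could be fanned out: submit one _write_pair_blocking call
# per pair to a thread pool so a slow backend does not serialize the other pairs. The executor
# size, the free function, and its arguments are illustrative assumptions, not the original
# scheduling code.
from concurrent.futures import ThreadPoolExecutor


def write_all_pairs(engine, exchange, dtype, pairs, start, end, write_on_stop=False):
    with ThreadPoolExecutor(max_workers=4) as pool:
        futures = [
            pool.submit(engine._write_pair_blocking, exchange, dtype, pair,
                        start, end, write_on_stop)
            for pair in pairs
        ]
        for f in futures:
            f.result()  # re-raise EngineWriteError / OSError from any worker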

def _worker(self, exchange):
    r = Rest()
    storage = Storage(self.config)
    for pair in self.config.backfill[exchange]:
        try:
            start = self.config.backfill[exchange][pair].start

            while True:
                end = storage.get_start_date(exchange, 'trades', pair)
                if all(e for e in end):
                    break
                time.sleep(10)

            ends = list(map(lambda x: Timestamp(x, unit='s') - Timedelta(microseconds=1), end))

            if any(e <= Timestamp(start) for e in ends):
                LOG.info("Data in storage is earlier than backfill start date for %s - %s", exchange, pair)
                continue

            LOG.info("Backfill - Starting for %s - %s for range %s - %s", exchange, pair, start, str(max(ends)))

            # Backfill from end date to start date, 1 day at a time, in reverse order (from end -> start)
            end = max(ends)
            start = Timestamp(start)
            while start < end:
                seg_start = end.replace(hour=0, minute=0, second=0, microsecond=0, nanosecond=0)
                if start > seg_start:
                    seg_start = start
                LOG.info("Backfill - Reading %s to %s for %s - %s", seg_start, end, exchange, pair)

                trades = []
                try:
                    for t in r[exchange].trades(pair, str(seg_start), str(end)):
                        trades.extend(t)
                except Exception:
                    LOG.warning("Backfill - encountered error backfilling %s - %s, trying again...",
                                exchange, pair, exc_info=True)
                    time.sleep(300)
                    continue

                if not trades:
                    end = seg_start - Timedelta(nanoseconds=1)
                    continue

                for trade in trades:
                    trade['price'] = float(trade['price'])
                    trade['amount'] = float(trade['amount'])

                def gen_pos():
                    counter = 0
                    while True:
                        yield counter % len(ends)
                        counter += 1

                pos = gen_pos()
                ends_float = [x.timestamp() for x in ends]

                def timestamp_filter(data):
                    boundary = ends_float[next(pos)]
                    return list(filter(lambda x: x['timestamp'] < boundary, copy.copy(data)))

                storage.aggregate(trades, transform=timestamp_filter)
                storage.write(exchange, 'trades', pair, end.timestamp())
                LOG.info("Backfill - Wrote %s to %s for %s - %s", seg_start, end, exchange, pair)
                end = seg_start - Timedelta(nanoseconds=1)
            LOG.info("Backfill for %s - %s completed", exchange, pair)
        except Exception:
            LOG.error("Backfill failed for %s - %s", exchange, pair, exc_info=True)
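
# The gen_pos()/ends_float closure above hands each successive call of the transform a
# different cut-off, cycling through the per-backend start dates. itertools.cycle expresses
# the same idea more directly; the helper name and sample data are illustrative only.
import itertools


def make_timestamp_filter(ends_float):
    boundaries = itertools.cycle(ends_float)

    def timestamp_filter(data):
        boundary = next(boundaries)
        return [row for row in data if row['timestamp'] < boundary]

    return timestamp_filter


trades = [{'timestamp': 99.0}, {'timestamp': 150.0}, {'timestamp': 201.0}]
f = make_timestamp_filter([200.0, 100.0])
print(f(trades))  # first backend keeps rows below 200.0
print(f(trades))  # second backend keeps rows below 100.0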

async def loop(self):
    if self.config.cache == 'redis':
        cache = Redis(ip=self.config.redis['ip'],
                      port=self.config.redis['port'],
                      socket=self.config.redis.socket,
                      del_after_read=self.config.redis['del_after_read'],
                      flush=self.config.redis['start_flush'],
                      retention=self.config.redis.retention_time if 'retention_time' in self.config.redis else None)
    elif self.config.cache == 'kafka':
        cache = Kafka(self.config.kafka['ip'],
                      self.config.kafka['port'],
                      flush=self.config.kafka['start_flush'])

    interval = self.config.storage_interval
    time_partition = False
    multiplier = 1
    if not isinstance(interval, int):
        if len(interval) > 1:
            multiplier = int(interval[:-1])
            interval = interval[-1]
        base_interval = interval
        if interval in {'M', 'H', 'D'}:
            time_partition = True
            if interval == 'M':
                interval = 60 * multiplier
            elif interval == 'H':
                interval = 3600 * multiplier
            else:
                interval = 86400 * multiplier

    parquet_buffer = dict()
    while True:
        start, end = None, None
        try:
            aggregation_start = time.time()
            if time_partition:
                interval_start = aggregation_start
                if end:
                    interval_start = end + timedelta(seconds=interval + 1)
                start, end = get_time_interval(interval_start, base_interval, multiplier=multiplier)

            if 'exchanges' in self.config and self.config.exchanges:
                store = Storage(self.config, parquet_buffer=parquet_buffer)
                for exchange in self.config.exchanges:
                    for dtype in self.config.exchanges[exchange]:
                        # Skip over the retries arg in the config if present.
                        if dtype in {'retries', 'channel_timeouts'}:
                            continue
                        for pair in (self.config.exchanges[exchange][dtype]
                                     if 'symbols' not in self.config.exchanges[exchange][dtype]
                                     else self.config.exchanges[exchange][dtype]['symbols']):
                            LOG.info('Reading %s-%s-%s', exchange, dtype, pair)
                            data = cache.read(exchange, dtype, pair, start=start, end=end)
                            if len(data) == 0:
                                LOG.info('No data for %s-%s-%s', exchange, dtype, pair)
                                continue

                            store.aggregate(data)

                            retries = 0
                            while True:
                                if retries > self.config.storage_retries:
                                    LOG.error("Failed to write after %d retries", self.config.storage_retries)
                                    raise EngineWriteError

                                try:
                                    # retrying this is ok, provided every
                                    # engine clears its internal buffer after writing successfully.
                                    store.write(exchange, dtype, pair, time.time())
                                except OSError as e:
                                    if e.errno == 112:
                                        # Host is down
                                        LOG.warning('Could not write %s-%s-%s. %s', exchange, dtype, pair, e)
                                        retries += 1
                                        await asyncio.sleep(self.config.storage_retry_wait)
                                        continue
                                    else:
                                        raise
                                except EngineWriteError:
                                    retries += 1
                                    await asyncio.sleep(self.config.storage_retry_wait)
                                    continue
                                else:
                                    break

                            cache.delete(exchange, dtype, pair)
                            LOG.info('Write Complete %s-%s-%s', exchange, dtype, pair)

                total = time.time() - aggregation_start
                wait = interval - total
                if wait <= 0:
                    LOG.warning("Storage operations currently take %.1f seconds, longer than the interval of %d",
                                total, interval)
                    wait = 0.5
                await asyncio.sleep(wait)
            else:
                await asyncio.sleep(30)
        except Exception:
            LOG.error("Aggregator running on PID %d died due to exception", os.getpid(), exc_info=True)
            raise
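
# Standalone restatement of the storage_interval handling above: an int is taken as seconds,
# while strings such as 'M', 'H', 'D' or '2H', '15M' switch on time-partitioned reads. The
# function name is illustrative, and it assumes the string form always ends in M, H, or D.
def parse_storage_interval(value):
    """Return (seconds, time_partition, base_interval, multiplier)."""
    if isinstance(value, int):
        return value, False, None, 1
    multiplier = 1
    if len(value) > 1:
        multiplier = int(value[:-1])
        value = value[-1]
    seconds = {'M': 60, 'H': 3600, 'D': 86400}[value] * multiplier
    return seconds, True, value, multiplier


assert parse_storage_interval(300) == (300, False, None, 1)
assert parse_storage_interval('2H') == (7200, True, 'H', 2)
assert parse_storage_interval('15M') == (900, True, 'M', 15)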

async def loop(self):
    if self.config.cache == 'redis':
        cache = Redis(ip=self.config.redis['ip'],
                      port=self.config.redis['port'],
                      socket=self.config.redis.socket,
                      del_after_read=self.config.redis['del_after_read'],
                      flush=self.config.redis['start_flush'],
                      retention=self.config.redis.retention_time if 'retention_time' in self.config.redis else None)
    elif self.config.cache == 'kafka':
        cache = Kafka(self.config.kafka['ip'],
                      self.config.kafka['port'],
                      flush=self.config.kafka['start_flush'])

    interval = self.config.storage_interval
    time_partition = False
    multiplier = 1
    if not isinstance(interval, int):
        if len(interval) > 1:
            multiplier = int(interval[:-1])
            interval = interval[-1]
        base_interval = interval
        if interval in {'M', 'H', 'D'}:
            time_partition = True
            if interval == 'M':
                interval = 60 * multiplier
            elif interval == 'H':
                interval = 3600 * multiplier
            else:
                interval = 86400 * multiplier

    while True:
        start, end = None, None
        try:
            aggregation_start = time.time()
            if time_partition:
                interval_start = aggregation_start
                if end:
                    interval_start = end + timedelta(seconds=interval + 1)
                start, end = get_time_interval(interval_start, base_interval, multiplier=multiplier)

            if 'exchanges' in self.config and self.config.exchanges:
                for exchange in self.config.exchanges:
                    for dtype in self.config.exchanges[exchange]:
                        # Skip over the retries arg in the config if present.
                        if dtype in {'retries', 'channel_timeouts'}:
                            continue
                        for pair in (self.config.exchanges[exchange][dtype]
                                     if 'symbols' not in self.config.exchanges[exchange][dtype]
                                     else self.config.exchanges[exchange][dtype]['symbols']):
                            store = Storage(self.config)
                            LOG.info('Reading %s-%s-%s', exchange, dtype, pair)
                            data = cache.read(exchange, dtype, pair, start=start, end=end)
                            if len(data) == 0:
                                LOG.info('No data for %s-%s-%s', exchange, dtype, pair)
                                continue

                            store.aggregate(data)
                            store.write(exchange, dtype, pair, time.time())

                            cache.delete(exchange, dtype, pair)
                            LOG.info('Write Complete %s-%s-%s', exchange, dtype, pair)

                total = time.time() - aggregation_start
                wait = interval - total
                if wait <= 0:
                    LOG.warning("Storage operations currently take %.1f seconds, longer than the interval of %d",
                                total, interval)
                    wait = 0.5
                await asyncio.sleep(wait)
            else:
                await asyncio.sleep(30)
        except Exception:
            LOG.error("Aggregator running on PID %d died due to exception", os.getpid(), exc_info=True)
            raise
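
# get_time_interval() is not shown in this section. This stand-in illustrates one plausible
# behaviour, purely as an assumption: snap the read window to the previous complete base
# interval so time-partitioned reads cover whole minutes/hours/days. It is not the project's
# implementation.
from pandas import Timestamp, Timedelta


def get_time_interval_sketch(now_epoch, base_interval, multiplier=1):
    unit = {'M': 'min', 'H': 'h', 'D': 'D'}[base_interval]
    step = Timedelta(multiplier, unit=unit)
    end = Timestamp(now_epoch, unit='s').floor(f'{multiplier}{unit}')
    return end - step, end  # (start, end) of the last complete window


start, end = get_time_interval_sketch(1_700_003_000, 'H')
print(start, end)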

def _worker(self, exchange):
    r = Rest()
    storage = Storage(self.config)
    for pair in self.config.backfill[exchange]:
        try:
            start = self.config.backfill[exchange][pair].start

            while True:
                end = storage.get_start_date(exchange, 'trades', pair)
                if not all(e == end[0] for e in end):
                    raise InconsistentStorage("Stored data differs, cannot backfill")
                end = end[0]
                if end:
                    break
                time.sleep(10)

            end = Timestamp(end, unit='s')
            end -= Timedelta(microseconds=1)
            start = Timestamp(start)

            if end <= Timestamp(start):
                LOG.info("Data in storage is earlier than backfill start date for %s - %s", exchange, pair)
                continue

            LOG.info("Backfill - Starting for %s - %s for range %s - %s", exchange, pair, start, str(end))

            # Backfill from end date to start date, 1 day at a time, in reverse order (from end -> start)
            while start < end:
                seg_start = end.replace(hour=0, minute=0, second=0, microsecond=0, nanosecond=0)
                if start > seg_start:
                    seg_start = start
                LOG.info("Backfill - Reading %s to %s for %s - %s", seg_start, end, exchange, pair)

                trades = []
                try:
                    for t in r[exchange].trades(pair, str(seg_start), str(end)):
                        trades.extend(t)
                except Exception:
                    LOG.warning("Backfill - encountered error backfilling %s - %s, trying again...",
                                exchange, pair, exc_info=True)
                    time.sleep(300)
                    continue

                if not trades:
                    end = seg_start - Timedelta(nanoseconds=1)
                    continue

                storage.aggregate(trades)
                storage.write(exchange, 'trades', pair, end.timestamp())
                LOG.info("Backfill - Wrote %s to %s for %s - %s", seg_start, end, exchange, pair)
                end = seg_start - Timedelta(nanoseconds=1)
            LOG.info("Backfill for %s - %s completed", exchange, pair)
        except Exception:
            LOG.error("Backfill failed for %s - %s", exchange, pair, exc_info=True)
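
# The backfill above walks backwards one calendar day at a time: each segment runs from the
# later of (start, midnight of the current end) up to end, then end jumps to just before that
# midnight. Pure restatement of that loop with sample dates; the generator name is illustrative.
from pandas import Timestamp, Timedelta


def daily_segments(start, end):
    start, end = Timestamp(start), Timestamp(end)
    while start < end:
        seg_start = max(start, end.replace(hour=0, minute=0, second=0,
                                           microsecond=0, nanosecond=0))
        yield seg_start, end
        end = seg_start - Timedelta(nanoseconds=1)


for seg in daily_segments('2021-01-01 12:00', '2021-01-03 06:00'):
    print(seg[0], '->', seg[1])
# 2021-01-03 00:00:00 -> 2021-01-03 06:00:00
# 2021-01-02 00:00:00 -> 2021-01-02 23:59:59.999999999
# 2021-01-01 12:00:00 -> 2021-01-01 23:59:59.999999999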

async def loop(self):
    if self.config.cache == 'redis':
        cache = Redis(ip=self.config.redis['ip'],
                      port=self.config.redis['port'],
                      socket=self.config.redis.socket,
                      del_after_read=self.config.redis['del_after_read'],
                      flush=self.config.redis['start_flush'],
                      retention=self.config.redis.retention_time if 'retention_time' in self.config.redis else None)
    elif self.config.cache == 'kafka':
        cache = Kafka(self.config.kafka['ip'],
                      self.config.kafka['port'],
                      flush=self.config.kafka['start_flush'])

    self.store = Storage(self.config)  # tmp: single shared store instance

    interval = self.config.storage_interval
    time_partition = False
    multiplier = 1
    if not isinstance(interval, int):
        if len(interval) > 1:
            multiplier = int(interval[:-1])
            interval = interval[-1]
        base_interval = interval
        if interval in {'M', 'H', 'D'}:
            time_partition = True
            if interval == 'M':
                interval = 60 * multiplier
            elif interval == 'H':
                interval = 3600 * multiplier
            else:
                interval = 86400 * multiplier

    stats_cache = {}
    for exchange in self.config.exchanges:
        stats_cache[exchange] = {}
        for pair in self.config.exchanges[exchange][TRADES]:
            stats_cache[exchange][pair] = init_cache(
                InfluxConfig(db='crypto',
                             host='http://localhost:8086',
                             exchange=exchange,
                             pair=pair))

    while True:
        start, end = None, None
        try:
            aggregation_start = time.time()
            if time_partition:
                interval_start = aggregation_start
                if end:
                    interval_start = end + timedelta(seconds=interval + 1)
                start, end = get_time_interval(interval_start, base_interval, multiplier=multiplier)

            if 'exchanges' in self.config and self.config.exchanges:
                data_arb = {}
                for exchange in self.config.exchanges:
                    stats_all = []  # stats from each loop iteration collected here
                    data_all = []   # data from each loop iteration collected here
                    data_arb[exchange] = {}
                    for dtype in self.config.exchanges[exchange]:
                        # Skip over the retries arg in the config if present.
                        if dtype in {'retries', 'channel_timeouts'}:
                            continue
                        # for pair in self.config.exchanges[exchange][dtype] if 'symbols' not in \
                        #         self.config.exchanges[exchange][dtype] else \
                        #         self.config.exchanges[exchange][dtype]['symbols']:
                        for pair in self.config.exchanges[exchange][dtype]:  # tmp
                            # store = Storage(self.config)
                            LOG.info('Reading %s-%s-%s', exchange, dtype, pair)
                            data = cache.read(exchange, dtype, pair, start=start, end=end)
                            data_all.append(data)
                            data_arb[exchange][pair] = data
                            if len(data) == 0:
                                LOG.info('No data for %s-%s-%s', exchange, dtype, pair)
                                continue

                            # if dtype == TRADES:
                            stats_all.append(self.collect_stats(data, exchange, pair, stats_cache))
                            # LOG.info('HAVING trades')
                            # stats_to_write = []
                            # for trade in data:
                            #     if 'id' not in trade:
                            #         trade['id'] = None
                            #     typed_trade = Trade(**trade)
                            #     update_stats(stats_cache[exchange][pair], typed_trade, stats_to_write)
                            # LOG.info('DONE computing stats for %s-%s', exchange, pair)
                            # store.aggregate(stats_to_write)
                            # store.write(exchange, STATS, pair, time.time())
                            #
                            # self.store.aggregate(data)
                            # self.store.write(exchange, dtype, pair, time.time())

                            cache.delete(exchange, dtype, pair)
                            # LOG.info('Write Complete %s-%s-%s', exchange, dtype, pair)

                    # NOTE: relies on pair/dtype still holding the last values from the loops above
                    self.store.aggregate(stats_all)
                    self.store.write(exchange, STATS, pair, time.time())

                    if any(data_all):
                        self.store.aggregate(data_all)
                        self.store.write(exchange, dtype, pair, time.time())

                if data_arb:
                    await self.write_arbs(data_arb)

                total = time.time() - aggregation_start
                wait = interval - total
                if wait <= 0:
                    LOG.warning("Storage operations currently take %.1f seconds, longer than the interval of %d",
                                total, interval)
                    wait = 0.5
                else:
                    LOG.info("Storage operations took %.1fs, interval %ds", total, interval)
                await asyncio.sleep(wait)
            else:
                await asyncio.sleep(30)
        except Exception:
            LOG.error("Aggregator running on PID %d died due to exception", os.getpid(), exc_info=True)
            raise
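
# write_arbs() is not shown in this section. This standalone sketch only illustrates the kind
# of cross-exchange comparison the data_arb structure above makes possible: data_arb[exchange][pair]
# holds that interval's updates, so the most recent price per exchange can be compared pair by
# pair. The function name, the 'price' field, and the sample data are assumptions, not the
# original implementation.
def arb_spreads(data_arb):
    spreads = {}
    last_price = {}
    for exchange, pairs in data_arb.items():
        for pair, updates in pairs.items():
            if updates:
                last_price.setdefault(pair, {})[exchange] = float(updates[-1]['price'])
    for pair, by_exchange in last_price.items():
        if len(by_exchange) > 1:
            spreads[pair] = max(by_exchange.values()) - min(by_exchange.values())
    return spreads


sample = {'COINBASE': {'BTC-USD': [{'price': '30001.5'}]},
          'KRAKEN': {'BTC-USD': [{'price': '30003.0'}]}}
print(arb_spreads(sample))  # {'BTC-USD': 1.5}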