async def loop(self):
        """Drain the configured Redis streams into storage on a fixed interval.

        If the redis config contains a truthy ``start_flush`` entry, the Redis
        instance is flushed once before the first pass. Every pass opens a new
        Redis client and, for each ``<dtype>-<exchange>-<pair>`` stream, reads
        from the last ID recorded in ``self.last_id`` (``'0-0'`` when the key
        has not been read yet), aggregates the updates into a fresh
        ``Storage`` and writes them. Consumed entries are removed from the
        stream when ``del_after_read`` is set. The loop has no exit condition
        of its own.
        """
        flush_on_start = ('start_flush' in self.config.redis
                          and self.config.redis['start_flush'])
        if flush_on_start:
            LOG.info('Flushing cache')
            flusher = redis.Redis(self.config.redis['ip'],
                                  port=self.config.redis['port'])
            flusher.flushall()

        while True:
            began = time.time()
            delete = self.config.redis['del_after_read']
            client = redis.Redis(self.config.redis['ip'],
                                 port=self.config.redis['port'],
                                 decode_responses=True)
            for exchange in self.config.exchanges:
                for dtype in self.config.exchanges[exchange]:
                    for pair in self.config.exchanges[exchange][dtype]:
                        key = f'{dtype}-{exchange}-{pair}'
                        store = Storage(self.config)
                        LOG.info('Reading %s', key)

                        # Resume after the last entry seen for this stream,
                        # or from the very beginning on the first read.
                        if key in self.last_id:
                            resume_from = self.last_id[key]
                        else:
                            resume_from = '0-0'
                        response = client.xread({key: resume_from})

                        if len(response) == 0:
                            LOG.info('No data for %s', key)
                            continue

                        # Only one stream was requested, so the entries of
                        # interest are in the first element of the response.
                        entries = response[0][1]
                        ids = [entry_id for entry_id, _ in entries]
                        updates = [update for _, update in entries]

                        self.last_id[key] = ids[-1]

                        store.aggregate(updates)
                        store.write(exchange, dtype, pair, time.time())
                        if delete:
                            client.xdel(key, *ids)
                        LOG.info("Write Complete %s", key)

            elapsed = time.time() - began
            pause = self.config.storage_interval - elapsed
            if pause <= 0:
                LOG.warning(
                    "Storage operations currently take %.1f seconds, longer than the interval of %d",
                    elapsed, self.config.storage_interval)
                # Still yield briefly so other tasks get a chance to run.
                pause = 0.5
            await asyncio.sleep(pause)
Example #2
0
    async def loop(self):
        """Move cached exchange data into storage once per storage interval.

        Builds a ``Redis`` or ``Kafka`` cache wrapper depending on
        ``self.config.cache``, then loops without an exit condition: for every
        configured exchange/dtype/pair the cached data is read, aggregated
        into a fresh ``Storage``, written, and deleted from the cache. When no
        exchanges are configured the loop re-checks every 30 seconds.
        """
        if self.config.cache == 'redis':
            redis_conf = self.config.redis
            cache = Redis(
                ip=redis_conf['ip'],
                port=redis_conf['port'],
                socket=redis_conf.socket,
                del_after_read=redis_conf['del_after_read'],
                flush=redis_conf['start_flush'],
                retention=(redis_conf.retention_time
                           if 'retention_time' in redis_conf else None))
        elif self.config.cache == 'kafka':
            kafka_conf = self.config.kafka
            cache = Kafka(kafka_conf['ip'],
                          kafka_conf['port'],
                          flush=kafka_conf['start_flush'])

        while True:
            began = time.time()

            # Nothing configured yet: idle and look again later.
            if not ('exchanges' in self.config and self.config.exchanges):
                await asyncio.sleep(30)
                continue

            for exchange in self.config.exchanges:
                for dtype in self.config.exchanges[exchange]:
                    # 'retries' is a setting stored next to the data types.
                    if dtype == 'retries':
                        continue

                    # The symbols are either the entry itself or listed under
                    # its 'symbols' key.
                    dtype_conf = self.config.exchanges[exchange][dtype]
                    if 'symbols' in dtype_conf:
                        pairs = dtype_conf['symbols']
                    else:
                        pairs = dtype_conf

                    for pair in pairs:
                        store = Storage(self.config)
                        LOG.info('Reading %s-%s-%s', exchange, dtype, pair)

                        data = cache.read(exchange, dtype, pair)
                        if len(data) == 0:
                            LOG.info('No data for %s-%s-%s', exchange, dtype,
                                     pair)
                            continue

                        store.aggregate(data)
                        store.write(exchange, dtype, pair, time.time())

                        cache.delete(exchange, dtype, pair)
                        LOG.info('Write Complete %s-%s-%s', exchange, dtype,
                                 pair)

            elapsed = time.time() - began
            pause = self.config.storage_interval - elapsed
            if pause <= 0:
                LOG.warning(
                    "Storage operations currently take %.1f seconds, longer than the interval of %d",
                    elapsed, self.config.storage_interval)
                pause = 0.5
            await asyncio.sleep(pause)
Example #3
0
    def _write_pair_blocking(self, exchange, dtype, pair, start, end, write_on_stop):
        """Flush one exchange/dtype/pair from the cache to storage, with retries.

        Reads the cached data for the given window, aggregates it into a
        ``Storage`` instance and attempts to write it. Failed writes are
        retried after waiting ``self.config.storage_retry_wait`` on
        ``self.event``. Afterwards the cached data for the pair is deleted.

        Args:
            exchange: exchange name, forwarded to the cache and the store.
            dtype: data type name, forwarded to the cache and the store.
            pair: trading pair, forwarded to the cache and the store.
            start: lower bound passed to ``self.cache.read``.
            end: upper bound passed to ``self.cache.read``.
            write_on_stop: when truthy, the write is attempted even if
                ``self.terminating`` is set, and a failed write is not
                retried (the loop is left after the first failure).

        Raises:
            EngineWriteError: when more than ``self.config.storage_retries``
                retries have been used.
            OSError: re-raised when the write fails with an errno other than
                112 and ``write_on_stop`` is falsy.
        """
        LOG.info('Reading cache for %s-%s-%s', exchange, dtype, pair)
        store = Storage(self.config, parquet_buffer=self.parquet_buffer)
        data = self.cache.read(exchange, dtype, pair, start=start, end=end)
        if len(data) == 0:
            LOG.info('No data for %s-%s-%s', exchange, dtype, pair)
            return

        store.aggregate(data)

        retries = 0
        while ((not self.terminating) or write_on_stop):
            if retries > self.config.storage_retries:
                LOG.error("Failed to write after %d retries", self.config.storage_retries)
                raise EngineWriteError

            try:
                # retrying this is ok, provided every
                # engine clears its internal buffer after writing successfully.
                LOG.info('Writing cached data to store for %s-%s-%s', exchange, dtype, pair)
                store.write(exchange, dtype, pair, time.time())
            except OSError as e:
                LOG.warning('Could not write %s-%s-%s. %s', exchange, dtype, pair, e)
                if write_on_stop:
                    break
                if e.errno == 112:  # Host is down
                    retries += 1
                    self.event.wait(self.config.storage_retry_wait)
                else:
                    raise

            except EngineWriteError as e:
                # The exception must be bound here: the name bound by the
                # OSError handler above does not exist in this handler, so
                # logging an unbound `e` would raise instead of retrying.
                LOG.warning('Could not write %s-%s-%s. %s', exchange, dtype, pair, e)
                if write_on_stop:
                    break
                retries += 1
                self.event.wait(self.config.storage_retry_wait)
            else:
                break

        # NOTE(review): this point is also reached when the write did not
        # succeed (failure with write_on_stop set, or self.terminating turning
        # true between retries) - confirm that dropping the cached data in
        # those cases is intended.
        LOG.info('Deleting cached data for %s-%s-%s', exchange, dtype, pair)
        self.cache.delete(exchange, dtype, pair)
        LOG.info('Write Complete %s-%s-%s', exchange, dtype, pair)
Example #4
0
    def _worker(self, exchange):
        """Backfill historical trades for every configured pair of ``exchange``.

        For each pair in ``self.config.backfill[exchange]`` the worker waits
        until storage reports a start date for every entry returned by
        ``get_start_date``, then fetches trades over REST from the latest of
        those dates back to the configured start, one segment at a time, and
        writes each segment to storage. A failure for one pair is logged and
        the worker moves on to the next pair.

        Args:
            exchange: key into ``self.config.backfill`` and into the ``Rest``
                object.
        """
        r = Rest()
        storage = Storage(self.config)
        for pair in self.config.backfill[exchange]:
            try:
                start = self.config.backfill[exchange][pair].start

                # Poll every 10 seconds until every returned start date is
                # truthy.
                # presumably get_start_date returns one value per configured
                # storage backend - TODO confirm
                while True:
                    end = storage.get_start_date(exchange, 'trades', pair)

                    if all(e for e in end):
                        break
                    time.sleep(10)
                # Step each stored start date back by one microsecond so the
                # backfill ends just before the data already in storage.
                ends = list(
                    map(
                        lambda x: Timestamp(x, unit='s') - Timedelta(
                            microseconds=1), end))

                # Skip the pair as soon as any of the stored dates is not
                # later than the requested backfill start.
                if any(e <= Timestamp(start) for e in ends):
                    LOG.info(
                        "Data in storage is earlier than backfill start date for %s - %s",
                        exchange, pair)
                    continue

                LOG.info("Backfill - Starting for %s - %s for range %s - %s",
                         exchange, pair, start, str(max(ends)))

                # Backfill from end date to start date, 1 day at a time, in reverse order (from end -> start)
                end = max(ends)
                start = Timestamp(start)
                while start < end:
                    # The segment starts at midnight of `end`'s day, clamped
                    # to the overall backfill start.
                    seg_start = end.replace(hour=0,
                                            minute=0,
                                            second=0,
                                            microsecond=0,
                                            nanosecond=0)
                    if start > seg_start:
                        seg_start = start
                    LOG.info("Backfill - Reading %s to %s for %s - %s",
                             seg_start, end, exchange, pair)

                    trades = []
                    try:
                        for t in r[exchange].trades(pair, str(seg_start),
                                                    str(end)):
                            trades.extend(t)
                    except Exception:
                        # `end` is unchanged, so after the 5 minute pause the
                        # same segment is requested again; there is no retry
                        # limit.
                        LOG.warning(
                            "Backfill - encountered error backfilling %s - %s, trying again...",
                            exchange,
                            pair,
                            exc_info=True)
                        time.sleep(300)
                        continue

                    # An empty segment: move on to the segment before it.
                    if not trades:
                        end = seg_start - Timedelta(nanoseconds=1)
                        continue

                    for trade in trades:
                        trade['price'] = float(trade['price'])
                        trade['amount'] = float(trade['amount'])

                    # Endless generator cycling through the indexes of `ends`.
                    def gen_pos():
                        counter = 0
                        while True:
                            yield counter % len(ends)
                            counter += 1

                    pos = gen_pos()
                    ends_float = [x.timestamp() for x in ends]

                    # Each call uses the next boundary in `ends_float` and
                    # keeps only the trades strictly older than it.
                    # presumably storage.aggregate invokes the transform once
                    # per backend, in the same order as get_start_date's
                    # results - TODO confirm
                    def timestamp_filter(data):
                        boundary = ends_float[next(pos)]
                        return list(
                            filter(lambda x: x['timestamp'] < boundary,
                                   copy.copy(data)))

                    storage.aggregate(trades, transform=timestamp_filter)
                    storage.write(exchange, 'trades', pair, end.timestamp())
                    LOG.info("Backfill - Wrote %s to %s for %s - %s",
                             seg_start, end, exchange, pair)
                    end = seg_start - Timedelta(nanoseconds=1)
                LOG.info("Backfill for %s - %s completed", exchange, pair)
            except Exception:
                # Log the failure and carry on with the next pair.
                LOG.error("Backfill failed for %s - %s",
                          exchange,
                          pair,
                          exc_info=True)
Example #5
0
    async def loop(self):
        """Aggregate cached market data into storage on a recurring interval.

        Builds the cache wrapper selected by ``self.config.cache`` ('redis' or
        'kafka'), interprets ``self.config.storage_interval`` and then loops
        without an exit condition: each pass reads every configured
        exchange/dtype/pair from the cache, aggregates and writes it through
        one ``Storage`` per pass (retrying failed writes), deletes it from the
        cache and sleeps for the remainder of the interval.

        Raises:
            EngineWriteError: when a write has used more than
                ``self.config.storage_retries`` retries.
            Exception: anything raised during a pass is logged with the PID
                and re-raised.
        """
        # NOTE(review): if self.config.cache is neither 'redis' nor 'kafka',
        # `cache` is never assigned and the first cache.read below fails.
        if self.config.cache == 'redis':
            cache = Redis(ip=self.config.redis['ip'],
                          port=self.config.redis['port'],
                          socket=self.config.redis.socket,
                          del_after_read=self.config.redis['del_after_read'],
                          flush=self.config.redis['start_flush'],
                          retention=self.config.redis.retention_time if 'retention_time' in self.config.redis else None)
        elif self.config.cache == 'kafka':
            cache = Kafka(self.config.kafka['ip'],
                          self.config.kafka['port'],
                          flush=self.config.kafka['start_flush'])

        # An int interval is used as-is (seconds). Anything else is read as
        # "<multiplier><unit>", e.g. '2H': the last character is the unit and
        # any leading characters are the integer multiplier.
        interval = self.config.storage_interval
        time_partition = False
        multiplier = 1
        if not isinstance(interval, int):
            if len(interval) > 1:
                multiplier = int(interval[:-1])
                interval = interval[-1]
            base_interval = interval
            # NOTE(review): a unit other than 'M', 'H' or 'D' leaves `interval`
            # as a string, which would break the `interval - total` arithmetic
            # further down - confirm the config is validated elsewhere.
            if interval in {'M', 'H', 'D'}:
                time_partition = True
                if interval == 'M':
                    interval = 60 * multiplier
                elif interval == 'H':
                    interval = 3600 * multiplier
                else:
                    interval = 86400 * multiplier

        # Shared across passes and handed to every Storage created below.
        parquet_buffer = dict()
        while True:
            # NOTE(review): start/end are reset on every pass, so the
            # `if end:` branch below can never be taken - confirm whether the
            # reset was meant to sit outside the loop.
            start, end = None, None
            try:
                aggregation_start = time.time()
                if time_partition:
                    interval_start = aggregation_start
                    if end:
                        interval_start = end + timedelta(seconds=interval + 1)
                    start, end = get_time_interval(interval_start, base_interval, multiplier=multiplier)
                if 'exchanges' in self.config and self.config.exchanges:
                    store = Storage(self.config, parquet_buffer=parquet_buffer)
                    for exchange in self.config.exchanges:
                        for dtype in self.config.exchanges[exchange]:
                            # Skip over the retries arg in the config if present.
                            if dtype in {'retries', 'channel_timeouts'}:
                                continue
                            # Symbols are either the entry itself or listed
                            # under its 'symbols' key.
                            for pair in self.config.exchanges[exchange][dtype] if 'symbols' not in self.config.exchanges[exchange][dtype] else self.config.exchanges[exchange][dtype]['symbols']:
                                LOG.info('Reading %s-%s-%s', exchange, dtype, pair)
                                data = cache.read(exchange, dtype, pair, start=start, end=end)
                                if len(data) == 0:
                                    LOG.info('No data for %s-%s-%s', exchange, dtype, pair)
                                    continue

                                store.aggregate(data)

                                # Retry the write until it succeeds or the
                                # retry budget is exhausted.
                                retries = 0
                                while True:
                                    if retries > self.config.storage_retries:
                                        LOG.error("Failed to write after %d retries", self.config.storage_retries)
                                        raise EngineWriteError

                                    try:
                                        # retrying this is ok, provided every
                                        # engine clears its internal buffer after writing successfully.
                                        store.write(exchange, dtype, pair, time.time())
                                    except OSError as e:
                                        # Only errno 112 is retried; any other
                                        # OSError is re-raised.
                                        if e.errno == 112:  # Host is down
                                            LOG.warning('Could not write %s-%s-%s. %s', exchange, dtype, pair, e)
                                            retries += 1
                                            await asyncio.sleep(self.config.storage_retry_wait)
                                            continue
                                        else:
                                            raise

                                    except EngineWriteError:
                                        # Retried without logging, unlike the
                                        # OSError path above.
                                        retries += 1
                                        await asyncio.sleep(self.config.storage_retry_wait)
                                        continue
                                    else:
                                        break

                                # Reached only after a successful write.
                                cache.delete(exchange, dtype, pair)
                                LOG.info('Write Complete %s-%s-%s', exchange, dtype, pair)
                    # Sleep for whatever is left of the interval, or 0.5s if
                    # the pass overran it.
                    total = time.time() - aggregation_start
                    wait = interval - total
                    if wait <= 0:
                        LOG.warning("Storage operations currently take %.1f seconds, longer than the interval of %d", total, interval)
                        wait = 0.5
                    await asyncio.sleep(wait)
                else:
                    # No exchanges configured: check again in 30 seconds.
                    await asyncio.sleep(30)
            except Exception:
                LOG.error("Aggregator running on PID %d died due to exception", os.getpid(), exc_info=True)
                raise
Example #6
0
    async def loop(self):
        """Periodically move cached data into storage, optionally by time window.

        Builds a ``Redis`` or ``Kafka`` cache wrapper from the config and
        interprets ``self.config.storage_interval``: an int is used as a
        number of seconds, anything else is read as ``<multiplier><unit>``
        where the units 'M', 'H' and 'D' enable time partitioning. Each pass
        reads every configured exchange/dtype/pair from the cache (bounded by
        the computed window when time partitioning is on), aggregates it into
        a fresh ``Storage``, writes it and deletes it from the cache. Any
        exception raised during a pass is logged with the PID and re-raised.
        """
        if self.config.cache == 'redis':
            redis_conf = self.config.redis
            cache = Redis(
                ip=redis_conf['ip'],
                port=redis_conf['port'],
                socket=redis_conf.socket,
                del_after_read=redis_conf['del_after_read'],
                flush=redis_conf['start_flush'],
                retention=(redis_conf.retention_time
                           if 'retention_time' in redis_conf else None))
        elif self.config.cache == 'kafka':
            kafka_conf = self.config.kafka
            cache = Kafka(kafka_conf['ip'],
                          kafka_conf['port'],
                          flush=kafka_conf['start_flush'])

        # Seconds represented by each supported time-partition unit.
        unit_seconds = {'M': 60, 'H': 3600, 'D': 86400}

        interval = self.config.storage_interval
        time_partition = False
        multiplier = 1
        if not isinstance(interval, int):
            # Leading characters are the multiplier, the last one is the unit.
            if len(interval) > 1:
                multiplier = int(interval[:-1])
                interval = interval[-1]
            base_interval = interval
            if interval in unit_seconds:
                time_partition = True
                interval = unit_seconds[interval] * multiplier

        while True:
            start, end = None, None
            try:
                cycle_began = time.time()
                if time_partition:
                    if end:
                        interval_start = end + timedelta(seconds=interval + 1)
                    else:
                        interval_start = cycle_began
                    start, end = get_time_interval(interval_start,
                                                   base_interval,
                                                   multiplier=multiplier)

                # Nothing configured yet: idle and look again later.
                if not ('exchanges' in self.config and self.config.exchanges):
                    await asyncio.sleep(30)
                    continue

                for exchange in self.config.exchanges:
                    for dtype in self.config.exchanges[exchange]:
                        # These keys are settings, not data types.
                        if dtype in ('retries', 'channel_timeouts'):
                            continue

                        # The symbols are either the entry itself or listed
                        # under its 'symbols' key.
                        dtype_conf = self.config.exchanges[exchange][dtype]
                        if 'symbols' in dtype_conf:
                            pairs = dtype_conf['symbols']
                        else:
                            pairs = dtype_conf

                        for pair in pairs:
                            store = Storage(self.config)
                            LOG.info('Reading %s-%s-%s', exchange, dtype, pair)
                            data = cache.read(exchange,
                                              dtype,
                                              pair,
                                              start=start,
                                              end=end)
                            if len(data) == 0:
                                LOG.info('No data for %s-%s-%s', exchange,
                                         dtype, pair)
                                continue

                            store.aggregate(data)
                            store.write(exchange, dtype, pair, time.time())

                            cache.delete(exchange, dtype, pair)
                            LOG.info('Write Complete %s-%s-%s', exchange,
                                     dtype, pair)

                elapsed = time.time() - cycle_began
                pause = interval - elapsed
                if pause <= 0:
                    LOG.warning(
                        "Storage operations currently take %.1f seconds, longer than the interval of %d",
                        elapsed, interval)
                    pause = 0.5
                await asyncio.sleep(pause)
            except Exception:
                LOG.error("Aggregator running on PID %d died due to exception",
                          os.getpid(),
                          exc_info=True)
                raise
Example #7
0
    def _worker(self, exchange):
        """Backfill trades for each configured pair of ``exchange``.

        For every pair in ``self.config.backfill[exchange]`` the worker polls
        storage until a start date is available, requiring all values returned
        by ``get_start_date`` to agree. It then fetches trades over REST from
        just before that date back to the configured start, one segment at a
        time, writing each non-empty segment to storage. Errors for a pair are
        logged and the worker continues with the next pair.
        """
        rest = Rest()
        storage = Storage(self.config)
        for pair in self.config.backfill[exchange]:
            try:
                start = self.config.backfill[exchange][pair].start

                # Wait (10s between polls) until storage reports a start date.
                while True:
                    stored = storage.get_start_date(exchange, 'trades', pair)
                    if not all(value == stored[0] for value in stored):
                        raise InconsistentStorage(
                            "Stored data differs, cannot backfill")
                    earliest = stored[0]
                    if earliest:
                        break
                    time.sleep(10)

                # Stop one microsecond short of the data already in storage.
                end = Timestamp(earliest, unit='s') - Timedelta(microseconds=1)
                start = Timestamp(start)
                if end <= Timestamp(start):
                    LOG.info(
                        "Data in storage is earlier than backfill start date for %s - %s",
                        exchange, pair)
                    continue

                LOG.info("Backfill - Starting for %s - %s for range %s - %s",
                         exchange, pair, start, str(end))

                # Walk backwards from `end` to `start`, one segment per pass.
                while start < end:
                    day_start = end.replace(hour=0,
                                            minute=0,
                                            second=0,
                                            microsecond=0,
                                            nanosecond=0)
                    # Never reach back past the requested backfill start.
                    seg_start = start if start > day_start else day_start
                    LOG.info("Backfill - Reading %s to %s for %s - %s",
                             seg_start, end, exchange, pair)

                    trades = []
                    try:
                        batches = rest[exchange].trades(pair, str(seg_start),
                                                        str(end))
                        for batch in batches:
                            trades.extend(batch)
                    except Exception:
                        # `end` is untouched, so the same segment is retried
                        # after the pause.
                        LOG.warning(
                            "Backfill - encountered error backfilling %s - %s, trying again...",
                            exchange,
                            pair,
                            exc_info=True)
                        time.sleep(300)
                        continue

                    if trades:
                        storage.aggregate(trades)
                        storage.write(exchange, 'trades', pair,
                                      end.timestamp())
                        LOG.info("Backfill - Wrote %s to %s for %s - %s",
                                 seg_start, end, exchange, pair)

                    # Step to just before the segment that was processed.
                    end = seg_start - Timedelta(nanoseconds=1)
                LOG.info("Backfill for %s - %s completed", exchange, pair)
            except Exception:
                LOG.error("Backfill failed for %s - %s",
                          exchange,
                          pair,
                          exc_info=True)
Example #8
0
    async def loop(self):
        """Aggregate cached data, trade statistics and arbitrage data in a loop.

        Builds the cache wrapper selected by ``self.config.cache``, creates a
        single ``Storage`` on ``self.store``, interprets
        ``self.config.storage_interval`` and initialises one stats cache per
        exchange/pair listed under ``TRADES``. Each pass reads every
        configured exchange/dtype/pair from the cache, collects stats for
        trade data, deletes the cached data, then writes the collected stats
        and data through ``self.store`` and hands the per-exchange data to
        ``self.write_arbs``. Any exception raised during a pass is logged with
        the PID and re-raised.

        NOTE(review): the '### tmp' markers and commented-out code suggest
        this is work in progress; see the review notes inline.
        """
        # NOTE(review): if self.config.cache is neither 'redis' nor 'kafka',
        # `cache` is never assigned and the first cache.read below fails.
        if self.config.cache == 'redis':
            cache = Redis(ip=self.config.redis['ip'],
                          port=self.config.redis['port'],
                          socket=self.config.redis.socket,
                          del_after_read=self.config.redis['del_after_read'],
                          flush=self.config.redis['start_flush'],
                          retention=self.config.redis.retention_time
                          if 'retention_time' in self.config.redis else None)
        elif self.config.cache == 'kafka':
            cache = Kafka(self.config.kafka['ip'],
                          self.config.kafka['port'],
                          flush=self.config.kafka['start_flush'])
        self.store = Storage(self.config)  ### tmp

        # An int interval is used as-is (seconds). Anything else is read as
        # "<multiplier><unit>": the last character is the unit and any
        # leading characters are the integer multiplier.
        interval = self.config.storage_interval
        time_partition = False
        multiplier = 1
        if not isinstance(interval, int):
            if len(interval) > 1:
                multiplier = int(interval[:-1])
                interval = interval[-1]
            base_interval = interval
            if interval in {'M', 'H', 'D'}:
                time_partition = True
                if interval == 'M':
                    interval = 60 * multiplier
                elif interval == 'H':
                    interval = 3600 * multiplier
                else:
                    interval = 86400 * multiplier

        # One stats cache per exchange/pair under TRADES.
        # NOTE(review): the Influx database name and host are hard-coded here
        # rather than read from the config.
        stats_cache = {}
        for exchange in self.config.exchanges:
            stats_cache[exchange] = {}
            for pair in self.config.exchanges[exchange][TRADES]:
                stats_cache[exchange][pair] = init_cache(
                    InfluxConfig(
                        db='crypto',
                        host='http://localhost:8086',
                        exchange=exchange,
                        pair=pair,
                    ))

        while True:
            # NOTE(review): start/end are reset on every pass, so the
            # `if end:` branch below can never be taken.
            start, end = None, None
            try:
                aggregation_start = time.time()
                if time_partition:
                    interval_start = aggregation_start
                    if end:
                        interval_start = end + timedelta(seconds=interval + 1)
                    start, end = get_time_interval(interval_start,
                                                   base_interval,
                                                   multiplier=multiplier)
                if 'exchanges' in self.config and self.config.exchanges:
                    # exchange -> pair -> data read from the cache this pass.
                    data_arb = {}
                    for exchange in self.config.exchanges:
                        # NOTE(review): both accumulators are re-created for
                        # every exchange but only consumed after this loop, so
                        # only the last exchange's entries reach the writes
                        # below.
                        stats_all = [
                        ]  ### Stats from each loop iter stored here
                        data_all = []  ### Data... ""
                        data_arb[exchange] = {}
                        for dtype in self.config.exchanges[exchange]:
                            # Skip over the retries arg in the config if present.
                            if dtype in {'retries', 'channel_timeouts'}:
                                continue
#                            for pair in self.config.exchanges[exchange][dtype] if 'symbols' not in \
#                                                                                  self.config.exchanges[exchange][
#                                                                                      dtype] else \
#                                    self.config.exchanges[exchange][dtype]['symbols']:
                            for pair in self.config.exchanges[exchange][
                                    dtype]:  ### tmp
                                #                                store = Storage(self.config)
                                LOG.info('Reading %s-%s-%s', exchange, dtype,
                                         pair)
                                data = cache.read(exchange,
                                                  dtype,
                                                  pair,
                                                  start=start,
                                                  end=end)
                                # Recorded before the emptiness check, so empty
                                # reads are stored too. data_arb is keyed by
                                # pair only, so a later dtype overwrites an
                                # earlier one for the same pair.
                                data_all.append(data)
                                data_arb[exchange][pair] = data
                                if len(data) == 0:
                                    LOG.info('No data for %s-%s-%s', exchange,
                                             dtype, pair)
                                    continue
                                #
                                if dtype == TRADES:
                                    stats_all.append(
                                        self.collect_stats(
                                            data, exchange, pair, stats_cache))
#                                    LOG.info('HAVING trades')
#                                    stats_to_write = []
#                                    for trade in data:
#                                        if 'id' not in trade:
#                                            trade['id'] = None
#                                        typed_trade = Trade(**trade)
#                                        update_stats(stats_cache[exchange][pair], typed_trade, stats_to_write)
#                                    LOG.info('DONE computing stats for %s-%s', exchange, pair)
#                                    store.aggregate(stats_to_write)
#                                    store.write(exchange, STATS, pair, time.time())
#
#                                self.store.aggregate(data)
#                                self.store.write(exchange, dtype, pair, time.time())

                                # NOTE(review): the cached data is deleted
                                # here, before anything has been written to
                                # storage.
                                cache.delete(exchange, dtype, pair)
#                                LOG.info('Write Complete %s-%s-%s', exchange, dtype, pair)
#
                    # NOTE(review): these writes run once per pass, after all
                    # loops, and use whatever exchange/dtype/pair the loops
                    # left behind - confirm this is the intended destination.
                    self.store.aggregate(stats_all)
                    self.store.write(exchange, STATS, pair, time.time())
                    if any(data_all):
                        self.store.aggregate(data_all)
                        self.store.write(exchange, dtype, pair, time.time())
                    if data_arb:
                        await self.write_arbs(data_arb)
                    #
                    # Sleep for whatever is left of the interval, or 0.5s if
                    # the pass overran it.
                    total = time.time() - aggregation_start
                    wait = interval - total
                    if wait <= 0:
                        LOG.warning(
                            "Storage operations currently take %.1f seconds, longer than the interval of %d",
                            total, interval)
                        wait = 0.5
                    else:
                        # Logged at warning level even though the pass
                        # finished within the interval.
                        LOG.warning(
                            f"Storage operations took {total}s, interval {interval}s"
                        )
                    await asyncio.sleep(wait)
                else:
                    # No exchanges configured: check again in 30 seconds.
                    await asyncio.sleep(30)
            except Exception:
                LOG.error("Aggregator running on PID %d died due to exception",
                          os.getpid(),
                          exc_info=True)
                raise