Example #1
0
    async def loop(self, loop):
        """Main aggregation loop.

        Builds the configured cache backend, then repeatedly drains cached
        data into storage via ``self._write_storage`` until ``self.terminating``
        is set.

        Args:
            loop: asyncio event loop, forwarded to ``self._write_storage``.
        """
        if self.config.cache == 'redis':
            self.cache = Redis(ip=self.config.redis['ip'],
                          port=self.config.redis['port'],
                          # Environment variable takes precedence over the config file.
                          password=os.environ.get('REDIS_PASSWORD', None) or self.config.redis.get('password', None),
                          socket=self.config.redis.socket,
                          del_after_read=self.config.redis['del_after_read'],
                          flush=self.config.redis['start_flush'],
                          retention=self.config.redis.retention_time if 'retention_time' in self.config.redis else None)
        elif self.config.cache == 'kafka':
            self.cache = Kafka(self.config.kafka['ip'],
                          self.config.kafka['port'],
                          flush=self.config.kafka['start_flush'])

        # storage_interval is either an int (seconds) or a string like
        # "30M" / "2H" / "D": optional integer multiplier + unit character.
        interval = self.config.storage_interval
        time_partition = False
        multiplier = 1
        if not isinstance(interval, int):
            if len(interval) > 1:
                multiplier = int(interval[:-1])
                interval = interval[-1]
            base_interval = interval
            if interval in {'M', 'H', 'D'}:
                time_partition = True
                if interval == 'M':
                    interval = 60 * multiplier
                elif interval == 'H':
                    interval = 3600 * multiplier
                else:
                    interval = 86400 * multiplier

        self.parquet_buffer = dict()
        # start/end must persist across iterations: each new partition window
        # begins just after the previous 'end'.  (Initializing them inside the
        # loop would make the 'if end:' branch below dead code.)
        start, end = None, None
        while not self.terminating:
            try:
                aggregation_start = time.time()
                if time_partition:
                    interval_start = aggregation_start
                    if end:
                        interval_start = end + timedelta(seconds=interval + 1)
                    start, end = get_time_interval(interval_start, base_interval, multiplier=multiplier)
                if 'exchanges' in self.config and self.config.exchanges:
                    await self._write_storage(loop=loop, start=start, end=end)
                    total = time.time() - aggregation_start
                    wait = interval - total
                    if wait <= 0:
                        LOG.warning("Storage operations currently take %.1f seconds, longer than the interval of %d", total, interval)
                        wait = 0.5
                    try:
                        # The 'loop' kwarg is not passed to sleep: it was
                        # deprecated in Python 3.8 and removed in 3.10;
                        # sleep always uses the running loop.
                        await asyncio.sleep(wait)
                    except asyncio.CancelledError:
                        # Cancellation is swallowed deliberately; shutdown is
                        # signalled via self.terminating instead.
                        pass
                else:
                    try:
                        await asyncio.sleep(30)
                    except asyncio.CancelledError:
                        pass
            except Exception:
                LOG.error("Aggregator running on PID %d died due to exception", os.getpid(), exc_info=True)
                raise

        LOG.info("Aggregator running on PID %d stopped", os.getpid())
Example #2
0
    async def loop(self):
        """Aggregation loop: repeatedly read cached market data per
        exchange/dtype/pair, aggregate it, and write it through ``Storage``.

        Raises:
            ValueError: if ``config.cache`` names an unsupported backend.
        """
        if self.config.cache == 'redis':
            cache = Redis(ip=self.config.redis['ip'],
                          port=self.config.redis['port'],
                          socket=self.config.redis.socket,
                          del_after_read=self.config.redis['del_after_read'],
                          flush=self.config.redis['start_flush'],
                          retention=self.config.redis.retention_time
                          if 'retention_time' in self.config.redis else None)
        elif self.config.cache == 'kafka':
            cache = Kafka(self.config.kafka['ip'],
                          self.config.kafka['port'],
                          flush=self.config.kafka['start_flush'])
        else:
            # Fail fast with a clear message instead of an UnboundLocalError
            # the first time 'cache' is used below.
            raise ValueError(f"Unsupported cache type: {self.config.cache}")

        # storage_interval is either an int (seconds) or a string like
        # "30M" / "2H" / "D": optional integer multiplier + unit character.
        interval = self.config.storage_interval
        time_partition = False
        multiplier = 1
        if not isinstance(interval, int):
            if len(interval) > 1:
                multiplier = int(interval[:-1])
                interval = interval[-1]
            base_interval = interval
            if interval in {'M', 'H', 'D'}:
                time_partition = True
                if interval == 'M':
                    interval = 60 * multiplier
                elif interval == 'H':
                    interval = 3600 * multiplier
                else:
                    interval = 86400 * multiplier

        # start/end must persist across iterations: each new partition window
        # begins just after the previous 'end'.  (Initializing them inside the
        # loop would make the 'if end:' branch below dead code.)
        start, end = None, None
        while True:
            try:
                aggregation_start = time.time()
                if time_partition:
                    interval_start = aggregation_start
                    if end:
                        interval_start = end + timedelta(seconds=interval + 1)
                    start, end = get_time_interval(interval_start,
                                                   base_interval,
                                                   multiplier=multiplier)
                if 'exchanges' in self.config and self.config.exchanges:
                    for exchange in self.config.exchanges:
                        for dtype in self.config.exchanges[exchange]:
                            # Skip over the retries arg in the config if present.
                            if dtype in {'retries', 'channel_timeouts'}:
                                continue
                            entry = self.config.exchanges[exchange][dtype]
                            # Pairs are either listed directly under the dtype
                            # or nested under a 'symbols' key.
                            pairs = entry['symbols'] if 'symbols' in entry else entry
                            for pair in pairs:
                                store = Storage(self.config)
                                LOG.info('Reading %s-%s-%s', exchange, dtype,
                                         pair)
                                data = cache.read(exchange,
                                                  dtype,
                                                  pair,
                                                  start=start,
                                                  end=end)
                                if len(data) == 0:
                                    LOG.info('No data for %s-%s-%s', exchange,
                                             dtype, pair)
                                    continue

                                store.aggregate(data)
                                store.write(exchange, dtype, pair, time.time())

                                # Only drop the cached slice once the write
                                # above has succeeded.
                                cache.delete(exchange, dtype, pair)
                                LOG.info('Write Complete %s-%s-%s', exchange,
                                         dtype, pair)
                    total = time.time() - aggregation_start
                    wait = interval - total
                    if wait <= 0:
                        LOG.warning(
                            "Storage operations currently take %.1f seconds, longer than the interval of %d",
                            total, interval)
                        wait = 0.5
                    await asyncio.sleep(wait)
                else:
                    # Nothing configured; idle and re-check periodically.
                    await asyncio.sleep(30)
            except Exception:
                LOG.error("Aggregator running on PID %d died due to exception",
                          os.getpid(),
                          exc_info=True)
                raise
Example #3
0
    async def loop(self):
        """Aggregation loop with write retries.

        Repeatedly reads cached market data per exchange/dtype/pair,
        aggregates it, and writes it through a shared ``Storage`` instance,
        retrying transient write failures up to ``config.storage_retries``
        times.

        Raises:
            ValueError: if ``config.cache`` names an unsupported backend.
            EngineWriteError: if a write still fails after all retries.
        """
        if self.config.cache == 'redis':
            cache = Redis(ip=self.config.redis['ip'],
                          port=self.config.redis['port'],
                          socket=self.config.redis.socket,
                          del_after_read=self.config.redis['del_after_read'],
                          flush=self.config.redis['start_flush'],
                          retention=self.config.redis.retention_time if 'retention_time' in self.config.redis else None)
        elif self.config.cache == 'kafka':
            cache = Kafka(self.config.kafka['ip'],
                          self.config.kafka['port'],
                          flush=self.config.kafka['start_flush'])
        else:
            # Fail fast with a clear message instead of an UnboundLocalError
            # the first time 'cache' is used below.
            raise ValueError(f"Unsupported cache type: {self.config.cache}")

        # storage_interval is either an int (seconds) or a string like
        # "30M" / "2H" / "D": optional integer multiplier + unit character.
        interval = self.config.storage_interval
        time_partition = False
        multiplier = 1
        if not isinstance(interval, int):
            if len(interval) > 1:
                multiplier = int(interval[:-1])
                interval = interval[-1]
            base_interval = interval
            if interval in {'M', 'H', 'D'}:
                time_partition = True
                if interval == 'M':
                    interval = 60 * multiplier
                elif interval == 'H':
                    interval = 3600 * multiplier
                else:
                    interval = 86400 * multiplier

        parquet_buffer = dict()
        # start/end must persist across iterations: each new partition window
        # begins just after the previous 'end'.  (Initializing them inside the
        # loop would make the 'if end:' branch below dead code.)
        start, end = None, None
        while True:
            try:
                aggregation_start = time.time()
                if time_partition:
                    interval_start = aggregation_start
                    if end:
                        interval_start = end + timedelta(seconds=interval + 1)
                    start, end = get_time_interval(interval_start, base_interval, multiplier=multiplier)
                if 'exchanges' in self.config and self.config.exchanges:
                    store = Storage(self.config, parquet_buffer=parquet_buffer)
                    for exchange in self.config.exchanges:
                        for dtype in self.config.exchanges[exchange]:
                            # Skip over the retries arg in the config if present.
                            if dtype in {'retries', 'channel_timeouts'}:
                                continue
                            entry = self.config.exchanges[exchange][dtype]
                            # Pairs are either listed directly under the dtype
                            # or nested under a 'symbols' key.
                            pairs = entry['symbols'] if 'symbols' in entry else entry
                            for pair in pairs:
                                LOG.info('Reading %s-%s-%s', exchange, dtype, pair)
                                data = cache.read(exchange, dtype, pair, start=start, end=end)
                                if len(data) == 0:
                                    LOG.info('No data for %s-%s-%s', exchange, dtype, pair)
                                    continue

                                store.aggregate(data)

                                retries = 0
                                while True:
                                    if retries > self.config.storage_retries:
                                        LOG.error("Failed to write after %d retries", self.config.storage_retries)
                                        raise EngineWriteError

                                    try:
                                        # retrying this is ok, provided every
                                        # engine clears its internal buffer after writing successfully.
                                        store.write(exchange, dtype, pair, time.time())
                                    except OSError as e:
                                        if e.errno == 112:  # Host is down (EHOSTDOWN)
                                            LOG.warning('Could not write %s-%s-%s. %s', exchange, dtype, pair, e)
                                            retries += 1
                                            await asyncio.sleep(self.config.storage_retry_wait)
                                            continue
                                        else:
                                            raise

                                    except EngineWriteError:
                                        retries += 1
                                        await asyncio.sleep(self.config.storage_retry_wait)
                                        continue
                                    else:
                                        break

                                # Only drop the cached slice once the write
                                # above has succeeded.
                                cache.delete(exchange, dtype, pair)
                                LOG.info('Write Complete %s-%s-%s', exchange, dtype, pair)
                    total = time.time() - aggregation_start
                    wait = interval - total
                    if wait <= 0:
                        LOG.warning("Storage operations currently take %.1f seconds, longer than the interval of %d", total, interval)
                        wait = 0.5
                    await asyncio.sleep(wait)
                else:
                    # Nothing configured; idle and re-check periodically.
                    await asyncio.sleep(30)
            except Exception:
                LOG.error("Aggregator running on PID %d died due to exception", os.getpid(), exc_info=True)
                raise
Example #4
0
    async def loop(self):
        """Aggregation loop (work-in-progress variant).

        Drains the configured cache, computes per-trade statistics via
        ``self.collect_stats``, writes aggregates and stats through
        ``self.store``, and forwards cross-exchange data to
        ``self.write_arbs``.

        NOTE(review): the '### tmp' markers and large commented-out sections
        below indicate this version is mid-refactor; several defects are
        flagged inline rather than fixed.
        """
        if self.config.cache == 'redis':
            cache = Redis(ip=self.config.redis['ip'],
                          port=self.config.redis['port'],
                          socket=self.config.redis.socket,
                          del_after_read=self.config.redis['del_after_read'],
                          flush=self.config.redis['start_flush'],
                          retention=self.config.redis.retention_time
                          if 'retention_time' in self.config.redis else None)
        elif self.config.cache == 'kafka':
            cache = Kafka(self.config.kafka['ip'],
                          self.config.kafka['port'],
                          flush=self.config.kafka['start_flush'])
        # NOTE(review): if config.cache is neither 'redis' nor 'kafka',
        # 'cache' is never bound and the first cache.read below raises
        # UnboundLocalError.
        self.store = Storage(self.config)  ### tmp

        # storage_interval is either an int (seconds) or a string like
        # "30M" / "2H" / "D": optional integer multiplier + unit character.
        interval = self.config.storage_interval
        time_partition = False
        multiplier = 1
        if not isinstance(interval, int):
            if len(interval) > 1:
                multiplier = int(interval[:-1])
                interval = interval[-1]
            base_interval = interval
            if interval in {'M', 'H', 'D'}:
                time_partition = True
                if interval == 'M':
                    interval = 60 * multiplier
                elif interval == 'H':
                    interval = 3600 * multiplier
                else:
                    interval = 86400 * multiplier

        # Per-exchange/per-pair stats caches backed by InfluxDB.
        # NOTE(review): the Influx host is hard-coded to localhost:8086 —
        # presumably temporary; should come from config.
        stats_cache = {}
        for exchange in self.config.exchanges:
            stats_cache[exchange] = {}
            for pair in self.config.exchanges[exchange][TRADES]:
                stats_cache[exchange][pair] = init_cache(
                    InfluxConfig(
                        db='crypto',
                        host='http://localhost:8086',
                        exchange=exchange,
                        pair=pair,
                    ))

        while True:
            # NOTE(review): start/end are reset every iteration, so the
            # 'if end:' branch below can never fire — the partition window
            # never advances from the previous end.  Likely these were meant
            # to be initialized once, before the loop.
            start, end = None, None
            try:
                aggregation_start = time.time()
                if time_partition:
                    interval_start = aggregation_start
                    if end:
                        interval_start = end + timedelta(seconds=interval + 1)
                    start, end = get_time_interval(interval_start,
                                                   base_interval,
                                                   multiplier=multiplier)
                if 'exchanges' in self.config and self.config.exchanges:
                    data_arb = {}
                    for exchange in self.config.exchanges:
                        # NOTE(review): stats_all/data_all are re-created for
                        # each exchange but only consumed AFTER the exchange
                        # loop ends, so only the last exchange's results are
                        # ever written — looks like a WIP bug.
                        stats_all = [
                        ]  ### Stats from each loop iter stored here
                        data_all = []  ### Data... ""
                        data_arb[exchange] = {}
                        for dtype in self.config.exchanges[exchange]:
                            # Skip over the retries arg in the config if present.
                            if dtype in {'retries', 'channel_timeouts'}:
                                continue
#                            for pair in self.config.exchanges[exchange][dtype] if 'symbols' not in \
#                                                                                  self.config.exchanges[exchange][
#                                                                                      dtype] else \
#                                    self.config.exchanges[exchange][dtype]['symbols']:
                            # NOTE(review): unlike the commented-out version
                            # above, this ignores a nested 'symbols' key.
                            for pair in self.config.exchanges[exchange][
                                    dtype]:  ### tmp
                                #                                store = Storage(self.config)
                                LOG.info('Reading %s-%s-%s', exchange, dtype,
                                         pair)
                                data = cache.read(exchange,
                                                  dtype,
                                                  pair,
                                                  start=start,
                                                  end=end)
                                data_all.append(data)
                                data_arb[exchange][pair] = data
                                if len(data) == 0:
                                    LOG.info('No data for %s-%s-%s', exchange,
                                             dtype, pair)
                                    continue
                                #
                                if dtype == TRADES:
                                    stats_all.append(
                                        self.collect_stats(
                                            data, exchange, pair, stats_cache))
#                                    LOG.info('HAVING trades')
#                                    stats_to_write = []
#                                    for trade in data:
#                                        if 'id' not in trade:
#                                            trade['id'] = None
#                                        typed_trade = Trade(**trade)
#                                        update_stats(stats_cache[exchange][pair], typed_trade, stats_to_write)
#                                    LOG.info('DONE computing stats for %s-%s', exchange, pair)
#                                    store.aggregate(stats_to_write)
#                                    store.write(exchange, STATS, pair, time.time())
#
#                                self.store.aggregate(data)
#                                self.store.write(exchange, dtype, pair, time.time())

                                # NOTE(review): cache entries are deleted even
                                # though the actual write happens after the
                                # loop — a write failure loses this data.
                                cache.delete(exchange, dtype, pair)
#                                LOG.info('Write Complete %s-%s-%s', exchange, dtype, pair)
#
                    # NOTE(review): these run after the exchange loop and rely
                    # on the leaked loop variables exchange/dtype/pair from the
                    # final iteration — only the last exchange/pair combination
                    # is used as the write key.  Almost certainly WIP.
                    self.store.aggregate(stats_all)
                    self.store.write(exchange, STATS, pair, time.time())
                    if any(data_all):
                        self.store.aggregate(data_all)
                        self.store.write(exchange, dtype, pair, time.time())
                    if data_arb:
                        await self.write_arbs(data_arb)
                    #
                    total = time.time() - aggregation_start
                    wait = interval - total
                    if wait <= 0:
                        LOG.warning(
                            "Storage operations currently take %.1f seconds, longer than the interval of %d",
                            total, interval)
                        wait = 0.5
                    else:
                        # NOTE(review): logs at WARNING level even when timing
                        # is normal — probably temporary debug output.
                        LOG.warning(
                            f"Storage operations took {total}s, interval {interval}s"
                        )
                    await asyncio.sleep(wait)
                else:
                    # Nothing configured; idle and re-check periodically.
                    await asyncio.sleep(30)
            except Exception:
                LOG.error("Aggregator running on PID %d died due to exception",
                          os.getpid(),
                          exc_info=True)
                raise