Exemple #1
0
    async def loop(self):
        """Run the cache-to-storage aggregation cycle forever.

        Builds the cache client selected by ``self.config.cache`` ('redis' or
        'kafka'), then repeatedly walks every exchange / data type / pair in
        ``self.config.exchanges``: the cached entries are read, aggregated
        into a fresh ``Storage`` object, written out and finally deleted from
        the cache. Passes are paced by ``self.config.storage_interval``
        seconds; while no exchanges are configured the method polls again
        every 30 seconds.
        """
        backend = self.config.cache
        if backend == 'redis':
            redis_cfg = self.config.redis
            cache = Redis(
                ip=redis_cfg['ip'],
                port=redis_cfg['port'],
                socket=redis_cfg.socket,
                del_after_read=redis_cfg['del_after_read'],
                flush=redis_cfg['start_flush'],
                # Retention is optional in the config.
                retention=redis_cfg.retention_time if 'retention_time' in redis_cfg else None,
            )
        elif backend == 'kafka':
            kafka_cfg = self.config.kafka
            cache = Kafka(kafka_cfg['ip'], kafka_cfg['port'], flush=kafka_cfg['start_flush'])

        while True:
            start = time.time()

            # Nothing configured (yet): idle and re-check later.
            if not ('exchanges' in self.config and self.config.exchanges):
                await asyncio.sleep(30)
                continue

            for exchange in self.config.exchanges:
                for dtype in self.config.exchanges[exchange]:
                    # 'retries' is a setting, not a data type.
                    if dtype == 'retries':
                        continue

                    # Pairs are listed either directly or under a 'symbols' key.
                    entry = self.config.exchanges[exchange][dtype]
                    pairs = entry['symbols'] if 'symbols' in entry else entry
                    for pair in pairs:
                        store = Storage(self.config)
                        LOG.info('Reading %s-%s-%s', exchange, dtype, pair)

                        data = cache.read(exchange, dtype, pair)
                        if len(data) == 0:
                            LOG.info('No data for %s-%s-%s', exchange, dtype, pair)
                            continue

                        store.aggregate(data)
                        store.write(exchange, dtype, pair, time.time())

                        # Only drop the cached entries once they have been written.
                        cache.delete(exchange, dtype, pair)
                        LOG.info('Write Complete %s-%s-%s', exchange, dtype, pair)

            total = time.time() - start
            interval = self.config.storage_interval - total
            if interval <= 0:
                LOG.warning(
                    "Storage operations currently take %.1f seconds, longer than the interval of %d",
                    total, self.config.storage_interval)
                # Over budget: yield briefly instead of sleeping a negative time.
                interval = 0.5
            await asyncio.sleep(interval)
Exemple #2
0
    async def loop(self, loop):
        """Drive periodic aggregation until ``self.terminating`` becomes true.

        Creates the cache client selected by ``self.config.cache`` ('redis' or
        'kafka') and stores it on ``self.cache``, parses
        ``self.config.storage_interval`` and then, once per interval, awaits
        ``self._write_storage``.

        ``storage_interval`` is either an int (seconds) or a string made of an
        optional integer multiplier followed by 'M', 'H' or 'D' (60, 3600 and
        86400 seconds respectively). The string form also enables time
        partitioning: a ``(start, end)`` window from ``get_time_interval`` is
        passed on to ``_write_storage``.

        Args:
            loop: event loop handed through to ``_write_storage``. The sleeps
                below do not receive it, because ``asyncio.sleep`` lost its
                ``loop`` parameter in Python 3.10; they use the running loop.

        Raises:
            Exception: anything raised while aggregating is logged and
                re-raised.
        """
        if self.config.cache == 'redis':
            self.cache = Redis(ip=self.config.redis['ip'],
                          port=self.config.redis['port'],
                          password=os.environ.get('REDIS_PASSWORD', None) or self.config.redis.get('password', None),
                          socket=self.config.redis.socket,
                          del_after_read=self.config.redis['del_after_read'],
                          flush=self.config.redis['start_flush'],
                          retention=self.config.redis.retention_time if 'retention_time' in self.config.redis else None)
        elif self.config.cache == 'kafka':
            self.cache = Kafka(self.config.kafka['ip'],
                          self.config.kafka['port'],
                          flush=self.config.kafka['start_flush'])

        interval = self.config.storage_interval
        time_partition = False
        multiplier = 1
        if not isinstance(interval, int):
            # String form: "<multiplier><unit>", e.g. "2H"; a bare unit means x1.
            if len(interval) > 1:
                multiplier = int(interval[:-1])
                interval = interval[-1]
            base_interval = interval
            if interval in {'M', 'H', 'D'}:
                time_partition = True
                if interval == 'M':
                    interval = 60 * multiplier
                elif interval == 'H':
                    interval = 3600 * multiplier
                else:
                    interval = 86400 * multiplier

        self.parquet_buffer = dict()
        while not self.terminating:
            start, end = None, None
            try:
                aggregation_start = time.time()
                if time_partition:
                    interval_start = aggregation_start
                    # NOTE(review): `end` was reset to None at the top of this
                    # iteration, so this branch can never run as written.
                    # Confirm the intended carry-over of `end` (and the type
                    # returned by get_time_interval) before changing it.
                    if end:
                        interval_start = end + timedelta(seconds=interval + 1)
                    start, end = get_time_interval(interval_start, base_interval, multiplier=multiplier)
                if 'exchanges' in self.config and self.config.exchanges:
                    await self._write_storage(loop=loop, start=start, end=end)
                    total = time.time() - aggregation_start
                    wait = interval - total
                    if wait <= 0:
                        LOG.warning("Storage operations currently take %.1f seconds, longer than the interval of %d", total, interval)
                        wait = 0.5
                    try:
                        await asyncio.sleep(wait)
                    except asyncio.CancelledError:
                        # A cancelled sleep just ends the wait early; the
                        # while-condition then decides whether to stop.
                        pass
                else:
                    try:
                        await asyncio.sleep(30)
                    except asyncio.CancelledError:
                        # Same as above: wake up and re-check `terminating`.
                        pass
            except Exception:
                LOG.error("Aggregator running on PID %d died due to exception", os.getpid(), exc_info=True)
                raise

        LOG.info("Aggregator running on PID %d stopped", os.getpid())
Exemple #3
0
class Aggregator(Process):
    """Daemon process that periodically moves cached data into storage.

    ``run`` starts an event loop that executes ``loop``; every interval
    ``loop`` awaits ``_write_storage``, which fans the per-pair work
    (``_write_pair_blocking``) out to a thread pool. A signal handler
    (``_stop_on_signal``) flips ``terminating`` and stops the event loop,
    optionally after one final write.
    """

    def __init__(self, config_file=None):
        """Remember the config file path and mark the process as a daemon."""
        self.config_file = config_file
        super().__init__()
        self.daemon = True
        # Checked by the aggregation loop and by the write retry loop.
        self.terminating = False

    def run(self):
        """Process entry point: load the config, schedule ``loop``, run forever."""
        LOG.info("Aggregator running on PID %d", os.getpid())
        loop = asyncio.get_event_loop()
        self.config = DynamicConfig(loop=loop, file_name=self.config_file)
        loop.create_task(self.loop(loop=loop))

        self.event = threading.Event() # sleep control for write threads

        setup_event_loop_signal_handlers(loop, self._stop_on_signal)

        try:
            loop.run_forever()
        except KeyboardInterrupt:
            pass
        except Exception:
            LOG.error("Aggregator running on PID %d died due to exception", os.getpid(), exc_info=True)

    def _stop_on_signal(self, sig, loop):
        """Signal callback: request termination and stop the event loop.

        Args:
            sig: signal number, used for logging only.
            loop: the event loop to stop.
        """
        if self.terminating:
            LOG.info("Aggregator on PID %d is already being stopped...", os.getpid())
            return

        LOG.info("Stopping Aggregator on %d due to signal %d", os.getpid(), sig)
        self.terminating = True
        self.config.set_terminating()
        # Wake write threads that are waiting between retries.
        self.event.set()
        if 'write_on_stop' in self.config and self.config.write_on_stop \
                and 'exchanges' in self.config and self.config.exchanges:
            stop_event_loop(loop, self._write_storage(loop=loop, write_on_stop=True))
        else:
            stop_event_loop(loop)

    async def loop(self, loop):
        """Drive periodic aggregation until ``self.terminating`` becomes true.

        Creates the cache client selected by ``self.config.cache`` ('redis' or
        'kafka') on ``self.cache``, parses ``self.config.storage_interval``
        and then, once per interval, awaits ``_write_storage``.

        ``storage_interval`` is either an int (seconds) or a string made of an
        optional integer multiplier followed by 'M', 'H' or 'D' (60, 3600 and
        86400 seconds respectively). The string form also enables time
        partitioning: a ``(start, end)`` window from ``get_time_interval`` is
        passed on to ``_write_storage``.

        Args:
            loop: event loop handed through to ``_write_storage``. The sleeps
                below do not receive it, because ``asyncio.sleep`` lost its
                ``loop`` parameter in Python 3.10; they use the running loop.

        Raises:
            Exception: anything raised while aggregating is logged and
                re-raised.
        """
        if self.config.cache == 'redis':
            self.cache = Redis(ip=self.config.redis['ip'],
                          port=self.config.redis['port'],
                          password=os.environ.get('REDIS_PASSWORD', None) or self.config.redis.get('password', None),
                          socket=self.config.redis.socket,
                          del_after_read=self.config.redis['del_after_read'],
                          flush=self.config.redis['start_flush'],
                          retention=self.config.redis.retention_time if 'retention_time' in self.config.redis else None)
        elif self.config.cache == 'kafka':
            self.cache = Kafka(self.config.kafka['ip'],
                          self.config.kafka['port'],
                          flush=self.config.kafka['start_flush'])

        interval = self.config.storage_interval
        time_partition = False
        multiplier = 1
        if not isinstance(interval, int):
            # String form: "<multiplier><unit>", e.g. "2H"; a bare unit means x1.
            if len(interval) > 1:
                multiplier = int(interval[:-1])
                interval = interval[-1]
            base_interval = interval
            if interval in {'M', 'H', 'D'}:
                time_partition = True
                if interval == 'M':
                    interval = 60 * multiplier
                elif interval == 'H':
                    interval = 3600 * multiplier
                else:
                    interval = 86400 * multiplier

        # Shared with every Storage object created by the write threads.
        self.parquet_buffer = dict()
        while not self.terminating:
            start, end = None, None
            try:
                aggregation_start = time.time()
                if time_partition:
                    interval_start = aggregation_start
                    # NOTE(review): `end` was reset to None at the top of this
                    # iteration, so this branch can never run as written.
                    # Confirm the intended carry-over of `end` (and the type
                    # returned by get_time_interval) before changing it.
                    if end:
                        interval_start = end + timedelta(seconds=interval + 1)
                    start, end = get_time_interval(interval_start, base_interval, multiplier=multiplier)
                if 'exchanges' in self.config and self.config.exchanges:
                    await self._write_storage(loop=loop, start=start, end=end)
                    total = time.time() - aggregation_start
                    wait = interval - total
                    if wait <= 0:
                        LOG.warning("Storage operations currently take %.1f seconds, longer than the interval of %d", total, interval)
                        wait = 0.5
                    try:
                        await asyncio.sleep(wait)
                    except asyncio.CancelledError:
                        # A cancelled sleep just ends the wait early; the
                        # while-condition then decides whether to stop.
                        pass
                else:
                    try:
                        await asyncio.sleep(30)
                    except asyncio.CancelledError:
                        # Same as above: wake up and re-check `terminating`.
                        pass
            except Exception:
                LOG.error("Aggregator running on PID %d died due to exception", os.getpid(), exc_info=True)
                raise

        LOG.info("Aggregator running on PID %d stopped", os.getpid())

    async def _write_storage(self, loop, start=None, end=None, write_on_stop=False):
        """Write every configured exchange/dtype/pair using a thread pool.

        Args:
            loop: event loop whose ``run_in_executor`` schedules the workers.
            start: optional window start forwarded to the cache read.
            end: optional window end forwarded to the cache read.
            write_on_stop: True for the final write triggered on shutdown.

        Raises:
            Exception: the first exception raised by any worker
                (``return_exceptions=False``).
        """
        if write_on_stop:
            LOG.info("Writing cached data before stopping...")
        else:
            LOG.info("Writing cached data...")

        max_workers = self.config.num_write_threads if 'num_write_threads' in self.config else 1
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool:
            futures = []
            for exchange in self.config.exchanges:
                for dtype in self.config.exchanges[exchange]:
                    # Skip over the retries arg in the config if present.
                    if dtype in {'retries', 'channel_timeouts', 'http_proxy'}:
                        continue
                    # Pairs are listed either directly or under a 'symbols' key.
                    entry = self.config.exchanges[exchange][dtype]
                    pairs = entry['symbols'] if 'symbols' in entry else entry
                    for pair in pairs:
                        futures.append(loop.run_in_executor(pool, self._write_pair_blocking, exchange, dtype, pair, start, end, write_on_stop))

            # asyncio.gather no longer accepts `loop` (removed in Python 3.10);
            # the futures already belong to the loop that created them.
            await asyncio.gather(*futures, return_exceptions=False)

        LOG.info("Write finished")

    def _write_pair_blocking(self, exchange, dtype, pair, start, end, write_on_stop):
        """Read one pair from the cache, write it to storage, delete it from the cache.

        Runs in a worker thread. Writes are retried up to
        ``self.config.storage_retries`` times, waiting
        ``self.config.storage_retry_wait`` between attempts on ``self.event``
        so that a stop signal can cut the wait short.

        Args:
            exchange: exchange name.
            dtype: data type key.
            pair: trading pair / symbol.
            start: optional window start for the cache read.
            end: optional window end for the cache read.
            write_on_stop: when True, a single write attempt is made even
                though the aggregator is terminating, with no retries.

        Raises:
            EngineWriteError: when the retry budget is exhausted.
            OSError: for OS errors other than errno 112.
        """
        LOG.info('Reading cache for %s-%s-%s', exchange, dtype, pair)
        store = Storage(self.config, parquet_buffer=self.parquet_buffer)
        data = self.cache.read(exchange, dtype, pair, start=start, end=end)
        if len(data) == 0:
            LOG.info('No data for %s-%s-%s', exchange, dtype, pair)
            return

        store.aggregate(data)

        retries = 0
        while ((not self.terminating) or write_on_stop):
            if retries > self.config.storage_retries:
                LOG.error("Failed to write after %d retries", self.config.storage_retries)
                raise EngineWriteError

            try:
                # retrying this is ok, provided every
                # engine clears its internal buffer after writing successfully.
                LOG.info('Writing cached data to store for %s-%s-%s', exchange, dtype, pair)
                store.write(exchange, dtype, pair, time.time())
            except OSError as e:
                LOG.warning('Could not write %s-%s-%s. %s', exchange, dtype, pair, e)
                if write_on_stop:
                    break
                if e.errno == 112:  # Host is down
                    retries += 1
                    self.event.wait(self.config.storage_retry_wait)
                else:
                    raise

            except EngineWriteError as e:
                # The exception must be bound here: the warning below logs it,
                # and a name bound by an earlier `except ... as e` clause is
                # deleted when that clause ends.
                LOG.warning('Could not write %s-%s-%s. %s', exchange, dtype, pair, e)
                if write_on_stop:
                    break
                retries += 1
                self.event.wait(self.config.storage_retry_wait)
            else:
                break

        # NOTE(review): this point is also reached when the write failed during
        # write_on_stop or the loop ended because `terminating` was set, so the
        # cached data is deleted without having been stored. Confirm that this
        # best-effort behaviour is intended.
        LOG.info('Deleting cached data for %s-%s-%s', exchange, dtype, pair)
        self.cache.delete(exchange, dtype, pair)
        LOG.info('Write Complete %s-%s-%s', exchange, dtype, pair)
Exemple #4
0
    async def loop(self):
        """Run the cache-to-storage aggregation cycle forever, with write retries.

        Builds the cache client selected by ``self.config.cache`` ('redis' or
        'kafka') and parses ``self.config.storage_interval``, which is either
        an int (seconds) or a string made of an optional integer multiplier
        followed by 'M', 'H' or 'D'. The string form enables time
        partitioning: each pass reads only the ``(start, end)`` window returned
        by ``get_time_interval``.

        Every pass reads each configured exchange / data type / pair from the
        cache, aggregates and writes it through one ``Storage`` object, and
        deletes it from the cache afterwards. Writes that fail with
        ``EngineWriteError`` or ``OSError`` errno 112 are retried up to
        ``self.config.storage_retries`` times.

        Raises:
            EngineWriteError: when the retry budget is exhausted.
            Exception: any other failure is logged and re-raised.
        """
        backend = self.config.cache
        if backend == 'redis':
            redis_cfg = self.config.redis
            cache = Redis(
                ip=redis_cfg['ip'],
                port=redis_cfg['port'],
                socket=redis_cfg.socket,
                del_after_read=redis_cfg['del_after_read'],
                flush=redis_cfg['start_flush'],
                # Retention is optional in the config.
                retention=redis_cfg.retention_time if 'retention_time' in redis_cfg else None,
            )
        elif backend == 'kafka':
            kafka_cfg = self.config.kafka
            cache = Kafka(kafka_cfg['ip'], kafka_cfg['port'], flush=kafka_cfg['start_flush'])

        # Seconds per supported time-partition unit.
        unit_seconds = {'M': 60, 'H': 3600, 'D': 86400}

        interval = self.config.storage_interval
        multiplier = 1
        time_partition = False
        if not isinstance(interval, int):
            # String form: "<multiplier><unit>", e.g. "2H"; a bare unit means x1.
            if len(interval) > 1:
                multiplier = int(interval[:-1])
                interval = interval[-1]
            base_interval = interval
            if interval in unit_seconds:
                time_partition = True
                interval = unit_seconds[interval] * multiplier

        parquet_buffer = dict()
        while True:
            start, end = None, None
            try:
                aggregation_start = time.time()
                if time_partition:
                    interval_start = aggregation_start
                    # NOTE(review): `end` is always None here because it was
                    # reset at the top of this iteration; branch kept as-is.
                    if end:
                        interval_start = end + timedelta(seconds=interval + 1)
                    start, end = get_time_interval(interval_start, base_interval, multiplier=multiplier)

                # Nothing configured (yet): idle and re-check later.
                if not ('exchanges' in self.config and self.config.exchanges):
                    await asyncio.sleep(30)
                    continue

                store = Storage(self.config, parquet_buffer=parquet_buffer)
                for exchange in self.config.exchanges:
                    for dtype in self.config.exchanges[exchange]:
                        # These keys are settings, not data types.
                        if dtype in {'retries', 'channel_timeouts'}:
                            continue

                        # Pairs are listed either directly or under a 'symbols' key.
                        entry = self.config.exchanges[exchange][dtype]
                        pairs = entry['symbols'] if 'symbols' in entry else entry
                        for pair in pairs:
                            LOG.info('Reading %s-%s-%s', exchange, dtype, pair)
                            data = cache.read(exchange, dtype, pair, start=start, end=end)
                            if len(data) == 0:
                                LOG.info('No data for %s-%s-%s', exchange, dtype, pair)
                                continue

                            store.aggregate(data)

                            attempts = 0
                            while True:
                                if attempts > self.config.storage_retries:
                                    LOG.error("Failed to write after %d retries", self.config.storage_retries)
                                    raise EngineWriteError

                                try:
                                    # Retrying is fine as long as every engine
                                    # clears its buffer after a successful write.
                                    store.write(exchange, dtype, pair, time.time())
                                    break
                                except OSError as err:
                                    # Only "host is down" (errno 112) is retried.
                                    if err.errno != 112:
                                        raise
                                    LOG.warning('Could not write %s-%s-%s. %s', exchange, dtype, pair, err)
                                except EngineWriteError:
                                    # Retried silently, like the host-down case.
                                    pass

                                attempts += 1
                                await asyncio.sleep(self.config.storage_retry_wait)

                            cache.delete(exchange, dtype, pair)
                            LOG.info('Write Complete %s-%s-%s', exchange, dtype, pair)

                total = time.time() - aggregation_start
                wait = interval - total
                if wait <= 0:
                    LOG.warning("Storage operations currently take %.1f seconds, longer than the interval of %d", total, interval)
                    # Over budget: yield briefly instead of sleeping a negative time.
                    wait = 0.5
                await asyncio.sleep(wait)
            except Exception:
                LOG.error("Aggregator running on PID %d died due to exception", os.getpid(), exc_info=True)
                raise
Exemple #5
0
    async def loop(self):
        """Run the cache-to-storage aggregation cycle forever.

        Builds the cache client selected by ``self.config.cache`` ('redis' or
        'kafka') and parses ``self.config.storage_interval``, which is either
        an int (seconds) or a string made of an optional integer multiplier
        followed by 'M', 'H' or 'D'. The string form enables time
        partitioning: each pass reads only the ``(start, end)`` window returned
        by ``get_time_interval``.

        Every pass reads each configured exchange / data type / pair from the
        cache, aggregates and writes it through a fresh ``Storage`` object and
        then deletes it from the cache.

        Raises:
            Exception: any failure during a pass is logged and re-raised.
        """
        backend = self.config.cache
        if backend == 'redis':
            redis_cfg = self.config.redis
            cache = Redis(
                ip=redis_cfg['ip'],
                port=redis_cfg['port'],
                socket=redis_cfg.socket,
                del_after_read=redis_cfg['del_after_read'],
                flush=redis_cfg['start_flush'],
                # Retention is optional in the config.
                retention=redis_cfg.retention_time if 'retention_time' in redis_cfg else None,
            )
        elif backend == 'kafka':
            kafka_cfg = self.config.kafka
            cache = Kafka(kafka_cfg['ip'], kafka_cfg['port'], flush=kafka_cfg['start_flush'])

        # Seconds per supported time-partition unit.
        unit_seconds = {'M': 60, 'H': 3600, 'D': 86400}

        interval = self.config.storage_interval
        multiplier = 1
        time_partition = False
        if not isinstance(interval, int):
            # String form: "<multiplier><unit>", e.g. "2H"; a bare unit means x1.
            if len(interval) > 1:
                multiplier = int(interval[:-1])
                interval = interval[-1]
            base_interval = interval
            if interval in unit_seconds:
                time_partition = True
                interval = unit_seconds[interval] * multiplier

        while True:
            start, end = None, None
            try:
                aggregation_start = time.time()
                if time_partition:
                    interval_start = aggregation_start
                    # NOTE(review): `end` is always None here because it was
                    # reset at the top of this iteration; branch kept as-is.
                    if end:
                        interval_start = end + timedelta(seconds=interval + 1)
                    start, end = get_time_interval(interval_start, base_interval, multiplier=multiplier)

                # Nothing configured (yet): idle and re-check later.
                if not ('exchanges' in self.config and self.config.exchanges):
                    await asyncio.sleep(30)
                    continue

                for exchange in self.config.exchanges:
                    for dtype in self.config.exchanges[exchange]:
                        # These keys are settings, not data types.
                        if dtype in {'retries', 'channel_timeouts'}:
                            continue

                        # Pairs are listed either directly or under a 'symbols' key.
                        entry = self.config.exchanges[exchange][dtype]
                        pairs = entry['symbols'] if 'symbols' in entry else entry
                        for pair in pairs:
                            store = Storage(self.config)
                            LOG.info('Reading %s-%s-%s', exchange, dtype, pair)
                            data = cache.read(exchange, dtype, pair, start=start, end=end)
                            if len(data) == 0:
                                LOG.info('No data for %s-%s-%s', exchange, dtype, pair)
                                continue

                            store.aggregate(data)
                            store.write(exchange, dtype, pair, time.time())

                            # Only drop the cached entries once they have been written.
                            cache.delete(exchange, dtype, pair)
                            LOG.info('Write Complete %s-%s-%s', exchange, dtype, pair)

                total = time.time() - aggregation_start
                wait = interval - total
                if wait <= 0:
                    LOG.warning(
                        "Storage operations currently take %.1f seconds, longer than the interval of %d",
                        total, interval)
                    # Over budget: yield briefly instead of sleeping a negative time.
                    wait = 0.5
                await asyncio.sleep(wait)
            except Exception:
                LOG.error("Aggregator running on PID %d died due to exception", os.getpid(), exc_info=True)
                raise
Exemple #6
0
    async def loop(self):
        """Run the aggregation cycle forever, collecting trade stats and arbs.

        Work-in-progress variant (see the ``### tmp`` markers): besides reading
        every exchange / data type / pair from the cache it

        * builds a per-exchange, per-pair ``stats_cache`` via ``init_cache``,
        * feeds TRADES data through ``self.collect_stats``,
        * writes the collected stats and raw data through the single
          ``self.store`` created up front, and
        * passes the per-exchange data map to ``self.write_arbs``.

        ``self.config.storage_interval`` is either an int (seconds) or a string
        made of an optional integer multiplier followed by 'M', 'H' or 'D'.

        Raises:
            Exception: any failure during a pass is logged and re-raised.
        """
        if self.config.cache == 'redis':
            cache = Redis(ip=self.config.redis['ip'],
                          port=self.config.redis['port'],
                          socket=self.config.redis.socket,
                          del_after_read=self.config.redis['del_after_read'],
                          flush=self.config.redis['start_flush'],
                          retention=self.config.redis.retention_time
                          if 'retention_time' in self.config.redis else None)
        elif self.config.cache == 'kafka':
            cache = Kafka(self.config.kafka['ip'],
                          self.config.kafka['port'],
                          flush=self.config.kafka['start_flush'])
        self.store = Storage(self.config)  ### tmp

        interval = self.config.storage_interval
        time_partition = False
        multiplier = 1
        if not isinstance(interval, int):
            # String form: "<multiplier><unit>", e.g. "2H"; a bare unit means x1.
            if len(interval) > 1:
                multiplier = int(interval[:-1])
                interval = interval[-1]
            base_interval = interval
            if interval in {'M', 'H', 'D'}:
                time_partition = True
                if interval == 'M':
                    interval = 60 * multiplier
                elif interval == 'H':
                    interval = 3600 * multiplier
                else:
                    interval = 86400 * multiplier

        # One stats cache entry per configured (exchange, TRADES pair).
        # NOTE(review): the Influx db name and host are hard-coded here, and
        # this loop runs without the "'exchanges' in self.config" guard that
        # the main loop below uses.
        stats_cache = {}
        for exchange in self.config.exchanges:
            stats_cache[exchange] = {}
            for pair in self.config.exchanges[exchange][TRADES]:
                stats_cache[exchange][pair] = init_cache(
                    InfluxConfig(
                        db='crypto',
                        host='http://localhost:8086',
                        exchange=exchange,
                        pair=pair,
                    ))

        while True:
            start, end = None, None
            try:
                aggregation_start = time.time()
                if time_partition:
                    interval_start = aggregation_start
                    # NOTE(review): `end` is always None here because it was
                    # reset at the top of this iteration, so this branch
                    # cannot run as written.
                    if end:
                        interval_start = end + timedelta(seconds=interval + 1)
                    start, end = get_time_interval(interval_start,
                                                   base_interval,
                                                   multiplier=multiplier)
                if 'exchanges' in self.config and self.config.exchanges:
                    # exchange -> pair -> data read this pass (input to write_arbs).
                    data_arb = {}
                    for exchange in self.config.exchanges:
                        # NOTE(review): stats_all / data_all are re-created for
                        # every exchange but only consumed after this loop, so
                        # only the last exchange's entries reach the store.
                        stats_all = [
                        ]  ### Stats from each loop iter stored here
                        data_all = []  ### Data... ""
                        data_arb[exchange] = {}
                        for dtype in self.config.exchanges[exchange]:
                            # Skip over the retries arg in the config if present.
                            if dtype in {'retries', 'channel_timeouts'}:
                                continue
#                            for pair in self.config.exchanges[exchange][dtype] if 'symbols' not in \
#                                                                                  self.config.exchanges[exchange][
#                                                                                      dtype] else \
#                                    self.config.exchanges[exchange][dtype]['symbols']:
                            # The 'symbols' sub-key handling above is disabled;
                            # pairs are taken directly from the dtype entry.
                            for pair in self.config.exchanges[exchange][
                                    dtype]:  ### tmp
                                #                                store = Storage(self.config)
                                LOG.info('Reading %s-%s-%s', exchange, dtype,
                                         pair)
                                data = cache.read(exchange,
                                                  dtype,
                                                  pair,
                                                  start=start,
                                                  end=end)
                                # Recorded before the emptiness check, so empty
                                # reads are included in both collections.
                                data_all.append(data)
                                data_arb[exchange][pair] = data
                                if len(data) == 0:
                                    LOG.info('No data for %s-%s-%s', exchange,
                                             dtype, pair)
                                    continue
                                #
                                if dtype == TRADES:
                                    stats_all.append(
                                        self.collect_stats(
                                            data, exchange, pair, stats_cache))
#                                    LOG.info('HAVING trades')
#                                    stats_to_write = []
#                                    for trade in data:
#                                        if 'id' not in trade:
#                                            trade['id'] = None
#                                        typed_trade = Trade(**trade)
#                                        update_stats(stats_cache[exchange][pair], typed_trade, stats_to_write)
#                                    LOG.info('DONE computing stats for %s-%s', exchange, pair)
#                                    store.aggregate(stats_to_write)
#                                    store.write(exchange, STATS, pair, time.time())
#
#                                self.store.aggregate(data)
#                                self.store.write(exchange, dtype, pair, time.time())

                                # NOTE(review): the cache entry is deleted here,
                                # before the store writes below have happened.
                                cache.delete(exchange, dtype, pair)
#                                LOG.info('Write Complete %s-%s-%s', exchange, dtype, pair)
#
                    # NOTE(review): `exchange`, `pair` and `dtype` below are the
                    # values left over from the last loop iterations, so each
                    # write targets a single exchange/dtype/pair key.
                    self.store.aggregate(stats_all)
                    self.store.write(exchange, STATS, pair, time.time())
                    if any(data_all):
                        self.store.aggregate(data_all)
                        self.store.write(exchange, dtype, pair, time.time())
                    if data_arb:
                        await self.write_arbs(data_arb)
                    #
                    total = time.time() - aggregation_start
                    wait = interval - total
                    if wait <= 0:
                        LOG.warning(
                            "Storage operations currently take %.1f seconds, longer than the interval of %d",
                            total, interval)
                        # Over budget: yield briefly instead of sleeping a negative time.
                        wait = 0.5
                    else:
                        # Timing of a normal pass is also reported at WARNING level.
                        LOG.warning(
                            f"Storage operations took {total}s, interval {interval}s"
                        )
                    await asyncio.sleep(wait)
                else:
                    # Nothing configured: idle and re-check later.
                    await asyncio.sleep(30)
            except Exception:
                LOG.error("Aggregator running on PID %d died due to exception",
                          os.getpid(),
                          exc_info=True)
                raise