Esempio n. 1
0
class MQReader(BaseReader):
    suffix = 'rt'

    @staticmethod
    def decode(args):
        raise NotImplementedError()

    @staticmethod
    def get_value(msg):
        raise NotImplementedError()

    @staticmethod
    def get_token(msg):
        raise NotImplementedError()

    def __init__(self, cache_key='', is_bootstrap=True, is_resume=True):
        """从 MQ 读取数据
        :param cache_key: 缓存key
        :param is_bootstrap: 是否全量读取
        :param is_resume: 是否断点续传
        """
        self.is_bootstrap = is_bootstrap
        self.is_resume = is_resume

        self._cache_key = cache_key
        self._cache_resume_token = None
        self.resume_token = {}

        if self.is_resume:
            key = f'{cache_key}:{self.suffix}'
            # 从外部缓存读取
            self._cache_resume_token = Cache(key, **REDIS_CONFIG)
            self.resume_token = self.get_resume_token()

    def get_resume_token(self):
        resume_token = self._cache_resume_token.get_all(
        ) if self._cache_resume_token else {}
        return dict(map(self.decode, resume_token.items()))

    def save_resume_token(self):
        if self._cache_resume_token and self.resume_token:
            logger.info('Save resume_token')
            self._cache_resume_token.setm(self.resume_token)

    def read(self):
        raise NotImplementedError()

    def commit(self, token):
        if self.is_resume:
            self.resume_token.update(token)

    def disconnect(self, *args, **kwargs):
        raise NotImplementedError()

    def stop(self):
        self.save_resume_token()
        logger.info(f'Reader closed')
Esempio n. 2
0
class DBReader(BaseReader):
    suffix_rt = 'rt'
    suffix_ts = 'ts'

    @staticmethod
    def decode(args):
        raise NotImplementedError()

    @staticmethod
    def decode_ts(args):
        k, v = args
        return b2s(k), json_util.loads(v)

    @staticmethod
    def save(cache, value):
        if cache and value:
            logger.info(f'Save {cache.__repr__()}')
            cache.setm(value)

    def __init__(self, tables, cache_key='', is_bootstrap=True, is_resume=True):
        """从 DB 读取数据
        :param tables: list, 要读取的库表
        :param cache_key: 缓存key
        :param is_bootstrap: 是否全量查询
        :param is_resume: 是与否启用断点续传
        """
        self.tables = tables
        self.is_bootstrap = is_bootstrap
        self.is_resume = is_resume

        self._cache_key = cache_key
        # resume_token 用于记录实时增量日志读取的位置
        self._cache_resume_token = None
        self.resume_token = {}
        self._rt = {}

        # 用于记录每张表读取到的时间
        self._cache_timestamps = None
        self.timestamps = {}
        self._ts = {}

        if self.is_resume:
            key_rt = f'{cache_key}:{self.suffix_rt}'
            # 从外部缓存读取
            self._cache_resume_token = Cache(key_rt, **REDIS_CONFIG)
            self.resume_token = self._rt = self.get_resume_token()

            key_ts = f'{cache_key}:{self.suffix_ts}'
            # 从外部缓存读取
            self._cache_timestamps = Cache(key_ts, **REDIS_CONFIG)
            self.timestamps = self._ts = self.get_timestamps()

        # 新增表
        self.new_tables = set(self.tables) - set(self.timestamps.keys())
        # 保留表
        self.old_tables = set(self.tables) & set(self.timestamps.keys())
        # 需要跟踪增量表
        self.inc_tables = self.new_tables | self.old_tables

    def get_resume_token(self):
        resume_token = self._cache_resume_token.get_all() if self._cache_resume_token else {}
        return dict(map(self.decode, resume_token.items()))

    def save_resume_token(self):
        self.save(self._cache_resume_token, self.resume_token)

    def get_timestamps(self):
        timestamps = self._cache_timestamps.get_all() if self._cache_timestamps else {}
        return dict(map(self.decode_ts, timestamps.items()))

    def save_timestamps(self):
        timestamps = {k: json_util.dumps(v) for k, v in self.timestamps.items()}
        self.save(self._cache_timestamps, timestamps)

    def read(self):
        # 如果 self.keys 有新增数据库,也就是与 self.timestamps 中有不同
        # 则启动全量拉取
        if self.new_tables and self.is_bootstrap:
            yield from self.bootstrap()
            if self.is_resume:
                self.push()

        yield from self.watch()

    def bootstrap(self, *args, **kwargs):
        """读取历史数据"""
        raise NotImplementedError()

    def watch(self, *args, **kwargs):
        """读取增量数据"""
        raise NotImplementedError()

    def commit(self, *args, **kwargs):
        self.resume_token.update(self._rt)
        self.timestamps.update(self._ts)

    def push(self):
        self.save_resume_token()
        self.save_timestamps()

    def disconnect(self, *args, **kwargs):
        raise NotImplementedError()

    def stop(self):
        if self.is_resume:
            self.push()
            logger.info(f'Reader closed')