Code example #1
def init_logging():
    """
    init logging config
    :param debug:
    :return:
    """
    base_logger = logging.getLogger("synch")
    debug = Settings.debug()
    if debug:
        base_logger.setLevel(logging.DEBUG)
    else:
        base_logger.setLevel(logging.INFO)
    fmt = logging.Formatter(
        fmt="%(asctime)s - %(name)s:%(lineno)d - %(levelname)s - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
    sh = logging.StreamHandler(sys.stdout)
    sh.setLevel(logging.DEBUG)
    sh.setFormatter(fmt)
    base_logger.addHandler(sh)
    mail = Settings.get("mail")
    if mail:
        rate_limit = RateLimitingFilter(per=60)
        smtp_handler = logging.handlers.SMTPHandler(
            mailhost=mail.get("mailhost"),
            fromaddr=mail.get("fromaddr"),
            toaddrs=mail.get("toaddrs"),
            subject=mail.get("subject"),
            credentials=(mail.get("user"), mail.get("password")),
        )
        smtp_handler.setLevel(logging.ERROR)
        smtp_handler.setFormatter(fmt)
        smtp_handler.addFilter(rate_limit)
        base_logger.addHandler(smtp_handler)
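A minimal usage sketch, assuming the configuration has already been loaded (see code example #4); the log messages are placeholders. INFO and DEBUG records go to stdout only, while ERROR records are additionally mailed, throttled by the RateLimitingFilter, when a "mail" section is configured:

init_logging()
logger = logging.getLogger("synch")
logger.info("replication started")       # stdout only
logger.error("replication interrupted")  # stdout, plus mail if configured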
Code example #2
def get_writer(engine: ClickHouseEngine = None, choice=True) -> Union[ClickHouse, List[ClickHouse]]:
    """
    get writer once
    """
    writers = _writers.get(engine)
    if not choice:
        return writers
    if not writers:
        settings = Settings.get("clickhouse")
        hosts = settings.get("hosts")
        if Settings.is_cluster() and len(hosts) <= 1:
            raise ConfigurationError("hosts must contain more than one entry when cluster mode is enabled")
        for host in hosts:
            args = [host, settings.get("user"), settings.get("password"), Settings.cluster_name()]
            if engine == ClickHouseEngine.merge_tree.value:
                w = ClickHouseMergeTree(*args)
            elif engine == ClickHouseEngine.collapsing_merge_tree:
                w = ClickHouseCollapsingMergeTree(*args)
            elif engine == ClickHouseEngine.versioned_collapsing_merge_tree:
                w = ClickHouseVersionedCollapsingMergeTree(*args)
            elif engine == ClickHouseEngine.replacing_merge_tree or engine is None:
                w = ClickHouseReplacingMergeTree(*args)
            else:
                w = ClickHouse(*args)
            _writers.setdefault(engine, []).append(w)
    return random.choice(_writers.get(engine))  # nosec:B311
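A short usage sketch of this factory, mirroring how code example #7 calls it; the statement passed to execute() is only a placeholder:

# Random writer for the default (ReplacingMergeTree) engine.
writer = get_writer()
# Writer for an explicit engine, e.g. taken from a per-table config entry.
writer = get_writer(ClickHouseEngine.collapsing_merge_tree)
# All writers (one per configured host), e.g. for cluster-wide DDL.
for w in get_writer(choice=False):
    w.execute("select 1")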
Code example #3
File: redis.py  Project: zhbdesign/synch
 def __init__(self):
     """
     init setting and create redis instance
     """
     settings = Settings.get("redis")
     self.prefix = settings.get("prefix")
     self.queue_max_len = settings.get("queue_max_len")
     self.sentinel = settings.get("sentinel")
     if self.sentinel:
         sentinel = Sentinel(sentinels=map(lambda x: x.split(":"),
                                           settings.get("sentinel_hosts")))
         kwargs = dict(
             service_name=settings.get("sentinel_master"),
             password=settings.get("password"),
             decode_responses=True,
         )
         self.master = sentinel.master_for(**kwargs)
         self.slave = sentinel.slave_for(**kwargs)
     else:
         pool = redis.ConnectionPool(
             host=settings.get("host"),
             port=settings.get("port"),
             db=settings.get("db"),
             password=settings.get("password"),
             decode_responses=True,
         )
         self.master = self.slave = redis.StrictRedis(connection_pool=pool)
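With sentinel enabled, writes go through the elected master and reads through a replica; otherwise both attributes point at the same StrictRedis connection. The keys read above suggest a "redis" settings shape roughly like this sketch (key names come from the settings.get() calls; the values are placeholder assumptions):

redis_settings = {
    "prefix": "synch",
    "queue_max_len": 200000,
    "sentinel": False,  # set True to use sentinel_hosts/sentinel_master instead of host/port
    "sentinel_hosts": ["127.0.0.1:26379"],
    "sentinel_master": "master",
    "host": "127.0.0.1",
    "port": 6379,
    "db": 0,
    "password": "",
}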
Code example #4
def init(config_file):
    """
    init
    """
    Settings.init(config_file)
    init_logging()
    dsn = Settings.get("sentry", "dsn")
    if dsn:
        import sentry_sdk
        from sentry_sdk.integrations.redis import RedisIntegration

        sentry_sdk.init(
            dsn,
            environment=Settings.get("sentry", "environment"),
            integrations=[RedisIntegration()],
        )
    if Settings.monitoring():
        init_monitor_db()
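A hypothetical bootstrap call; the config path is an assumption, and Sentry and monitoring only activate when the corresponding settings exist:

init("./synch.yaml")
# After init(), other modules can read the config and use the shared logger.
clickhouse_settings = Settings.get("clickhouse")
logging.getLogger("synch").debug("bootstrap finished")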
Code example #5
File: kafka.py  Project: zhangweiwhim/synch
 def __init__(self, alias):
     """
     Read the Kafka settings for the given source alias and create the producer.
     """
     super().__init__(alias)
     self.servers = Settings.get("kafka").get("servers")
     self.topic = f'{Settings.get("kafka").get("topic_prefix")}.{alias}'
     self.databases = Settings.get_source_db(alias).get("databases")
     self.producer = KafkaProducer(
         bootstrap_servers=self.servers,
         value_serializer=lambda x: json.dumps(x, cls=JsonEncoder).encode(),
         key_serializer=lambda x: x.encode(),
     )
     self._init_topic()
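A hypothetical sketch of producing one change event through this broker; the class name, alias and payload are assumptions, not taken from the project. kafka-python's KafkaProducer.send() routes the key and value through the serializers configured above:

broker = KafkaBroker("mysql_db")  # class name and alias are assumptions
broker.producer.send(
    broker.topic,  # e.g. "<topic_prefix>.mysql_db"
    key="test.users",  # str, encoded by key_serializer
    value={"action": "insert", "values": {"id": 1}},  # dict, JSON-encoded by value_serializer
)
broker.producer.flush()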
Code example #6
def get_writer(engine: ClickHouseEngine = None) -> ClickHouse:
    """
    get writer once
    """
    w = _writers.get(engine)
    if not w:
        settings = Settings.get("clickhouse")
        if engine == ClickHouseEngine.merge_tree.value:
            w = ClickHouseMergeTree(settings)
        elif engine == ClickHouseEngine.collapsing_merge_tree:
            w = ClickHouseCollapsingMergeTree(settings)
        elif engine == ClickHouseEngine.versioned_collapsing_merge_tree:
            w = ClickHouseVersionedCollapsingMergeTree(settings)
        elif engine == ClickHouseEngine.replacing_merge_tree or engine is None:
            w = ClickHouseReplacingMergeTree(settings)
        _writers[engine] = w
    return w
Code example #7
File: etl.py  Project: zhangweiwhim/synch
def etl_full(
    alias: str,
    schema: str,
    tables_pk: Dict,
    renew=False,
):
    """
    full etl
    """
    reader = get_reader(alias)
    source_db_database = Settings.get_source_db_database(alias, schema)
    schema = source_db_database.get("database")
    writer = get_writer()
    if not writer.check_database_exists(schema):
        if source_db_database.get("auto_create") is not False:
            writer.create_database(schema, Settings.cluster_name())
        else:
            logger.warning(
                f"Can't etl since no database {schema} found in ClickHouse and auto_create=false"
            )
            exit(-1)
    for table in source_db_database.get("tables"):
        if table.get("auto_full_etl") is False:
            continue
        table_name = table.get("table")
        pk = tables_pk.get(table_name)
        writer = get_writer(table.get("clickhouse_engine"))
        if not pk and not renew:
            logger.warning(f"No pk found in {schema}.{table_name}, skip")
            continue
        elif isinstance(pk, tuple):
            pk = f"({','.join(pk)})"
        if renew:
            drop_sql = f"drop table if exists {schema}.{table_name}"
            writer.execute(drop_sql)
            logger.info(f"drop table success:{schema}.{table_name}")
        if not writer.check_table_exists(schema, table_name):
            sign_column = table.get("sign_column")
            version_column = table.get("version_column")
            writer.execute(
                writer.get_table_create_sql(
                    reader,
                    schema,
                    table_name,
                    pk,
                    table.get("partition_by"),
                    table.get("engine_settings"),
                    sign_column=sign_column,
                    version_column=version_column,
                ))
            if Settings.is_cluster():
                for w in get_writer(choice=False):
                    w.execute(
                        w.get_distributed_table_create_sql(
                            schema, table_name,
                            Settings.get("clickhouse.distributed_suffix")))
            if reader.fix_column_type and not table.get("skip_decimal"):
                writer.fix_table_column_type(reader, schema, table_name)
            full_insert_sql = writer.get_full_insert_sql(
                reader, schema, table_name, sign_column)
            writer.execute(full_insert_sql)
            logger.info(f"full data etl for {schema}.{table_name} success")
        else:
            logger.debug(
                f"{schema}.{table_name} exists, skip, or use --renew force etl with drop old tables"
            )
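A hypothetical invocation with placeholder alias, schema and primary-key mapping:

# Full load of test.users keyed by "id", dropping any existing ClickHouse table first.
etl_full("mysql_db", "test", {"users": "id"}, renew=True)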