Example no. 1
0
def test_full_etl_postgres():
    """Full ETL round-trip for Postgres: insert a row, run etl_full, verify it in ClickHouse."""
    database = get_postgres_database()

    insert_sql = "insert into test(id,amount) values(1,1)"
    get_reader(alias_postgres).execute(insert_sql)

    etl_full(alias_postgres, database, {"test": "id"}, True)

    select_sql = f"select * from {database}.test"
    result = get_writer().execute(select_sql)
    assert result == [(1, Decimal("1"))]
Example no. 2
0
def test_full_etl_mysql():
    """Full ETL round-trip for MySQL: insert a row, run etl_full, verify it in ClickHouse."""
    database = get_mysql_database()

    insert_sql = f"insert into {database}.test(amount) values(1.00)"
    get_reader(alias_mysql).execute(insert_sql)

    etl_full(alias_mysql, database, {"test": "id"}, True)

    select_sql = f"select * from {database}.test"

    result = get_writer().execute(select_sql)
    assert result == [(1, Decimal("1"))]
Example no. 3
0
def etl_full(
    alias: str,
    schema: str,
    tables_pk: Dict,
    renew=False,
):
    """
    Run a full (one-shot) ETL of the configured tables into ClickHouse.

    :param alias: source database alias used to resolve the reader and settings.
    :param schema: source schema/database name; remapped below to the configured
        target database name from settings.
    :param tables_pk: mapping of table name -> primary key (str, or tuple for a
        composite key). Tables without an entry are skipped.
    :param renew: when True, drop each existing target table before re-creating it.
    """
    reader = get_reader(alias)
    source_db_database = Settings.get_source_db_database(alias, schema)
    # the configured target database name may differ from the schema passed in
    schema = source_db_database.get("database")
    writer = get_writer()
    if not writer.check_database_exists(schema):
        if source_db_database.get("auto_create") is not False:
            writer.create_database(schema)
        else:
            # hard exit: nothing to etl into and auto-creation is disabled
            logger.warning(
                f"Can't etl since no database {schema} found in ClickHouse and auto_create=false"
            )
            exit(-1)
    for table in source_db_database.get("tables"):
        # per-table opt-out of the full etl
        if table.get("auto_full_etl") is False:
            continue
        table_name = table.get("table")
        pk = tables_pk.get(table_name)
        # writer is re-resolved per table since the ClickHouse engine may differ
        writer = get_writer(table.get("clickhouse_engine"))
        if not pk:
            logger.warning(f"No pk found in {schema}.{table_name}, skip")
            continue
        elif isinstance(pk, tuple):
            # composite primary key -> "(col1,col2,...)" for the create statement
            pk = f"({','.join(pk)})"
        if renew:
            drop_sql = f"drop table if exists {schema}.{table_name}"
            writer.execute(drop_sql)
            logger.info(f"drop table success:{schema}.{table_name}")
        if not writer.check_table_exists(schema, table_name):
            sign_column = table.get("sign_column")
            version_column = table.get("version_column")
            writer.execute(
                writer.get_table_create_sql(
                    reader,
                    schema,
                    table_name,
                    pk,
                    table.get("partition_by"),
                    table.get("engine_settings"),
                    sign_column=sign_column,
                    version_column=version_column,
                ))
            # NOTE(review): presumably adjusts decimal-like column types — the
            # "skip_decimal" flag suggests so; confirm against the writer impl
            if reader.fix_column_type and not table.get("skip_decimal"):
                writer.fix_table_column_type(reader, schema, table_name)
            full_insert_sql = writer.get_full_insert_sql(
                reader, schema, table_name, sign_column)
            writer.execute(full_insert_sql)
            logger.info(f"full data etl for {schema}.{table_name} success")
        else:
            logger.debug(
                f"{schema}.{table_name} exists, skip, or use --renew force etl with drop old tables"
            )
Example no. 4
0
def produce(ctx: Context):
    """Start the change-event producer for the configured source alias."""
    source_alias = ctx.obj["alias"]
    source_reader = get_reader(source_alias)
    event_broker = get_broker(source_alias)
    started_at = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    logger.info(f"start producer success at {started_at}")
    source_reader.start_sync(event_broker)
Example no. 5
0
def create_mysql_table(initialize_tests):
    """Fixture helper: ensure the MySQL test database and `test` table exist."""
    database = get_mysql_database()
    ddl = f"""create database if not exists {database};use {database};create table if not exists `test`  (
  `id` int not null auto_increment,
  `amount` decimal(10,2) not null,
  primary key (`id`)
) engine=innodb auto_increment=10 default charset=utf8mb4 collate=utf8mb4_general_ci"""
    get_reader(alias_mysql).execute(ddl)
Example no. 6
0
File: cli.py Project: yy1117/synch
def etl(ctx: Context, schema: str, renew: bool, table: List[str]):
    """
    CLI entry: run a full ETL for *schema*.

    :param ctx: click context carrying the configured source alias in ctx.obj.
    :param schema: source schema/database to etl.
    :param renew: drop and re-create target tables when True.
    :param table: explicit table names; falls back to all configured tables
        for the schema when empty.
    """
    alias = ctx.obj["alias"]
    tables = table
    if not tables:
        tables = Settings.get_source_db_database_tables_name(alias, schema)
    reader = get_reader(alias)
    # use a distinct loop name: the original `for table in tables` shadowed
    # the `table` parameter
    tables_pk = {
        table_name: reader.get_primary_key(schema, table_name)
        for table_name in tables
    }
    etl_full(alias, schema, tables_pk, renew)
Example no. 7
0
def truncate_postgres_table(request):
    """Fixture: empty the Postgres test table now, and again (plus ClickHouse) on teardown."""
    postgres = get_postgres_database()
    truncate_sql = f"truncate table {postgres}.public.test restart identity cascade"
    source_reader = get_reader(alias_postgres)
    source_reader.execute(truncate_sql)

    def _cleanup():
        source_reader.execute(truncate_sql)
        get_writer().execute(f"truncate table if exists {postgres}.test")

    request.addfinalizer(_cleanup)
Example no. 8
0
def truncate_mysql_table(request):
    """Fixture: empty the MySQL test table now, and again (plus ClickHouse) on teardown."""
    database = get_mysql_database()
    truncate_sql = f"truncate table {database}.test"
    source_reader = get_reader(alias_mysql)

    source_reader.execute(truncate_sql)

    def _cleanup():
        source_reader.execute(truncate_sql)
        get_writer().execute(f"truncate table if exists {database}.test")

    request.addfinalizer(_cleanup)
Example no. 9
0
def check(ctx: Context, schema: str):
    """Compare row counts between each source table and its ClickHouse target."""
    alias = ctx.obj["alias"]
    reader = get_reader(alias)
    writer = get_writer()
    for table in Settings.get_source_db_database_tables_name(alias, schema):
        source_table_count = reader.get_count(schema, table)
        target_table_count = writer.get_count(schema, table)
        if source_table_count != target_table_count:
            logger.warning(
                f"{schema}.{table} is not equal, source_table_count={source_table_count}, target_table_count={target_table_count}"
            )
        else:
            logger.info(f"{schema}.{table} is equal, count={source_table_count}")
Example no. 10
0
def consume(ctx: Context, schema: str, skip_error: bool, last_msg_id: str):
    """Consume change events for *schema*, first ensuring a full ETL has run."""
    alias = ctx.obj["alias"]
    reader = get_reader(alias)
    table_names = Settings.get_source_db_database_tables_name(alias, schema)
    tables_pk = {
        name: reader.get_primary_key(schema, name) for name in table_names
    }

    # try etl full
    etl_full(alias, schema, tables_pk)
    table_dict = Settings.get_source_db_database_tables_dict(alias, schema)

    continuous_etl(
        alias, schema, tables_pk, table_dict, last_msg_id, skip_error,
    )
Example no. 11
0
def create_postgres_table(initialize_tests):
    """Fixture helper: create the Postgres test database and `test` table if missing."""
    database = get_postgres_database()
    reader = get_reader(alias_postgres)
    try:
        reader.execute(f"create database {database}")
    except psycopg2.errors.DuplicateDatabase:
        # database already exists from a previous run — nothing to do
        pass

    ddl = """create table if not exists test
(
    id     int not null primary key,
    amount decimal(10, 2) default null
)"""
    try:
        reader.execute(ddl)
    except psycopg2.ProgrammingError as e:
        # DDL produces no result set; only that specific error is tolerated
        assert str(e) == "no results to fetch"  # nosec: B101
Example no. 12
0
File: cli.py Project: yy1117/synch
def produce(ctx: Context):
    """CLI entry: start the change-event producer for the configured alias."""
    source_alias = ctx.obj["alias"]
    source_reader = get_reader(source_alias)
    event_broker = get_broker(source_alias)
    logger.info(f"start producer for {source_alias} success")
    source_reader.start_sync(event_broker)
Example no. 13
0
def test_execute_sql():
    """A trivial select through the Postgres reader returns list-shaped rows."""
    postgres_reader = get_reader(alias_postgres)
    first_row = postgres_reader.execute("select 1")[0]
    assert first_row == [1]
Example no. 14
0
def test_get_pk():
    """The `test` table's primary key should be reported as `id`."""
    mysql_reader = get_reader(alias_mysql)
    primary_key = mysql_reader.get_primary_key(get_mysql_database(), "test")
    assert primary_key == "id"
Example no. 15
0
def test_execute_sql():
    """A trivial select through the MySQL reader returns dict-shaped rows."""
    mysql_reader = get_reader(alias_mysql)
    first_row = mysql_reader.execute("select 1")[0]
    assert first_row == {"1": 1}
Example no. 16
0
def test_get_binlog():
    """The MySQL reader reports its binlog position as a tuple."""
    mysql_reader = get_reader(alias_mysql)
    binlog_pos = mysql_reader.get_binlog_pos()
    assert isinstance(binlog_pos, tuple)