def __init__(self, left_conn_uri_or_engine, right_conn_uri_or_engine=None):
    """Open connectors for the left and right databases.

    :param left_conn_uri_or_engine: uri or engine for the left database
    :param right_conn_uri_or_engine: uri or engine for the right database;
        when omitted, the left connection is shared by both sides
    """
    self.left_conn_uri_or_engine = left_conn_uri_or_engine
    self.left_conn = Connector(left_conn_uri_or_engine)
    if right_conn_uri_or_engine is None:
        # Single-database mode: reuse the left connection for the right side.
        right_conn_uri_or_engine = self.left_conn_uri_or_engine
        right_conn = self.left_conn
    else:
        right_conn = Connector(right_conn_uri_or_engine)
    self.right_conn_uri_or_engine = right_conn_uri_or_engine
    self.right_conn = right_conn
def setUp(self):
    """Init a temporary table with some data.

    Rebuilds DATA_QUALITY_SCHEMA from scratch, creates the two quality
    tables and an Alembic-style migration table seeded with one version row.
    """
    sql = [
        # FIX: the DROP and CREATE literals previously had no comma between
        # them, so Python's implicit string concatenation merged them into a
        # single list element (and a single execute() call). They are kept as
        # two separate statements here.
        f"DROP SCHEMA IF EXISTS {DATA_QUALITY_SCHEMA} CASCADE;",
        f"CREATE SCHEMA IF NOT EXISTS {DATA_QUALITY_SCHEMA};",
        get_quality_table_creation_script(DATA_QUALITY_SCHEMA, DATA_QUALITY_TABLE_1),
        get_quality_table_creation_script(DATA_QUALITY_SCHEMA, DATA_QUALITY_TABLE_2),
        f"""
        create table {DATA_QUALITY_SCHEMA}.{MIGRATION_TABLE}
        (
            version_num varchar(32) not null
                constraint alembic_version_pkc
                    primary key
        );

        INSERT INTO {DATA_QUALITY_SCHEMA}.{MIGRATION_TABLE} (version_num)
        VALUES ('0.1.4-hash');
        """,
    ]
    self.conn = Connector(TEST_DB_URI)
    for s in sql:
        self.conn.execute(s)
def test_execute_consistency_diff(self):
    """A DIFF check must report 'valid' for tables that differ only in column order."""
    self.inconsistent_colums_left = "user"
    self.inconsistent_colums_right = "user_inconsistent"
    self.conn = Connector(TEST_DB_URI)
    self.consistency_checker = ConsistencyChecker(TEST_DB_URI)
    left = self.inconsistent_colums_left
    right = self.inconsistent_colums_right
    # Same rows, same columns — but the column order is swapped on purpose.
    setup_statements = [
        f"""
        CREATE TABLE IF NOT EXISTS tmp.{left}(
            id SERIAL PRIMARY KEY,
            name text
        )
        """,
        f"""
        CREATE TABLE IF NOT EXISTS tmp.{right}(
            name text,
            id SERIAL PRIMARY KEY
        )
        """,
        f"""
        INSERT INTO tmp.{left} VALUES (1, 'John Doe')
        """,
        f"""
        INSERT INTO tmp.{right} VALUES ('John Doe', 1)
        """,
    ]
    for statement in setup_statements:
        self.conn.execute(statement)
    self.consistency_checker.run(
        self.consistency_checker.DIFF,
        left_check_table={"schema_name": "tmp", "table_name": left},
        right_check_table={"schema_name": "tmp", "table_name": right},
        result_table={
            "schema_name": "data_quality",
            "table_name": self.result_table_name,
        },
        context={"task_ts": self.now},
    )
    rows = self.conn.get_records(
        f"""
        SELECT * from data_quality.consistency_check_{self.result_table_name}
        order by created_at
        """
    )
    # Column order must not matter — the diff should still come back valid.
    self.assertEqual(rows.fetchone()["status"], "valid")
def setUp(self):
    """Init a temporary table with some data.

    Creates tmp.raw_booking (4 rows) and hello.booking (3 rows) with an
    identical column layout, plus a connector and a consistency checker.
    """
    self.left_table_name = "raw_booking"
    self.right_table_name = "booking"
    self.ts_nodash = (
        FakedDatetime.now().isoformat().replace("-", "").replace(":", "")
    )
    self.now = FakedDatetime.now()
    # Both tables share exactly the same column definitions.
    columns_ddl = """
        id SERIAL PRIMARY KEY,
        src text,
        dst text,
        price int,
        turnover_after_refunds double precision,
        initial_price double precision,
        created_at timestamptz
    """
    statements = [
        "DROP SCHEMA if exists tmp CASCADE;",
        "CREATE SCHEMA IF NOT EXISTS tmp;",
        "CREATE SCHEMA IF NOT EXISTS hello;",
        f"""
        CREATE TABLE IF NOT EXISTS tmp.{self.left_table_name}({columns_ddl})
        """,
        f"""
        CREATE TABLE IF NOT EXISTS hello.{self.right_table_name}({columns_ddl})
        """,
        f"""
        INSERT INTO tmp.{self.left_table_name}
            (src, dst, price, turnover_after_refunds, initial_price, created_at)
        VALUES
            ('BTS', NULL, 1, 100, 11, '2018-09-12T11:50:00'),
            (NULL, 'PEK', 33, 1.1, 13, '2018-01-12T15:50:00'),
            ('VIE', 'JFK', 4, 5.5, 23.4, '2018-09-11T11:50:00'),
            ('VIE', 'VIE', 4, 0.0, 0.0, '2018-09-11T11:50:00')
        """,
        f"""
        INSERT INTO hello.{self.right_table_name}
            (src, dst, price, turnover_after_refunds, initial_price, created_at)
        VALUES
            ('BTS', NULL, 1, 100, 11, '2018-09-12T11:50:00'),
            (NULL, 'PEK', 33, 1.1, 13, '2018-01-12T15:50:00'),
            ('VIE', 'JFK', 4, 5.5, 23.4, '2018-09-11T11:50:00')
        """,
    ]
    self.conn = Connector(TEST_DB_URI)
    for statement in statements:
        self.conn.execute(statement)
    self.consistency_checker = ConsistencyChecker(TEST_DB_URI)
def __init__(self, migrations_map, package_version, url, schema):
    """Store migration metadata and open a database connection.

    :param migrations_map: mapping of package versions to their migration
        revisions, e.g. {'0.1.4': 'A', '0.1.5': 'B'}
    :param package_version: version of the package planned to be migrated
    :param url: database url where the Alembic migration table is present
        or planned to be created
    :param schema: database schema where the Alembic migration table is
        present or planned to be created
    """
    self.versions_migrations = migrations_map
    self.package_version = package_version
    self.url = url
    self.schema = schema
    self.conn = Connector(url)
def conn():
    """Yield a Connector to the test database.

    Ensures the test schemas exist beforehand and drops them all
    (with CASCADE) once the consuming test is done.
    """
    connector = Connector(TEST_DB_URI)
    schemas = ["tmp", "temporary", "data_quality", "raw"]
    for s in schemas:
        connector.execute(f"create schema if not exists {s}")
    yield connector
    # Teardown: remove everything the tests may have created.
    for s in schemas:
        connector.execute(f"drop schema if exists {s} cascade")
def setUp(self):
    """Init a temporary table with some data.

    Rebuilds DATA_QUALITY_SCHEMA from scratch and creates the two
    quality tables.
    """
    sql = [
        # FIX: the DROP and CREATE literals previously had no comma between
        # them, so Python's implicit string concatenation merged them into a
        # single list element (and a single execute() call). They are kept as
        # two separate statements here.
        f"DROP SCHEMA IF EXISTS {DATA_QUALITY_SCHEMA} CASCADE;",
        f"CREATE SCHEMA IF NOT EXISTS {DATA_QUALITY_SCHEMA};",
        get_quality_table_creation_script(DATA_QUALITY_SCHEMA, DATA_QUALITY_TABLE_1),
        get_quality_table_creation_script(DATA_QUALITY_SCHEMA, DATA_QUALITY_TABLE_2),
    ]
    self.conn = Connector(TEST_DB_URI)
    for s in sql:
        self.conn.execute(s)
def __init__(self, conn_uri_or_engine, special_qc_map=None):
    """Keep the connection uri/engine and open a connector.

    :param conn_uri_or_engine: database uri or engine to connect with
    :param special_qc_map: optional mapping of special quality checks;
        an empty mapping is used when none (or a falsy value) is given
    """
    self.conn_uri_or_engine = conn_uri_or_engine
    self.conn = Connector(conn_uri_or_engine)  # todo - allow cfg
    self.special_qc_map = special_qc_map if special_qc_map else {}
def setUpClass(cls) -> None:
    """Prepare class-level fixtures: db connector, Alembic config, migration table."""
    cls.conn = Connector(TEST_DB_URI)
    cls.alembic_cfg = Config(ALEMBIC_INI_PATH)
    # The migration table name comes from alembic.ini ("version_table").
    version_table_name = cls.alembic_cfg.get_main_option("version_table")
    cls.migration_table = Table(DATA_QUALITY_SCHEMA, version_table_name)
def setUp(self):
    """Init a temporary table with some data.

    Rebuilds the tmp/data_quality schemas, creates a base table and a
    timestamp-suffixed copy with identical columns, inserts sample rows
    into both, and prepares a ContessaRunner.
    """
    self.table_name = "booking_all_v2"
    self.ts_nodash = (
        FakedDatetime.now().isoformat().replace("-", "").replace(":", "")
    )
    self.tmp_table_name = f"{self.table_name}_{self.ts_nodash}"
    self.now = FakedDatetime.now()
    # Both tables share exactly the same column definitions.
    columns_ddl = """
        id SERIAL PRIMARY KEY,
        src text,
        dst text,
        price int,
        turnover_after_refunds double precision,
        initial_price double precision,
        created_at timestamptz
    """
    statements = [
        "DROP SCHEMA if exists tmp CASCADE;",
        "DROP SCHEMA if exists data_quality CASCADE;",
        "CREATE SCHEMA IF NOT EXISTS tmp;",
        "CREATE SCHEMA IF NOT EXISTS data_quality;",
        "CREATE SCHEMA IF NOT EXISTS hello;",
        f"""
        CREATE TABLE IF NOT EXISTS tmp.{self.table_name}({columns_ddl})
        """,
        f"""
        CREATE TABLE IF NOT EXISTS tmp.{self.tmp_table_name}({columns_ddl})
        """,
        f"""
        INSERT INTO tmp.{self.tmp_table_name}
            (src, dst, price, turnover_after_refunds, initial_price, created_at)
        VALUES
            ('BTS', NULL, 1, 100, 11, '2018-09-12T11:50:00'),
            -- this is older than 30 days.
            -- not in stats when time_filter = `created_at`
            (NULL, 'PEK', 33, 1.1, 13, '2018-01-12T15:50:00'),
            ('VIE', 'JFK', 4, 5.5, 23.4, '2018-09-11T11:50:00'),
            ('VIE', 'VIE', 4, 0.0, 0.0, '2018-09-11T11:50:00')
        """,
        f"""
        INSERT INTO tmp.{self.table_name}
            (src, dst, price, turnover_after_refunds, initial_price, created_at)
        VALUES
            ('BTS', NULL, 1, 100, 11, '2018-09-12T13:00:00'),
            -- this is older than 30 days.
            -- not in stats when time_filter = `created_at`
            (NULL, 'PEK', 33, 1.1, 13, '2018-01-12T13:00:00'),
            ('VIE', 'JFK', 4, 5.5, 23.4, '2018-09-11T13:00:00'),
            ('VIE', 'VIE', 4, 0.0, 0.0, '2018-09-11T13:00:00')
        """,
    ]
    self.conn = Connector(TEST_DB_URI)
    for statement in statements:
        self.conn.execute(statement)
    self.contessa_runner = ContessaRunner(TEST_DB_URI)