def test_nullish_datetime(self): with self.source_conn.cursor() as cur: cur.executemany( "INSERT INTO druzhba_test.test_not_null_datetime VALUES (%s, %s, %s, %s, %s);", [ (1, t.t0, t.t1, t.t1, t.t1), (2, t.t0, "", "", ""), ], ) run_druzhba(self.args) with self.target_conn.cursor() as cur: cur.execute( "SELECT * FROM druzhba_test.test_not_null_datetime ORDER BY pk" ) results = cur.fetchall() self.assertListEqual( results, [ (1, t.t0, t.t1, t.t1, t.t1), (2, t.t0, t.tmin, t.tmin, t.tmin), ], )
def test_run_incremental(self): """Runs Druzhba, inserts new data, then runs Druzhba again.""" # First run - should create tracking table, target table, and insert values # TODO: check constraints are picked up correctly run_druzhba(self.args) with self.target_conn.cursor() as cur: cur.execute( "SELECT COUNT(*), MAX(updated_at), MAX(value1) FROM druzhba_test.test_basic" ) result = cur.fetchall() self.assertTupleEqual(result[0], (2, t.t1, 1)) with self.source_conn.cursor() as cur: cur.execute( "UPDATE test_basic SET value1 = 2, updated_at = %s WHERE pk = 2", (t.t2,), ) cur.execute( "INSERT INTO test_basic VALUES (%s, %s, %s, %s, %s);", (3, t.t2, "drop", 3, "c"), ) # Second run - should pick up the new row and the updated row run_druzhba(self.args) with self.target_conn.cursor() as cur: cur.execute("SELECT * FROM druzhba_test.test_basic ORDER BY pk") results = cur.fetchall() self.assertListEqual( results, [(1, t.t0, 0, "a"), (2, t.t2, 2, "b"), (3, t.t2, 3, "c")] ) cur.execute( "SELECT * FROM druzhba_test.pipeline_table_index ORDER BY created_ts" ) results = cur.fetchall() self.assertListEqual( results, [ ( "pgtest", "druzhba_test", "test_basic", t.t1.strftime("%Y-%m-%d %H:%M:%S.%f"), ANY, ), ( "pgtest", "druzhba_test", "test_basic", t.t2.strftime("%Y-%m-%d %H:%M:%S.%f"), ANY, ), ], )
def test_append_only(self): """ Runs a Druzhba pipeline for a table without a PK + the append_only setting. Also, `index_sql` is used idiomatically, as if loading a write-only log table. """ with self.source_conn.cursor() as cur: cur.executemany( f"INSERT INTO druzhba_test.{self.table_name} VALUES (%s, %s, %s);", [ (1, t.t0, None), (2, t.t0, None), ], ) run_druzhba(self.args) with self.source_conn.cursor() as cur: cur.executemany( f"INSERT INTO druzhba_test.{self.table_name} VALUES (%s, %s, %s);", [ (2, t.t0, t.t1), (3, t.t1, None), ], ) run_druzhba(self.args) with self.target_conn.cursor() as cur: cur.execute(f"SELECT * FROM druzhba_test.{self.table_name}") results = cur.fetchall() self.assertSetEqual( set(results), { (1, t.t0, None), (2, t.t0, None), (2, t.t0, t.t1), (3, t.t1, None), }, )
def test_skips_table_on_extra_target_column(self): # First run - should create tracking table, target table, and insert values run_druzhba(self.args) with self.source_conn.cursor() as cur: cur.execute("ALTER TABLE test_basic DROP COLUMN enum_value;") cur.execute("INSERT INTO test_basic VALUES (%s, %s, %s, %s);", (3, t.t2, "drop", 3)) # Second run - should see a discrepancy between the source/target, skip the table, # and only omit an error log rather than raise an exception. run_druzhba(self.args) with self.target_conn.cursor() as cur: cur.execute("SELECT COUNT(*) FROM druzhba_test.test_basic") self.assertEqual(cur.fetchone()[0], 2) cur.execute( "SELECT COUNT(*) FROM druzhba_test.pipeline_table_index") self.assertEqual(cur.fetchone()[0], 1)
def test_citext(self): with self.source_conn.cursor() as cur: cur.execute(""" DROP TABLE IF EXISTS test_basic; CREATE TABLE test_basic ( pk INT PRIMARY KEY, updated_at TIMESTAMP WITHOUT TIME ZONE NOT NULL, citext_col CITEXT ); """) cur.executemany( "INSERT INTO test_basic VALUES (%s, %s, %s);", [(1, t.t0, "citext1"), (2, t.t1, "Citext2")], ) run_druzhba(self.args) with self.target_conn.cursor() as cur: cur.execute("SELECT * FROM druzhba_test.test_basic ORDER BY pk") result = cur.fetchall() self.assertTupleEqual(result[0], (1, t.t0, "citext1")) self.assertTupleEqual(result[1], (2, t.t1, "Citext2"))
def test_run_incremental(self): """Runs Druzhba, inserts new data, then runs Druzhba again.""" # First run - should create tracking table, target table, and insert values # TODO: check constraints are picked up correctly run_druzhba(self.args) with self.target_conn.cursor() as cur: cur.execute( "SELECT COUNT(*), MAX(updated_at), MAX(value1) FROM druzhba_test.test_basic" ) result = cur.fetchall() self.assertTupleEqual(result[0], (2, t.t1, 1)) with self.target_conn.cursor() as cur: cur.execute(""" SELECT obj_description('druzhba_test.test_basic'::regclass, 'pg_class') AS table_comment """) result = cur.fetchone() self.assertEqual(result[0], "This is a table generated by the druzhba tests") with self.target_conn.cursor() as cur: cur.execute(""" SELECT column_name , pg_catalog.col_description('druzhba_test.test_basic'::regclass, ordinal_position::INT) AS column_comment FROM information_schema.columns WHERE table_schema = 'druzhba_test' AND table_name = 'test_basic' """) result = cur.fetchall() column_comments = {row[0]: row[1] for row in result} expected_column_comments = { "pk": None, "updated_at": None, "value1": "This is a comment with this: ' special character", "enum_value": "This is a very special enum column that can have values `a`, `b`, `c`", } for column, comment in expected_column_comments.items(): self.assertEqual(comment, column_comments[column]) with self.source_conn.cursor() as cur: cur.execute( "UPDATE test_basic SET value1 = 2, updated_at = %s WHERE pk = 2", (t.t2, ), ) cur.execute( "INSERT INTO test_basic VALUES (%s, %s, %s, %s, %s);", (3, t.t2, "drop", 3, "c"), ) # Second run - should pick up the new row and the updated row run_druzhba(self.args) with self.target_conn.cursor() as cur: cur.execute("SELECT * FROM druzhba_test.test_basic ORDER BY pk") results = cur.fetchall() self.assertListEqual(results, [(1, t.t0, 0, "a"), (2, t.t2, 2, "b"), (3, t.t2, 3, "c")]) cur.execute( "SELECT * FROM druzhba_test.pipeline_table_index ORDER BY created_ts" ) results = cur.fetchall() self.assertListEqual( results, [ ( "pgtest", "druzhba_test", "test_basic", t.t1.strftime("%Y-%m-%d %H:%M:%S.%f"), ANY, ), ( "pgtest", "druzhba_test", "test_basic", t.t2.strftime("%Y-%m-%d %H:%M:%S.%f"), ANY, ), ], )
def test_new_source_column_and_force_rebuild(self): # First run - should create tracking table, target table, and insert values run_druzhba(self.args) with self.source_conn.cursor() as cur: cur.execute( "ALTER TABLE test_basic ADD COLUMN new_value VARCHAR(63) DEFAULT 'default'" ) cur.execute( "INSERT INTO test_basic VALUES (%s, %s, %s, %s, %s, %s);", (3, t.t2, "drop", 3, "c", "other"), ) # Second run - should not fail despite the new column. Should pick up new row. run_druzhba(self.args) with self.target_conn.cursor() as cur: cur.execute("SELECT COUNT(*) FROM druzhba_test.test_basic") self.assertEqual(cur.fetchone()[0], 3) # Third run - should recreate target table with the new column and fully refresh # TODO: Check user permissions are maintained run_druzhba(dataclass_replace(self.args, rebuild=True)) with self.target_conn.cursor() as cur: cur.execute("SELECT * FROM druzhba_test.test_basic ORDER BY pk") results = cur.fetchall() self.assertListEqual( results, [ (1, t.t0, 0, "a", "default"), (2, t.t1, 1, "b", "default"), (3, t.t2, 3, "c", "other"), ], ) cur.execute( "SELECT * FROM druzhba_test.pipeline_table_index ORDER BY created_ts" ) results = cur.fetchall() self.assertListEqual( results, [ ( "pgtest", "druzhba_test", "test_basic", t.t1.strftime("%Y-%m-%d %H:%M:%S.%f"), ANY, ), ( "pgtest", "druzhba_test", "test_basic", t.t2.strftime("%Y-%m-%d %H:%M:%S.%f"), ANY, ), ( "pgtest", "druzhba_test", "test_basic", t.t2.strftime("%Y-%m-%d %H:%M:%S.%f"), ANY, ), ], )
def test_force_refresh(self): # First run - should create tracking table, target table, and insert values run_druzhba(self.args) with self.source_conn.cursor() as cur: cur.execute( "INSERT INTO test_basic VALUES (%s, %s, %s, %s, %s);", (3, t.t2, "drop", 3, "c"), ) cur.execute("DELETE FROM test_basic WHERE pk = %s", (1, )) # Second run - should pick up delete and insert run_druzhba(dataclass_replace(self.args, full_refresh=True)) with self.target_conn.cursor() as cur: cur.execute("SELECT * FROM druzhba_test.test_basic ORDER BY pk") results = cur.fetchall() self.assertListEqual(results, [(2, t.t1, 1, "b"), (3, t.t2, 3, "c")]) cur.execute( "SELECT * FROM druzhba_test.pipeline_table_index ORDER BY created_ts" ) results = cur.fetchall() self.assertListEqual( results, [ ( "pgtest", "druzhba_test", "test_basic", t.t1.strftime("%Y-%m-%d %H:%M:%S.%f"), ANY, ), ( "pgtest", "druzhba_test", "test_basic", t.t2.strftime("%Y-%m-%d %H:%M:%S.%f"), ANY, ), ], ) with self.source_conn.cursor() as cur: cur.execute( "INSERT INTO test_basic VALUES (%s, %s, %s, %s, %s);", (1, t.t0, "value", 0, "a"), ) cur.execute( "UPDATE test_basic SET value1 = 2, updated_at = %s WHERE pk = 2", (t.t3, ), ) # Third run: incremental updates should proceed from second run index value, # so re-insert of value 1 at original `updated_at` is not picked up while new update is. run_druzhba(self.args) with self.target_conn.cursor() as cur: cur.execute("SELECT * FROM druzhba_test.test_basic ORDER BY pk") results = cur.fetchall() self.assertListEqual(results, [(2, t.t3, 2, "b"), (3, t.t2, 3, "c")]) cur.execute( "SELECT * FROM druzhba_test.pipeline_table_index ORDER BY created_ts" ) results = cur.fetchall() self.assertListEqual( results, [ ( "pgtest", "druzhba_test", "test_basic", t.t1.strftime("%Y-%m-%d %H:%M:%S.%f"), ANY, ), ( "pgtest", "druzhba_test", "test_basic", t.t2.strftime("%Y-%m-%d %H:%M:%S.%f"), ANY, ), ( "pgtest", "druzhba_test", "test_basic", t.t3.strftime("%Y-%m-%d %H:%M:%S.%f"), ANY, ), ], )