def test_import_bare(pg_repo_local):
    """Import a query result with nothing checked out on either side.

    The import is driven purely by metadata: both repositories are
    unchecked-out, the source image and the parent image are passed
    explicitly by hash, and the result is read back by querying the new
    image's table directly.
    """
    # Give OUTPUT a pre-existing table so we can verify the import keeps it.
    OUTPUT.init()
    OUTPUT.run_sql("CREATE TABLE sentinel (key INTEGER)")
    OUTPUT.commit()

    # Neither repository has a checked-out image from here on.
    pg_repo_local.uncheckout()
    OUTPUT.uncheckout()

    source_image_hash = pg_repo_local.images["latest"].image_hash
    parent_image_hash = OUTPUT.images["latest"].image_hash
    OUTPUT.import_tables(
        tables=["imported_fruits"],
        source_repository=pg_repo_local,
        image_hash=source_image_hash,
        source_tables=["SELECT * FROM fruits WHERE fruit_id = 1"],
        parent_hash=parent_image_hash,
        do_checkout=False,
        table_queries=[True],
    )

    # The import must not have checked anything out.
    assert OUTPUT.head is None
    assert pg_repo_local.head is None

    # The newest image holds both the old table and the imported one.
    table_names = sorted(OUTPUT.images["latest"].get_tables())
    assert table_names == ["imported_fruits", "sentinel"]

    imported = OUTPUT.images["latest"].get_table("imported_fruits")
    rows = list(imported.query(columns=["name"], quals=[]))
    assert rows == [{"name": "apple"}]
def test_import_multiple_tables(pg_repo_local):
    """Import with empty table lists and check both source tables arrive.

    After the import, `fruits` and `vegetables` in OUTPUT must return the
    same rows as in the source repository, and the new head must be a
    direct child of the image that was checked out before the import.
    """
    OUTPUT.init()
    previous_head = OUTPUT.head

    OUTPUT.import_tables(tables=[], source_repository=pg_repo_local, source_tables=[])

    for name in ("fruits", "vegetables"):
        select_all = "SELECT * FROM %s" % name
        imported_rows = OUTPUT.run_sql(select_all)
        source_rows = pg_repo_local.run_sql(select_all)
        assert imported_rows == source_rows

    current_head = OUTPUT.head
    assert current_head != previous_head
    assert current_head.parent_id == previous_head.image_hash
def test_import_preserves_pending_changes(pg_repo_local):
    """Uncommitted changes to an existing table must survive an import.

    A row is inserted into `test` after the commit, the pending changes are
    recorded, and the same pending changes are expected to be reported once
    the import has created a new image on top of that commit.
    """
    OUTPUT.init()
    OUTPUT.run_sql(
        """CREATE TABLE test (id integer, name varchar); INSERT INTO test VALUES (1, 'test')"""
    )
    committed = OUTPUT.commit()

    # Leave one uncommitted insert behind and snapshot what the engine sees.
    OUTPUT.run_sql("INSERT INTO test VALUES (2, 'test2')")
    pending_before = get_engine().get_pending_changes(OUTPUT.to_schema(), "test")

    OUTPUT.import_tables(
        tables=["imported_fruits"],
        source_repository=pg_repo_local,
        source_tables=["fruits"],
    )

    # The import creates a child of the commit made above...
    assert OUTPUT.head.parent_id == committed.image_hash

    # ...and the pending changes on `test` are unchanged.
    pending_after = OUTPUT.engine.get_pending_changes(OUTPUT.to_schema(), "test")
    assert pending_before == pending_after
def test_import_basic(pg_repo_local):
    """Import `fruits` from the source repository under a new table name.

    The imported table must return the same rows as the source table, and
    the import must produce a new head whose parent is the previous head.
    """
    OUTPUT.init()
    previous_head = OUTPUT.head

    OUTPUT.import_tables(
        tables=["imported_fruits"],
        source_repository=pg_repo_local,
        source_tables=["fruits"],
    )

    imported_rows = OUTPUT.run_sql("SELECT * FROM imported_fruits")
    source_rows = pg_repo_local.run_sql("SELECT * FROM fruits")
    assert imported_rows == source_rows

    current_head = OUTPUT.head
    assert current_head != previous_head
    assert current_head.parent_id == previous_head.image_hash
def test_import_preserves_existing_tables(pg_repo_local):
    """An import adds a table without disturbing the ones already there.

    Starting from the image returned by `_setup_dataset()` (expected to
    contain the table `test`), `fruits` is imported as `imported_fruits`.
    Checking out the old image must show only `test`; checking out the new
    one must show both tables.
    """

    def table_present(table_name):
        # Look the table up in OUTPUT's checked-out schema.
        return OUTPUT.engine.table_exists(OUTPUT.to_schema(), table_name)

    original_head = _setup_dataset()
    OUTPUT.import_tables(
        tables=["imported_fruits"],
        source_repository=pg_repo_local,
        source_tables=["fruits"],
    )
    imported_head = OUTPUT.head

    # Pre-import image: the imported table must not exist yet.
    original_head.checkout()
    assert table_present("test")
    assert not table_present("imported_fruits")

    # Post-import image: both tables are there.
    imported_head.checkout()
    assert table_present("test")
    assert table_present("imported_fruits")
def test_import_query_reuses_hash(pg_repo_local):
    """Query imports should reuse objects whose contents already exist.

    Two queries are imported: one selecting every row of `fruits` (expected
    to reuse the original `fruits` object) and one selecting a single row
    (expected to create one new object). Repeating the same import from the
    base image must not create any further objects.
    """

    def import_both_queries():
        # Run the two-query import without checking out and return the
        # resulting image.
        new_hash = OUTPUT.import_tables(
            source_repository=pg_repo_local,
            source_tables=[
                "SELECT * FROM fruits",
                "SELECT * FROM fruits WHERE fruit_id = 1",
            ],
            tables=["fruits_all", "fruits_one"],
            do_checkout=False,
            table_queries=[True, True],
        )
        return OUTPUT.images.by_hash(new_hash)

    OUTPUT.init()
    base_image = OUTPUT.head

    first = import_both_queries()
    source_fruits = pg_repo_local.head.get_table("fruits")
    assert first.get_table("fruits_all").objects == source_fruits.objects
    # Original fruits and vegetables + the 1-row import
    assert len(OUTPUT.objects.get_all_objects()) == 3

    # Second pass from the base image: both query results already exist.
    base_image.checkout()
    second = import_both_queries()
    for table_name in ("fruits_all", "fruits_one"):
        assert second.get_table(table_name).objects == first.get_table(table_name).objects
    # No new objects have been created.
    assert len(OUTPUT.objects.get_all_objects()) == 3