def test_import_splitfile_reuses_hash(local_engine_empty):
    # Run the same Splitfile (which loads data from a mounted database) into
    # two separate repositories: identical contents must hash to identical
    # objects, so no extra objects get created for the second run.
    output_2 = Repository.from_schema("output_2")
    execute_commands(load_splitfile("import_from_mounted_db.splitfile"), output=OUTPUT)
    execute_commands(load_splitfile("import_from_mounted_db.splitfile"), output=output_2)

    head = OUTPUT.head
    head_2 = output_2.head

    expected_objects = {
        "my_fruits": ["o71ba35a5bbf8ac7779d8fe32226aaacc298773e154a4f84e9aabf829238fb1"],
        "o_vegetables": ["o70e726f4bf18547242722600c4723dceaaede27db8fa5e9e6d7ec39187dd86"],
        "vegetables": ["ob474d04a80c611fc043e8303517ac168444dc7518af60e4ccc56b3b0986470"],
        "all_fruits": ["o0e742bd2ea4927f5193a2c68f8d4c51ea018b1ef3e3005a50727147d2cf57b"],
    }
    for table_name, object_ids in expected_objects.items():
        assert head.get_table(table_name).objects == object_ids
        # The second repository must reuse exactly the same objects.
        assert head_2.get_table(table_name).objects == head.get_table(table_name).objects
def test_splitfile_end_to_end_with_uploading(
    local_engine_empty, remote_engine, pg_repo_remote_multitag, mg_repo_remote, clean_minio
):
    # An end-to-end test:
    # * Create a derived dataset from some tables imported from the remote engine
    # * Push it back to the remote engine, uploading all objects to S3 (instead of the remote engine itself)
    # * Delete everything from pgcache
    # * Run another splitfile that depends on the just-pushed dataset (and does lazy checkouts to
    #   get the required tables).

    # Do the same setting up first and run the splitfile against the remote data.
    execute_commands(
        load_splitfile("import_remote_multiple.splitfile"), params={"TAG": "v1"}, output=OUTPUT
    )
    remote_output = Repository(OUTPUT.namespace, OUTPUT.repository, remote_engine)

    # Push with upload: objects go to S3 (Minio in the test setup) rather than
    # being stored on the remote engine itself.
    OUTPUT.push(remote_repository=remote_output, handler="S3", handler_options={})
    # Unmount everything locally and clean up the local object cache so the
    # next splitfile has to fetch everything lazily.
    for mountpoint, _ in get_current_repositories(local_engine_empty):
        mountpoint.delete()
    OUTPUT.objects.cleanup()

    stage_2 = R("output_stage_2")
    execute_commands(load_splitfile("import_from_preuploaded_remote.splitfile"), output=stage_2)

    assert stage_2.run_sql("SELECT id, name, fruit, vegetable FROM diet") == [
        (2, "James", "orange", "carrot")
    ]
def test_from_remote(local_engine_empty, pg_repo_remote_multitag):
    # Base a new dataset on a remote repository and check the image chain.
    execute_commands(
        load_splitfile("from_remote.splitfile"), params={"TAG": "v1"}, output=OUTPUT
    )

    new_head = OUTPUT.head
    parent = OUTPUT.images.by_hash(new_head.parent_id)
    schema = OUTPUT.to_schema()

    # The parent image contains only the two source tables.
    parent.checkout()
    assert OUTPUT.engine.table_exists(schema, "fruits")
    assert OUTPUT.engine.table_exists(schema, "vegetables")
    assert not OUTPUT.engine.table_exists(schema, "join_table")

    # The new head adds the join result on top of the sources.
    new_head.checkout()
    assert OUTPUT.engine.table_exists(schema, "fruits")
    assert OUTPUT.engine.table_exists(schema, "vegetables")
    assert OUTPUT.run_sql("SELECT * FROM join_table") == [
        (1, "apple", "potato"),
        (2, "orange", "carrot"),
    ]

    # Rerun against v2 of the remote (where row 1 was removed from fruits).
    # Delete the output mountpoint first: otherwise the executor tries to
    # fetch the nonexistent 0000... commit from it.
    OUTPUT.delete()
    execute_commands(
        load_splitfile("from_remote.splitfile"), params={"TAG": "v2"}, output=OUTPUT
    )
    assert OUTPUT.run_sql("SELECT * FROM join_table") == [(2, "orange", "carrot")]
def test_splitfile_cached(pg_repo_local):
    # Rerunning an identical splitfile must not create any new images.
    splitfile = load_splitfile("import_local_multiple_with_queries.splitfile")

    execute_commands(splitfile, output=OUTPUT)
    images_after_first_run = OUTPUT.images()
    assert len(images_after_first_run) == 4

    execute_commands(splitfile, output=OUTPUT)
    assert OUTPUT.images() == images_after_first_run
def test_rerun_with_from_import(local_engine_empty, pg_repo_remote_multitag):
    execute_commands(
        load_splitfile("import_remote_multiple.splitfile"), params={"TAG": "v1"}, output=OUTPUT
    )
    output_v1 = OUTPUT.head

    # Logically rebase the newly created image onto the V2 of the remote repository.
    rebuild_image(output_v1, {pg_repo_remote_multitag: "v2"})
    output_v2 = OUTPUT.head

    # Both derived versions must still exist, each depending only on the
    # respective tag of the source.
    source_v1 = pg_repo_remote_multitag.images["v1"]
    source_v2 = pg_repo_remote_multitag.images["v2"]
    assert output_v1.provenance() == [(pg_repo_remote_multitag, source_v1.image_hash)]
    assert output_v2.provenance() == [(pg_repo_remote_multitag, source_v2.image_hash)]

    # ov1_log: CREATE TABLE commit, then IMPORT from v1, then the 00000 commit
    # ov2_log: CREATE TABLE commit, then FROM v1, then the 00000.. commit
    ov1_log = output_v1.get_log()
    ov2_log = output_v2.get_log()
    assert ov1_log[2:] == ov2_log[2:]
    assert len(ov1_log) == 3
def test_rerun_with_new_version(local_engine_empty, pg_repo_remote_multitag):
    # Base a new dataset on a remote repository, then rebase it onto a newer
    # version of the source and verify both derived images survive.
    execute_commands(
        load_splitfile("from_remote.splitfile"), params={"TAG": "v1"}, output=OUTPUT
    )
    output_v1 = OUTPUT.head

    # Logical rebase of the newly created image onto the V2 of the remote repository.
    rebuild_image(output_v1, {pg_repo_remote_multitag: "v2"})
    output_v2 = OUTPUT.head
    assert local_engine_empty.run_sql("SELECT * FROM output.join_table") == [
        (2, "orange", "carrot")
    ]

    # Each derived version depends only on the respective tag of the source.
    source_v1 = pg_repo_remote_multitag.images["v1"]
    source_v2 = pg_repo_remote_multitag.images["v2"]
    assert output_v1.provenance() == [(pg_repo_remote_multitag, source_v1.image_hash)]
    assert output_v2.provenance() == [(pg_repo_remote_multitag, source_v2.image_hash)]

    # ov1_log: CREATE TABLE commit, then FROM v1
    # ov2_log: CREATE TABLE commit, then FROM v2 which is based on FROM v1
    # (since we cloned both from test/pg_mount), then as previously.
    ov1_log = output_v1.get_log()
    ov2_log = output_v2.get_log()
    assert ov1_log[1:] == ov2_log[2:]
def test_splitfile_object_download_failure(local_engine_empty, pg_repo_remote_multitag):
    # Simulate an object download failure (that happens inside of the engine during IMPORT
    # execution) propagating to the caller and not leaving the engine in an inconsistent state.
    object_id = pg_repo_remote_multitag.images["v1"].get_table("fruits").objects[0]
    assert object_id == "o0e742bd2ea4927f5193a2c68f8d4c51ea018b1ef3e3005a50727147d2cf57b"

    # Hide the object on the remote by renaming it to a dummy ID so that the
    # download fails mid-execution.
    tmp_object_id = "o" + "0" * 62
    pg_repo_remote_multitag.engine.run_sql(
        SQL("ALTER TABLE splitgraph_meta.{} RENAME TO {}").format(
            Identifier(object_id), Identifier(tmp_object_id)
        )
    )

    assert len(OUTPUT.images()) == 0
    assert _get_table_count(OUTPUT) == 0

    with pytest.raises(ObjectCacheError) as e:
        execute_commands(
            load_splitfile("import_remote_multiple.splitfile"), params={"TAG": "v1"}, output=OUTPUT
        )
    assert "Missing 1 object (%s)" % object_id in str(e.value)

    # Check the execution didn't create the image
    assert len(OUTPUT.images()) == 0
    assert _get_table_count(OUTPUT) == 0

    # Rename the object back and retry the Splitfile
    pg_repo_remote_multitag.engine.run_sql(
        SQL("ALTER TABLE splitgraph_meta.{} RENAME TO {}").format(
            Identifier(tmp_object_id), Identifier(object_id)
        )
    )
    execute_commands(
        load_splitfile("import_remote_multiple.splitfile"), params={"TAG": "v1"}, output=OUTPUT
    )
    OUTPUT.head.checkout()
    assert OUTPUT.run_sql("SELECT id, fruit, vegetable FROM join_table") == [
        (1, "apple", "potato"),
        (2, "orange", "carrot"),
    ]
    assert len(OUTPUT.images()) == 3
    # 2 tables in the first non-empty image, 3 tables in the second image
    # (previous 2 + joined data).
    assert _get_table_count(OUTPUT) == 5
def test_calc_hash_short_circuit(pg_repo_local):
    # Test that if the hash returned by calc_hash is unchanged, we don't run execute() again
    execute_commands(load_splitfile("custom_command_calc_hash.splitfile"), output=OUTPUT)

    # Run 1: table gets dropped (since the image doesn't exist)
    log = OUTPUT.head.get_log()
    assert len(log) == 3  # Base 000.., import from test/pg_mount, drop table fruits
    assert log[0].get_tables() == []
    # Hash: combination of the previous image hash and the command context (unchanged)
    assert log[0].image_hash == _combine_hashes([log[1].image_hash, "deadbeef" * 8])

    # Run 2: same command context hash, same original image -- no effect
    with patch(
        "test.splitgraph.splitfile.test_custom_commands.CalcHashTestCommand.execute"
    ) as cmd:
        execute_commands(load_splitfile("custom_command_calc_hash.splitfile"), output=OUTPUT)
        new_log = OUTPUT.head.get_log()
        assert new_log == log
        assert cmd.call_count == 0

    # Run 3: alter test_pg_mount (same command context hash but different image)
    pg_repo_local.run_sql("""UPDATE fruits SET name = 'banana' where fruit_id = 1""")
    pg_repo_local.commit()
    with patch(
        "test.splitgraph.splitfile.test_custom_commands.CalcHashTestCommand.execute"
    ) as cmd:
        execute_commands(load_splitfile("custom_command_calc_hash.splitfile"), output=OUTPUT)
        log_3 = OUTPUT.head.get_log()
        assert cmd.call_count == 1

    assert len(log_3) == 3
    # Since we patched the execute() out, it won't have run the DROP TABLE command so we
    # don't check for that. However, the sg_meta is still altered.
    assert log_3[0].image_hash == _combine_hashes([log_3[1].image_hash, "deadbeef" * 8])
    # Import from test/pg_mount changed (since the actual repo changed)
    assert log_3[1].image_hash != log[1].image_hash
    # Base layer (00000...) unchanged
    assert log_3[2].image_hash == log[2].image_hash
def test_splitfile_remote(local_engine_empty, pg_repo_remote_multitag):
    # v1 of the remote still contains fruit_id = 1; it disappears in v2, so
    # running the same splitfile against each tag yields different outputs.
    expectations = [
        ("v1", [(1, "apple", "potato"), (2, "orange", "carrot")]),
        ("v2", [(2, "orange", "carrot")]),
    ]
    for tag, expected_rows in expectations:
        execute_commands(
            load_splitfile("import_remote_multiple.splitfile"), params={"TAG": tag}, output=OUTPUT
        )
        assert OUTPUT.run_sql("SELECT id, fruit, vegetable FROM join_table") == expected_rows
def test_splitfile_inline_sql(readonly_pg_repo, pg_repo_local):
    # Test SQL commands accessing repos directly -- join a remote repo with
    # some local data.
    prepare_lq_repo(pg_repo_local, commit_after_every=False, include_pk=True)
    pg_repo_local.head.tag("v2")

    execute_commands(
        load_splitfile("inline_sql.splitfile"),
        output=OUTPUT,
    )

    new_head = OUTPUT.head
    new_head.checkout()
    assert new_head.get_tables() == ["balanced_diet"]
    assert OUTPUT.run_sql("SELECT * FROM balanced_diet") == [
        (1, "apple", None, "potato"),
        (2, "orange", datetime.datetime(2019, 1, 1, 12, 0), "carrot"),
    ]

    local_repo_head = pg_repo_local.head.image_hash
    other_repo_head = readonly_pg_repo.images["latest"].image_hash

    # Provenance must record both source images (the remote one and the local
    # one) plus the SQL rewritten with the source image hashes pinned into the
    # fully-qualified table references.
    assert new_head.provenance_data == [
        {
            "sources": [
                {
                    "source": "pg_mount",
                    "source_hash": other_repo_head,
                    "source_namespace": "otheruser",
                },
                {
                    "source": "pg_mount",
                    "source_hash": local_repo_head,
                    "source_namespace": "test",
                },
            ],
            "sql": (
                "CREATE TABLE balanced_diet\n"
                " AS SELECT fruits.fruit_id AS id\n"
                " , fruits.name AS fruit\n"
                " , my_fruits.timestamp AS timestamp\n"
                " , vegetables.name AS vegetable\n"
                " FROM "
                '"otheruser/pg_mount:{0}".fruits '
                "AS fruits\n"
                " INNER JOIN "
                '"otheruser/pg_mount:{0}".vegetables '
                "AS vegetables ON fruits.fruit_id = vegetable_id\n"
                " LEFT JOIN "
                '"test/pg_mount:{1}".fruits '
                "AS my_fruits ON my_fruits.fruit_id = fruits.fruit_id;\n"
                "\n"
                "ALTER TABLE balanced_diet ADD PRIMARY KEY (id)"
            ).format(other_repo_head, local_repo_head),
            "type": "SQL",
        },
    ]
def test_splitfile_remote_hash(local_engine_empty, pg_repo_remote):
    # A 10-character image hash prefix can be used in place of a tag.
    head_hash = pg_repo_remote.head.image_hash
    execute_commands(
        load_splitfile("import_remote_multiple.splitfile"),
        params={"TAG": head_hash[:10]},
        output=OUTPUT,
    )
    rows = OUTPUT.run_sql("SELECT id, fruit, vegetable FROM output.join_table")
    assert rows == [(1, "apple", "potato"), (2, "orange", "carrot")]
def test_splitfile_schema_changes(pg_repo_local, mg_repo_local):
    execute_commands(load_splitfile("schema_changes.splitfile"), output=OUTPUT)
    first_head = OUTPUT.head

    # Alter the source dataset and rerun the splitfile.
    pg_repo_local.run_sql("INSERT INTO fruits VALUES (12, 'mayonnaise')")
    pg_repo_local.commit()
    execute_commands(load_splitfile("schema_changes.splitfile"), output=OUTPUT)
    second_head = OUTPUT.head

    # The image from the first run is untouched by the rerun.
    first_head.checkout()
    assert OUTPUT.run_sql("SELECT * FROM spirit_fruits") == [("James", "orange", 12)]

    # Mayonnaise joined with Alex, ID 12 + 10 = 22.
    second_head.checkout()
    assert OUTPUT.run_sql("SELECT * FROM spirit_fruits") == [
        ("James", "orange", 12),
        ("Alex", "mayonnaise", 22),
    ]
def test_drawing(pg_repo_local):
    # Smoke test: doesn't really check anything, just makes sure the tree
    # drawing code doesn't throw.
    execute_commands(load_splitfile("import_local.splitfile"), output=OUTPUT)

    # Create a second branch so that multi-branch repositories get rendered too.
    pg_repo_local.images()[1].checkout()
    pg_repo_local.run_sql("INSERT INTO fruits VALUES (3, 'kiwi')")
    pg_repo_local.commit()
    rebuild_image(OUTPUT.head, {pg_repo_local: pg_repo_local.head.image_hash})

    render_tree(OUTPUT)
def test_local_import_splitfile(pg_repo_local):
    execute_commands(load_splitfile("import_local.splitfile"), output=OUTPUT)
    head = OUTPUT.head
    schema = OUTPUT.to_schema()

    # Before the import commit, neither table exists in the output.
    OUTPUT.images.by_hash(head.parent_id).checkout()
    assert not OUTPUT.engine.table_exists(schema, "my_fruits")
    assert not OUTPUT.engine.table_exists(schema, "fruits")

    # The import materializes the table under its alias only.
    head.checkout()
    assert OUTPUT.engine.table_exists(schema, "my_fruits")
    assert not OUTPUT.engine.table_exists(schema, "fruits")
def test_update_without_import_splitfile(pg_repo_local):
    # Check that the correct commits are produced when a splitfile updates a
    # table on an existing mountpoint (both newly created and existing tables).
    execute_commands(load_splitfile("update_without_import.splitfile"), output=OUTPUT)
    log = OUTPUT.head.get_log()

    # Second-newest image: the table exists but is still empty.
    log[1].checkout()
    assert OUTPUT.run_sql("SELECT * FROM my_fruits") == []

    # Head image: the update has been applied.
    log[0].checkout()
    assert OUTPUT.run_sql("SELECT * FROM my_fruits") == [(1, "pineapple")]
def test_basic_splitfile(pg_repo_local):
    execute_commands(load_splitfile("create_table.splitfile"), output=OUTPUT)
    # Walk the log oldest-to-newest (skipping the base image) and check the
    # table contents grow one row at a time.
    log = list(reversed(OUTPUT.head.get_log()))

    expected_contents = [
        [],
        [(1, "pineapple")],
        [(1, "pineapple"), (2, "banana")],
    ]
    for image, rows in zip(log[1:], expected_contents):
        image.checkout()
        assert OUTPUT.run_sql("SELECT * FROM my_fruits") == rows
def test_splitfile_with_external_sql(readonly_pg_repo):
    # Tests run from the repository root, so the path to the SQL file is
    # passed into the splitfile manually.
    execute_commands(
        load_splitfile("external_sql.splitfile"),
        params={"EXTERNAL_SQL_FILE": RESOURCES + "external_sql.sql"},
        output=OUTPUT,
    )
    result = OUTPUT.run_sql("SELECT id, fruit, vegetable FROM join_table")
    assert result == [(1, "apple", "potato"), (2, "orange", "carrot")]
def test_splitfile_sql_failure(local_engine_empty, pg_repo_remote_multitag):
    # A failing SQL stage must propagate the error but keep the images built
    # by the earlier, successful stages.
    assert len(OUTPUT.images()) == 0
    assert _get_table_count(OUTPUT) == 0

    with pytest.raises(psycopg2.errors.UndefinedTable) as e:
        execute_commands(
            load_splitfile("import_remote_broken_stage_2.splitfile"), output=OUTPUT
        )
    assert 'relation "nonexistent_fruits_table" does not exist' in str(e.value)

    # The execution created the first dummy (000...) image and the second
    # image with the IMPORT results.
    assert len(OUTPUT.images()) == 2
    assert _get_table_count(OUTPUT) == 2
    assert sorted(OUTPUT.images["latest"].get_tables()) == ["my_fruits", "vegetables"]
def test_from_remote_hash(local_engine_empty, pg_repo_remote):
    # Base a new dataset on a remote image referenced by a 10-char hash prefix.
    head_hash = pg_repo_remote.head.image_hash
    execute_commands(
        load_splitfile("from_remote.splitfile"), params={"TAG": head_hash[:10]}, output=OUTPUT
    )
    schema = OUTPUT.to_schema()
    for table_name in ("fruits", "vegetables"):
        assert OUTPUT.engine.table_exists(schema, table_name)
    assert OUTPUT.run_sql("SELECT * FROM join_table") == [
        (1, "apple", "potato"),
        (2, "orange", "carrot"),
    ]
def test_rerun_multiline_sql_roundtripping(pg_repo_local):
    # Test that with a multiline SQL sgr rebuild doesn't create a new image
    # when rebuilding the same one (the SQL round-trips through provenance
    # unchanged, so the command context hash stays the same).
    execute_commands(load_splitfile("multiline_sql.splitfile"), output=OUTPUT)
    head = OUTPUT.head
    expected_sql = "SQL {INSERT INTO fruits \n" "VALUES (3, 'banana')\n" " , (4, 'pineapple')}"
    assert head.to_splitfile()[1] == expected_sql

    rebuild_image(head, {})
    head_v2 = OUTPUT.head
    assert head_v2.to_splitfile()[1] == expected_sql
    # Same SQL, same sources -> same hash -> no new image.
    assert head_v2 == head
def test_provenance_with_from(local_engine_empty, pg_repo_remote_multitag):
    execute_commands(
        load_splitfile("from_remote.splitfile"), params={"TAG": "v1"}, output=OUTPUT
    )
    source = pg_repo_remote_multitag.images["v1"]

    # The output head must depend on exactly the v1 image of the source.
    assert OUTPUT.head.provenance() == [(pg_repo_remote_multitag, source.image_hash)]

    # Reverse provenance: the source repository lives on the remote engine, so
    # dependents are looked up on the local engine instead.
    assert source.provenance(reverse=True, engine=local_engine_empty) == [
        (OUTPUT, OUTPUT.head.image_hash)
    ]
    assert source.provenance() == []
def test_custom_command_errors(pg_repo_local):
    # Undefined custom commands and commands that fail to import must all
    # raise SplitfileError with an informative message.
    cases = [
        ("NOP", "Custom command NOP not found in the config!"),
        ("BROKEN1", "Error loading custom command BROKEN1"),
        ("BROKEN2", "Error loading custom command BROKEN2"),
    ]
    for command, expected_message in cases:
        with pytest.raises(SplitfileError) as e:
            execute_commands(
                load_splitfile("custom_command_dummy.splitfile").replace("DUMMY", command),
                output=OUTPUT,
            )
        assert expected_message in str(e.value)
def test_dummy_command(pg_repo_local):
    # Basic test to make sure the config gets wired to the splitfile executor
    # and the arguments are passed to it correctly.
    execute_commands(load_splitfile("custom_command_dummy.splitfile"), output=OUTPUT)
    log = OUTPUT.head.get_log()
    # Base 000.., import from test/pg_mount, DUMMY run that created a dupe image.
    assert len(log) == 3
    assert log[0].get_tables() == log[1].get_tables()
    assert log[0].comment == 'DUMMY arg1 --arg2 "argument three"'

    # DUMMY returns a random hash every time, so rerunning adds yet another
    # image on top of the shared base.
    execute_commands(load_splitfile("custom_command_dummy.splitfile"), output=OUTPUT)
    new_log = OUTPUT.head.get_log()
    assert new_log[2] == log[2]  # 0000... image shared
    assert new_log[1] == log[1]  # import image shared
    assert new_log[0] != log[0]  # new random-hash DUMMY image
def test_from_multistage(local_engine_empty, pg_repo_remote_multitag):
    final_stage = R("output_stage_2")

    # Produces two repositories: output and output_stage_2.
    execute_commands(load_splitfile("from_remote_multistage.splitfile"), params={"TAG": "v1"})

    # The final output ('output_stage_2') should only have one single fragment
    # (join_table from the first stage, OUTPUT).
    assert final_stage.run_sql("SELECT * FROM balanced_diet") == [
        (1, "apple", "potato"),
        (2, "orange", "carrot"),
    ]
    # The commit is based on the original empty image.
    assert final_stage.head.parent_id == "0" * 64
    assert final_stage.head.get_tables() == ["balanced_diet"]
def test_import_all(local_engine_empty):
    execute_commands(load_splitfile("import_all_from_mounted.splitfile"), output=OUTPUT)
    head = OUTPUT.head
    parent = OUTPUT.images.by_hash(head.parent_id)

    expected = {
        "vegetables": [(1, "potato"), (2, "carrot")],
        "fruits": [(1, "apple"), (2, "orange")],
    }

    # Before the import, none of the tables exist.
    parent.checkout()
    for table_name in expected:
        assert not OUTPUT.engine.table_exists(OUTPUT.to_schema(), table_name)

    # After the import, all tables exist with the mounted data.
    head.checkout()
    for table_name, rows in expected.items():
        assert OUTPUT.run_sql("SELECT * FROM %s" % table_name) == rows
def test_from_local(pg_repo_local):
    execute_commands(load_splitfile("from_local.splitfile"), output=OUTPUT)
    new_head = OUTPUT.head
    schema = OUTPUT.to_schema()

    # The parent image contains only the two source tables.
    OUTPUT.images.by_hash(new_head.parent_id).checkout()
    assert OUTPUT.engine.table_exists(schema, "fruits")
    assert OUTPUT.engine.table_exists(schema, "vegetables")
    assert not OUTPUT.engine.table_exists(schema, "join_table")

    # The head adds the join result on top.
    new_head.checkout()
    assert OUTPUT.engine.table_exists(schema, "fruits")
    assert OUTPUT.engine.table_exists(schema, "vegetables")
    assert OUTPUT.run_sql("SELECT * FROM join_table") == [
        (1, "apple", "potato"),
        (2, "orange", "carrot"),
    ]
def test_splitfile_recreate(local_engine_empty, pg_repo_remote_multitag):
    # Check a Splitfile can be regenerated from an image's stored provenance,
    # with the source tag resolved to a concrete image hash.
    execute_commands(
        load_splitfile("import_with_custom_query_and_sql.splitfile"),
        params={"TAG": "v1"},
        output=OUTPUT,
    )
    recreated_commands = OUTPUT.head.to_splitfile()
    assert recreated_commands == [
        "FROM test/pg_mount:%s IMPORT {SELECT *\n" % pg_repo_remote_multitag.images["v1"].image_hash
        + """FROM fruits WHERE name = 'orange'} AS my_fruits, {SELECT * FROM vegetables WHERE name LIKE '%o'} AS o_vegetables, vegetables AS vegetables, fruits AS all_fruits""",
        """SQL {CREATE TABLE test_table AS SELECT * FROM all_fruits}""",
    ]
def test_provenance(local_engine_empty, pg_repo_remote_multitag):
    execute_commands(
        load_splitfile("import_remote_multiple.splitfile"), params={"TAG": "v1"}, output=OUTPUT
    )
    source = pg_repo_remote_multitag.images["v1"]

    # The output head depends only on the v1 image of the source repository.
    assert OUTPUT.head.provenance() == [(pg_repo_remote_multitag, source.image_hash)]

    # Check reverse provenance. Since the repository lives on the remote
    # engine, we need to search for dependents on the local engine instead.
    assert source.provenance(reverse=True, engine=local_engine_empty) == [
        (OUTPUT, OUTPUT.head.image_hash)
    ]
    assert source.provenance() == []
def test_splitfile_recreate_custom_from(local_engine_empty, pg_repo_remote_multitag):
    # Check a Splitfile with a FROM command can be regenerated from provenance,
    # with the tag pinned to the concrete source image hash.
    execute_commands(
        load_splitfile("from_remote.splitfile"), params={"TAG": "v1"}, output=OUTPUT
    )
    recreated_commands = OUTPUT.head.to_splitfile()
    assert recreated_commands == [
        "FROM test/pg_mount:%s" % pg_repo_remote_multitag.images["v1"].image_hash,
        # Test provenance is recorded using the reformatted SQL
        """SQL {CREATE TABLE join_table AS SELECT fruit_id AS id , fruits.name AS fruit , vegetables.name AS vegetable FROM fruits INNER JOIN vegetables ON fruit_id = vegetable_id}""",
    ]
def test_advanced_splitfile(pg_repo_local):
    execute_commands(
        load_splitfile("import_local_multiple_with_queries.splitfile"), output=OUTPUT
    )
    schema = OUTPUT.to_schema()

    # Imported tables exist under their aliases; the original name doesn't.
    assert OUTPUT.engine.table_exists(schema, "my_fruits")
    assert OUTPUT.engine.table_exists(schema, "vegetables")
    assert not OUTPUT.engine.table_exists(schema, "fruits")
    assert OUTPUT.engine.table_exists(schema, "join_table")

    head = OUTPUT.head
    # The parent image doesn't have the join result yet.
    OUTPUT.images.by_hash(head.parent_id).checkout()
    assert not OUTPUT.engine.table_exists(schema, "join_table")

    head.checkout()
    assert OUTPUT.run_sql("SELECT id, fruit, vegetable FROM join_table") == [
        (2, "orange", "carrot")
    ]
    assert OUTPUT.run_sql("SELECT * FROM my_fruits") == [(2, "orange")]