コード例 #1
0
def test_import_splitfile_reuses_hash(local_engine_empty):
    """Run the same Splitfile into two repositories and verify that identical
    contents produce identical object hashes (i.e. no extra objects are
    created for the second repository)."""
    output_2 = Repository.from_schema("output_2")

    # Execute the same Splitfile into both targets.
    for target in (OUTPUT, output_2):
        execute_commands(load_splitfile("import_from_mounted_db.splitfile"),
                         output=target)

    # Known content hashes for each table produced by the Splitfile.
    expected_objects = {
        "my_fruits": [
            "o71ba35a5bbf8ac7779d8fe32226aaacc298773e154a4f84e9aabf829238fb1"
        ],
        "o_vegetables": [
            "o70e726f4bf18547242722600c4723dceaaede27db8fa5e9e6d7ec39187dd86"
        ],
        "vegetables": [
            "ob474d04a80c611fc043e8303517ac168444dc7518af60e4ccc56b3b0986470"
        ],
        "all_fruits": [
            "o0e742bd2ea4927f5193a2c68f8d4c51ea018b1ef3e3005a50727147d2cf57b"
        ],
    }

    head = OUTPUT.head
    head_2 = output_2.head
    for table_name, objects in expected_objects.items():
        assert head.get_table(table_name).objects == objects
        # Same contents must hash to the same objects in the second repo.
        assert head_2.get_table(table_name).objects == head.get_table(
            table_name).objects
コード例 #2
0
def test_splitfile_end_to_end_with_uploading(local_engine_empty, remote_engine,
                                             pg_repo_remote_multitag,
                                             mg_repo_remote, clean_minio):
    """End-to-end flow:
      * build a derived dataset from tables imported off the remote engine;
      * push it back, uploading all objects to S3 (not the remote engine);
      * wipe all local repositories and cached objects;
      * run a second splitfile that depends on the just-pushed dataset and
        does lazy checkouts to fetch the tables it needs."""
    # Build the derived dataset against the remote data first.
    execute_commands(load_splitfile("import_remote_multiple.splitfile"),
                     params={"TAG": "v1"},
                     output=OUTPUT)

    remote_output = Repository(OUTPUT.namespace, OUTPUT.repository,
                               remote_engine)

    # Push, uploading the objects to S3 instead of the remote engine itself.
    OUTPUT.push(remote_repository=remote_output,
                handler="S3",
                handler_options={})

    # Drop every local repository and clean up cached objects.
    for repo, _ in get_current_repositories(local_engine_empty):
        repo.delete()
    OUTPUT.objects.cleanup()

    final_stage = R("output_stage_2")
    execute_commands(
        load_splitfile("import_from_preuploaded_remote.splitfile"),
        output=final_stage)

    rows = final_stage.run_sql("SELECT id, name, fruit, vegetable FROM diet")
    assert rows == [(2, "James", "orange", "carrot")]
コード例 #3
0
def test_from_remote(local_engine_empty, pg_repo_remote_multitag):
    """Base a new dataset on a remote repository and verify the image layout
    for two different tags of the source."""
    execute_commands(load_splitfile("from_remote.splitfile"),
                     params={"TAG": "v1"},
                     output=OUTPUT)

    new_head = OUTPUT.head
    parent = OUTPUT.images.by_hash(new_head.parent_id)
    schema = OUTPUT.to_schema()

    # The parent image should contain only the two source tables.
    parent.checkout()
    assert OUTPUT.engine.table_exists(schema, "fruits")
    assert OUTPUT.engine.table_exists(schema, "vegetables")
    assert not OUTPUT.engine.table_exists(schema, "join_table")

    # The new head adds the joined table on top of the sources.
    new_head.checkout()
    assert OUTPUT.engine.table_exists(schema, "fruits")
    assert OUTPUT.engine.table_exists(schema, "vegetables")
    assert OUTPUT.run_sql("SELECT * FROM join_table") == [
        (1, "apple", "potato"),
        (2, "orange", "carrot"),
    ]

    # Rerun against v2 of the remote, where row 1 was removed from fruits.
    # Delete the output mountpoint first: otherwise the executor tries to
    # fetch the nonexistent 0000 commit from it.
    OUTPUT.delete()
    execute_commands(load_splitfile("from_remote.splitfile"),
                     params={"TAG": "v2"},
                     output=OUTPUT)

    assert OUTPUT.run_sql("SELECT * FROM join_table") == [
        (2, "orange", "carrot")
    ]
コード例 #4
0
def test_splitfile_cached(pg_repo_local):
    """Rerunning an identical splitfile must not create any new images."""
    execute_commands(
        load_splitfile("import_local_multiple_with_queries.splitfile"),
        output=OUTPUT)
    first_run_images = OUTPUT.images()
    assert len(first_run_images) == 4

    # Second run: every step should hit the cache, leaving the image list
    # exactly as it was after the first run.
    execute_commands(
        load_splitfile("import_local_multiple_with_queries.splitfile"),
        output=OUTPUT)
    assert OUTPUT.images() == first_run_images
コード例 #5
0
def test_rerun_with_from_import(local_engine_empty, pg_repo_remote_multitag):
    """Rebase an image built with FROM ... IMPORT onto a newer tag of its
    source and check the provenance of both resulting images."""
    execute_commands(load_splitfile("import_remote_multiple.splitfile"),
                     params={"TAG": "v1"},
                     output=OUTPUT)
    output_v1 = OUTPUT.head

    # Logical rebase of the freshly-built image onto V2 of the remote repo.
    rebuild_image(output_v1, {pg_repo_remote_multitag: "v2"})
    output_v2 = OUTPUT.head

    # Both derived versions must still exist and each must depend only on
    # the corresponding tag of the source repository.
    source_images = pg_repo_remote_multitag.images
    assert output_v1.provenance() == [
        (pg_repo_remote_multitag, source_images["v1"].image_hash)
    ]
    assert output_v2.provenance() == [
        (pg_repo_remote_multitag, source_images["v2"].image_hash)
    ]

    log_v1 = output_v1.get_log()
    log_v2 = output_v2.get_log()

    # Each log: CREATE TABLE commit, then IMPORT from the tag, then the
    # 00000... base commit (shared between the two versions).
    assert len(log_v1) == 3
    assert log_v1[2:] == log_v2[2:]
コード例 #6
0
def test_rerun_with_new_version(local_engine_empty, pg_repo_remote_multitag):
    """Rebase a FROM-based image onto a newer version of its source repo."""
    execute_commands(load_splitfile("from_remote.splitfile"),
                     params={"TAG": "v1"},
                     output=OUTPUT)
    output_v1 = OUTPUT.head

    # Logical rebase of the freshly-built image onto V2 of the remote repo.
    rebuild_image(output_v1, {pg_repo_remote_multitag: "v2"})
    output_v2 = OUTPUT.head
    assert local_engine_empty.run_sql("SELECT * FROM output.join_table") == [
        (2, "orange", "carrot")
    ]

    # Both derived versions must still exist and each must depend only on
    # the corresponding tag of the source repository.
    source_images = pg_repo_remote_multitag.images
    assert output_v1.provenance() == [
        (pg_repo_remote_multitag, source_images["v1"].image_hash)
    ]
    assert output_v2.provenance() == [
        (pg_repo_remote_multitag, source_images["v2"].image_hash)
    ]

    # v1 log: CREATE TABLE commit, then FROM v1. The v2 log has one extra
    # entry since FROM v2 is itself based on FROM v1 (both were cloned from
    # test/pg_mount), then continues as the v1 log does.
    assert output_v1.get_log()[1:] == output_v2.get_log()[2:]
コード例 #7
0
File: test_errors.py  Project: yanyu510/splitgraph
def test_splitfile_object_download_failure(local_engine_empty, pg_repo_remote_multitag):
    """Simulate an object download failure mid-IMPORT and check that the error
    propagates to the caller without leaving partially-built images behind,
    then verify the Splitfile succeeds once the object is available again."""
    # Simulate an object download failure (that happens inside of the engine during IMPORT
    # execution) propagating to the caller and not leaving the engine in an inconsistent state.

    object_id = pg_repo_remote_multitag.images["v1"].get_table("fruits").objects[0]
    assert object_id == "o0e742bd2ea4927f5193a2c68f8d4c51ea018b1ef3e3005a50727147d2cf57b"
    # Well-formed dummy object ID ("o" + 62 zeroes) to park the real object under.
    tmp_object_id = "o" + "0" * 62

    # Hide the object by renaming its physical table away on the remote engine.
    pg_repo_remote_multitag.engine.run_sql(
        SQL("ALTER TABLE splitgraph_meta.{} RENAME TO {}").format(
            Identifier(object_id), Identifier(tmp_object_id)
        )
    )

    # Sanity check: the output repository starts out empty.
    assert len(OUTPUT.images()) == 0
    assert _get_table_count(OUTPUT) == 0

    with pytest.raises(ObjectCacheError) as e:
        execute_commands(
            load_splitfile("import_remote_multiple.splitfile"), params={"TAG": "v1"}, output=OUTPUT
        )
    assert "Missing 1 object (%s)" % object_id in str(e.value)

    # Check the execution didn't create the image
    assert len(OUTPUT.images()) == 0
    assert _get_table_count(OUTPUT) == 0

    # Rename the object back and retry the Splitfile
    pg_repo_remote_multitag.engine.run_sql(
        SQL("ALTER TABLE splitgraph_meta.{} RENAME TO {}").format(
            Identifier(tmp_object_id), Identifier(object_id)
        )
    )

    execute_commands(
        load_splitfile("import_remote_multiple.splitfile"), params={"TAG": "v1"}, output=OUTPUT
    )
    OUTPUT.head.checkout()
    assert OUTPUT.run_sql("SELECT id, fruit, vegetable FROM join_table") == [
        (1, "apple", "potato"),
        (2, "orange", "carrot"),
    ]

    assert len(OUTPUT.images()) == 3
    # 2 tables in the first non-empty image, 3 tables in the second image
    # (previous 2 + joined data).
    assert _get_table_count(OUTPUT) == 5
コード例 #8
0
def test_calc_hash_short_circuit(pg_repo_local):
    """If the hash returned by a custom command's calc_hash() is unchanged for
    the same base image, execute() is skipped entirely; a changed base image
    forces a re-run even with the same command context hash."""
    # Test that if the hash returned by calc_hash is unchanged, we don't run execute() again
    execute_commands(load_splitfile("custom_command_calc_hash.splitfile"),
                     output=OUTPUT)

    # Run 1: table gets dropped (since the image doesn't exist)
    log = OUTPUT.head.get_log()
    assert len(
        log) == 3  # Base 000.., import from test/pg_mount, drop table fruits
    assert log[0].get_tables() == []
    # Hash: combination of the previous image hash and the command context (unchanged)
    # NOTE(review): "deadbeef" * 8 appears to be the constant context hash
    # produced by CalcHashTestCommand.calc_hash -- confirm against that command.
    assert log[0].image_hash == _combine_hashes(
        [log[1].image_hash, "deadbeef" * 8])

    # Run 2: same command context hash, same original image -- no effect
    with patch(
            "test.splitgraph.splitfile.test_custom_commands.CalcHashTestCommand.execute"
    ) as cmd:
        execute_commands(load_splitfile("custom_command_calc_hash.splitfile"),
                         output=OUTPUT)
        new_log = OUTPUT.head.get_log()
        assert new_log == log
        # Short-circuit: execute() was never invoked.
        assert cmd.call_count == 0

    # Run 3: alter test_pg_mount (same command context hash but different image)
    pg_repo_local.run_sql(
        """UPDATE fruits SET name = 'banana' where fruit_id = 1""")
    pg_repo_local.commit()
    with patch(
            "test.splitgraph.splitfile.test_custom_commands.CalcHashTestCommand.execute"
    ) as cmd:
        execute_commands(load_splitfile("custom_command_calc_hash.splitfile"),
                         output=OUTPUT)
        log_3 = OUTPUT.head.get_log()

        # The changed source image defeats the cache: execute() runs once.
        assert cmd.call_count == 1
        assert len(log_3) == 3

        # Since we patched the execute() out, it won't have run the DROP TABLE command so we don't check for that.
        # However, the sg_meta is still altered.
        assert log_3[0].image_hash == _combine_hashes(
            [log_3[1].image_hash, "deadbeef" * 8])
        # Import from test/pg_mount changed (since the actual repo changed)
        assert log_3[1].image_hash != log[1].image_hash
        # Base layer (00000...) unchanged
        assert log_3[2].image_hash == log[2].image_hash
コード例 #9
0
def test_splitfile_remote(local_engine_empty, pg_repo_remote_multitag):
    """Run the same remote-import splitfile against two tags of the source."""
    # Against v1, fruit_id = 1 still exists in the remote fruits table.
    execute_commands(load_splitfile("import_remote_multiple.splitfile"),
                     params={"TAG": "v1"},
                     output=OUTPUT)
    rows = OUTPUT.run_sql("SELECT id, fruit, vegetable FROM join_table")
    assert rows == [
        (1, "apple", "potato"),
        (2, "orange", "carrot"),
    ]

    # Against v2, fruit_id = 1 should have disappeared from the output.
    execute_commands(load_splitfile("import_remote_multiple.splitfile"),
                     params={"TAG": "v2"},
                     output=OUTPUT)
    rows = OUTPUT.run_sql("SELECT id, fruit, vegetable FROM join_table")
    assert rows == [(2, "orange", "carrot")]
コード例 #10
0
def test_splitfile_inline_sql(readonly_pg_repo, pg_repo_local):
    """Inline SQL in a splitfile can address repositories directly (via
    "namespace/repo:tag".table), joining a remote repo with local data; the
    recorded provenance pins both sources by hash and stores reformatted SQL."""
    # Test SQL commands accessing repos directly -- join a remote repo with
    # some local data.

    prepare_lq_repo(pg_repo_local, commit_after_every=False, include_pk=True)
    pg_repo_local.head.tag("v2")

    execute_commands(
        load_splitfile("inline_sql.splitfile"),
        output=OUTPUT,
    )

    new_head = OUTPUT.head
    new_head.checkout()
    assert new_head.get_tables() == ["balanced_diet"]
    assert OUTPUT.run_sql("SELECT * FROM balanced_diet") == [
        (1, "apple", None, "potato"),
        (2, "orange", datetime.datetime(2019, 1, 1, 12, 0), "carrot"),
    ]

    local_repo_head = pg_repo_local.head.image_hash
    other_repo_head = readonly_pg_repo.images["latest"].image_hash

    # The provenance must record both source repositories (remote and local)
    # and the SQL rewritten with tags replaced by the exact image hashes.
    assert new_head.provenance_data == [
        {
            "sources": [
                {
                    "source": "pg_mount",
                    "source_hash": other_repo_head,
                    "source_namespace": "otheruser",
                },
                {
                    "source": "pg_mount",
                    "source_hash": local_repo_head,
                    "source_namespace": "test"
                },
            ],
            "sql": ("CREATE TABLE balanced_diet\n"
                    "  AS SELECT fruits.fruit_id AS id\n"
                    "          , fruits.name AS fruit\n"
                    "          , my_fruits.timestamp AS timestamp\n"
                    "          , vegetables.name AS vegetable\n"
                    "     FROM "
                    '"otheruser/pg_mount:{0}".fruits '
                    "AS fruits\n"
                    "          INNER JOIN "
                    '"otheruser/pg_mount:{0}".vegetables '
                    "AS vegetables ON fruits.fruit_id = vegetable_id\n"
                    "          LEFT JOIN "
                    '"test/pg_mount:{1}".fruits '
                    "AS my_fruits ON my_fruits.fruit_id = fruits.fruit_id;\n"
                    "\n"
                    "ALTER TABLE balanced_diet ADD PRIMARY KEY (id)").format(
                        other_repo_head, local_repo_head),
            "type":
            "SQL",
        },
    ]
コード例 #11
0
def test_splitfile_remote_hash(local_engine_empty, pg_repo_remote):
    """Importing from a remote by truncated image hash instead of a tag."""
    head_hash = pg_repo_remote.head.image_hash
    execute_commands(load_splitfile("import_remote_multiple.splitfile"),
                     params={"TAG": head_hash[:10]},
                     output=OUTPUT)
    rows = OUTPUT.run_sql("SELECT id, fruit, vegetable FROM output.join_table")
    assert rows == [
        (1, "apple", "potato"),
        (2, "orange", "carrot"),
    ]
コード例 #12
0
def test_splitfile_schema_changes(pg_repo_local, mg_repo_local):
    """Rerun a splitfile after the source data changes; both the old and the
    new output heads must stay checkoutable with their own contents."""
    execute_commands(load_splitfile("schema_changes.splitfile"), output=OUTPUT)
    old_output_head = OUTPUT.head

    # Alter the source dataset and rerun the splitfile.
    pg_repo_local.run_sql("INSERT INTO fruits VALUES (12, 'mayonnaise')")
    pg_repo_local.commit()
    execute_commands(load_splitfile("schema_changes.splitfile"), output=OUTPUT)
    new_output_head = OUTPUT.head

    # The old image still only carries the original join result.
    old_output_head.checkout()
    assert OUTPUT.run_sql("SELECT * FROM spirit_fruits") == [
        ("James", "orange", 12)
    ]

    # New image: mayonnaise joined with Alex, ID 12 + 10 = 22.
    new_output_head.checkout()
    assert OUTPUT.run_sql("SELECT * FROM spirit_fruits") == [
        ("James", "orange", 12),
        ("Alex", "mayonnaise", 22),
    ]
コード例 #13
0
def test_drawing(pg_repo_local):
    """Smoke test: the tree-drawing code must not throw.

    Doesn't really check anything beyond that."""
    execute_commands(load_splitfile("import_local.splitfile"), output=OUTPUT)

    # Create a second branch so that multi-branch repositories render too.
    pg_repo_local.images()[1].checkout()
    pg_repo_local.run_sql("INSERT INTO fruits VALUES (3, 'kiwi')")
    pg_repo_local.commit()
    rebuild_image(OUTPUT.head, {pg_repo_local: pg_repo_local.head.image_hash})

    render_tree(OUTPUT)
コード例 #14
0
def test_local_import_splitfile(pg_repo_local):
    """IMPORT from a local repo: the aliased table exists only in the head."""
    execute_commands(load_splitfile("import_local.splitfile"), output=OUTPUT)
    head = OUTPUT.head
    schema = OUTPUT.to_schema()

    # Before the import, neither table is present.
    OUTPUT.images.by_hash(head.parent_id).checkout()
    assert not OUTPUT.engine.table_exists(schema, "my_fruits")
    assert not OUTPUT.engine.table_exists(schema, "fruits")

    # After: only the aliased table (my_fruits) was brought in.
    head.checkout()
    assert OUTPUT.engine.table_exists(schema, "my_fruits")
    assert not OUTPUT.engine.table_exists(schema, "fruits")
コード例 #15
0
def test_update_without_import_splitfile(pg_repo_local):
    """Correct commits are produced by a splitfile that creates/updates
    tables without importing them (both for newly created and already
    existing tables on an existing mountpoint)."""
    execute_commands(load_splitfile("update_without_import.splitfile"),
                     output=OUTPUT)
    log = OUTPUT.head.get_log()

    # Parent image: table created but not yet populated.
    log[1].checkout()
    assert OUTPUT.run_sql("SELECT * FROM my_fruits") == []

    # Head image: the update has been applied.
    log[0].checkout()
    assert OUTPUT.run_sql("SELECT * FROM my_fruits") == [(1, "pineapple")]
コード例 #16
0
def test_basic_splitfile(pg_repo_local):
    """Each splitfile command stacks one image with the expected contents."""
    execute_commands(load_splitfile("create_table.splitfile"), output=OUTPUT)
    # Oldest-first: base 000..., CREATE TABLE, first INSERT, second INSERT.
    log = list(reversed(OUTPUT.head.get_log()))

    expected_rows = [
        [],
        [(1, "pineapple")],
        [(1, "pineapple"), (2, "banana")],
    ]
    for image, rows in zip(log[1:], expected_rows):
        image.checkout()
        assert OUTPUT.run_sql("SELECT * FROM my_fruits") == rows
コード例 #17
0
def test_splitfile_with_external_sql(readonly_pg_repo):
    """Run a splitfile whose SQL body lives in an external file."""
    # Tests run from the repo root, so the SQL path is passed in explicitly.
    execute_commands(
        load_splitfile("external_sql.splitfile"),
        params={"EXTERNAL_SQL_FILE": RESOURCES + "external_sql.sql"},
        output=OUTPUT,
    )
    rows = OUTPUT.run_sql("SELECT id, fruit, vegetable FROM join_table")
    assert rows == [
        (1, "apple", "potato"),
        (2, "orange", "carrot"),
    ]
コード例 #18
0
File: test_errors.py  Project: yanyu510/splitgraph
def test_splitfile_sql_failure(local_engine_empty, pg_repo_remote_multitag):
    """A failing SQL stage keeps the images built by earlier stages intact."""
    # Nothing exists before the run.
    assert len(OUTPUT.images()) == 0
    assert _get_table_count(OUTPUT) == 0

    with pytest.raises(psycopg2.errors.UndefinedTable) as e:
        execute_commands(
            load_splitfile("import_remote_broken_stage_2.splitfile"),
            output=OUTPUT)
    assert 'relation "nonexistent_fruits_table" does not exist' in str(e.value)

    # The dummy 000... image and the IMPORT-stage image were still created.
    assert len(OUTPUT.images()) == 2
    assert _get_table_count(OUTPUT) == 2
    assert sorted(OUTPUT.images["latest"].get_tables()) == [
        "my_fruits",
        "vegetables",
    ]
コード例 #19
0
def test_from_remote_hash(local_engine_empty, pg_repo_remote):
    """FROM a remote repository addressed by truncated image hash."""
    head_hash = pg_repo_remote.head.image_hash
    execute_commands(load_splitfile("from_remote.splitfile"),
                     params={"TAG": head_hash[:10]},
                     output=OUTPUT)

    schema = OUTPUT.to_schema()
    assert OUTPUT.engine.table_exists(schema, "fruits")
    assert OUTPUT.engine.table_exists(schema, "vegetables")
    assert OUTPUT.run_sql("SELECT * FROM join_table") == [
        (1, "apple", "potato"),
        (2, "orange", "carrot"),
    ]
コード例 #20
0
def test_rerun_multiline_sql_roundtripping(pg_repo_local):
    """An `sgr rebuild` of an image built from multiline SQL must round-trip
    to the identical image instead of creating a new one."""
    execute_commands(load_splitfile("multiline_sql.splitfile"), output=OUTPUT)
    head = OUTPUT.head

    expected_sql = ("SQL {INSERT INTO fruits \n"
                    "VALUES (3, 'banana')\n"
                    "     , (4, 'pineapple')}")
    assert head.to_splitfile()[1] == expected_sql

    # Rebuilding with no parameter changes must yield the very same image.
    rebuild_image(head, {})
    head_v2 = OUTPUT.head
    assert head_v2.to_splitfile()[1] == expected_sql
    assert head_v2 == head
コード例 #21
0
def test_provenance_with_from(local_engine_empty, pg_repo_remote_multitag):
    """Forward and reverse provenance for a FROM-based image."""
    execute_commands(load_splitfile("from_remote.splitfile"),
                     params={"TAG": "v1"},
                     output=OUTPUT)

    source = pg_repo_remote_multitag.images["v1"]
    # The new head depends only on the v1 image of the source.
    assert OUTPUT.head.provenance() == [
        (pg_repo_remote_multitag, source.image_hash)
    ]

    # Reverse provenance: the source must list our head as a dependent
    # (searched on the local engine, where the derived image lives).
    assert source.provenance(reverse=True, engine=local_engine_empty) == [
        (OUTPUT, OUTPUT.head.image_hash)
    ]

    # On its own engine, the source has no recorded dependents.
    assert source.provenance() == []
コード例 #22
0
def test_custom_command_errors(pg_repo_local):
    """SplitfileError is raised both for undefined custom commands and for
    commands whose implementation can't be imported."""
    cases = [
        # Undefined command.
        ("NOP", "Custom command NOP not found in the config!"),
        # Commands that are configured but fail to import.
        ("BROKEN1", "Error loading custom command BROKEN1"),
        ("BROKEN2", "Error loading custom command BROKEN2"),
    ]
    for command, expected_error in cases:
        with pytest.raises(SplitfileError) as e:
            execute_commands(
                load_splitfile("custom_command_dummy.splitfile").replace(
                    "DUMMY", command),
                output=OUTPUT,
            )
        assert expected_error in str(e.value)
コード例 #23
0
def test_dummy_command(pg_repo_local):
    """The custom-command config is wired into the splitfile executor and the
    command arguments are passed through correctly."""
    execute_commands(load_splitfile("custom_command_dummy.splitfile"),
                     output=OUTPUT)
    log = OUTPUT.head.get_log()

    # Base 000..., import from test/pg_mount, DUMMY run creating a dupe image.
    assert len(log) == 3
    assert log[0].get_tables() == log[1].get_tables()
    assert log[0].comment == 'DUMMY arg1 --arg2 "argument three"'

    # DUMMY returns a random hash every time, so a rerun stacks yet another
    # image on top while the base and import images are reused.
    execute_commands(load_splitfile("custom_command_dummy.splitfile"),
                     output=OUTPUT)
    new_log = OUTPUT.head.get_log()

    # Two common images: 0000... and the import.
    assert new_log[2] == log[2]
    assert new_log[1] == log[1]
    # The DUMMY command produced a fresh image with a random hash.
    assert new_log[0] != log[0]
コード例 #24
0
def test_from_multistage(local_engine_empty, pg_repo_remote_multitag):
    """A multistage splitfile produces both 'output' and 'output_stage_2'."""
    final_stage = R("output_stage_2")

    # Produces two repositories: output and output_stage_2.
    execute_commands(load_splitfile("from_remote_multistage.splitfile"),
                     params={"TAG": "v1"})

    # The final output ('output_stage_2') should contain only join_table
    # from the first stage (OUTPUT), materialized as balanced_diet.
    assert final_stage.run_sql("SELECT * FROM balanced_diet") == [
        (1, "apple", "potato"),
        (2, "orange", "carrot"),
    ]
    # Its commit sits directly on top of the original empty image.
    assert final_stage.head.parent_id == "0" * 64
    assert final_stage.head.get_tables() == ["balanced_diet"]
コード例 #25
0
def test_import_all(local_engine_empty):
    """IMPORT ALL from a mounted DB brings every table into the new image."""
    execute_commands(load_splitfile("import_all_from_mounted.splitfile"),
                     output=OUTPUT)

    head = OUTPUT.head
    parent = OUTPUT.images.by_hash(head.parent_id)

    expected = {
        "vegetables": [(1, "potato"), (2, "carrot")],
        "fruits": [(1, "apple"), (2, "orange")],
    }

    # None of the tables exist in the parent image.
    parent.checkout()
    for table in expected:
        assert not OUTPUT.engine.table_exists(OUTPUT.to_schema(), table)

    # All of them, with the right contents, exist in the head.
    head.checkout()
    for table, rows in expected.items():
        assert OUTPUT.run_sql("SELECT * FROM %s" % table) == rows
コード例 #26
0
def test_from_local(pg_repo_local):
    """FROM a local repository: sources in the parent, join in the head."""
    execute_commands(load_splitfile("from_local.splitfile"), output=OUTPUT)
    new_head = OUTPUT.head
    schema = OUTPUT.to_schema()

    # Parent image: the two source tables only.
    OUTPUT.images.by_hash(new_head.parent_id).checkout()
    assert OUTPUT.engine.table_exists(schema, "fruits")
    assert OUTPUT.engine.table_exists(schema, "vegetables")
    assert not OUTPUT.engine.table_exists(schema, "join_table")

    # Head image: the sources plus the joined table.
    new_head.checkout()
    assert OUTPUT.engine.table_exists(schema, "fruits")
    assert OUTPUT.engine.table_exists(schema, "vegetables")
    assert OUTPUT.run_sql("SELECT * FROM join_table") == [
        (1, "apple", "potato"),
        (2, "orange", "carrot"),
    ]
コード例 #27
0
def test_splitfile_recreate(local_engine_empty, pg_repo_remote_multitag):
    """An executed splitfile can be reconstructed from the image's provenance
    data, with the FROM ... IMPORT pinned to the exact source image hash."""
    execute_commands(
        load_splitfile("import_with_custom_query_and_sql.splitfile"),
        params={"TAG": "v1"},
        output=OUTPUT,
    )
    recreated_commands = OUTPUT.head.to_splitfile()
    # The v1 tag comes back resolved to the concrete image hash.
    assert recreated_commands == [
        "FROM test/pg_mount:%s IMPORT {SELECT *\n" %
        pg_repo_remote_multitag.images["v1"].image_hash + """FROM fruits
WHERE name = 'orange'} AS my_fruits, {SELECT *
FROM vegetables
WHERE name LIKE '%o'} AS o_vegetables, vegetables AS vegetables, fruits AS all_fruits""",
        """SQL {CREATE TABLE test_table
  AS SELECT *
     FROM all_fruits}""",
    ]
コード例 #28
0
def test_provenance(local_engine_empty, pg_repo_remote_multitag):
    """Forward and reverse provenance for an IMPORT-based image."""
    execute_commands(load_splitfile("import_remote_multiple.splitfile"),
                     params={"TAG": "v1"},
                     output=OUTPUT)

    source = pg_repo_remote_multitag.images["v1"]
    # The new head depends only on the v1 image of the source.
    assert OUTPUT.head.provenance() == [
        (pg_repo_remote_multitag, source.image_hash)
    ]

    # Reverse provenance: the source repository lives on the remote engine,
    # so its dependents have to be searched for on the local engine instead.
    assert source.provenance(reverse=True, engine=local_engine_empty) == [
        (OUTPUT, OUTPUT.head.image_hash)
    ]

    # On its own engine, the source has no recorded dependents.
    assert source.provenance() == []
コード例 #29
0
def test_splitfile_recreate_custom_from(local_engine_empty,
                                        pg_repo_remote_multitag):
    """A FROM-based image recreates its splitfile with FROM pinned to the
    source image hash and its SQL stored in reformatted form."""
    execute_commands(load_splitfile("from_remote.splitfile"),
                     params={"TAG": "v1"},
                     output=OUTPUT)
    recreated_commands = OUTPUT.head.to_splitfile()

    # The v1 tag comes back resolved to the concrete image hash.
    assert recreated_commands == [
        "FROM test/pg_mount:%s" %
        pg_repo_remote_multitag.images["v1"].image_hash,
        # Test provenance is recorded using the reformatted SQL
        """SQL {CREATE TABLE join_table
  AS SELECT fruit_id AS id
          , fruits.name AS fruit
          , vegetables.name AS vegetable
     FROM fruits
          INNER JOIN vegetables ON fruit_id = vegetable_id}""",
    ]
コード例 #30
0
def test_advanced_splitfile(pg_repo_local):
    """Multiple IMPORTs with custom queries: verify which tables materialize
    and in which image they appear."""
    execute_commands(
        load_splitfile("import_local_multiple_with_queries.splitfile"),
        output=OUTPUT)
    schema = OUTPUT.to_schema()

    assert OUTPUT.engine.table_exists(schema, "my_fruits")
    assert OUTPUT.engine.table_exists(schema, "vegetables")
    assert not OUTPUT.engine.table_exists(schema, "fruits")
    assert OUTPUT.engine.table_exists(schema, "join_table")

    head = OUTPUT.head
    # join_table only appears in the head image, not in its parent.
    OUTPUT.images.by_hash(head.parent_id).checkout()
    assert not OUTPUT.engine.table_exists(schema, "join_table")

    head.checkout()
    assert OUTPUT.run_sql("SELECT id, fruit, vegetable FROM join_table") == [
        (2, "orange", "carrot")
    ]
    assert OUTPUT.run_sql("SELECT * FROM my_fruits") == [(2, "orange")]