Example #1
def test_splitfile_rebuild_update(local_engine_empty, pg_repo_remote_multitag):
    runner = CliRunner()

    result = runner.invoke(
        build_c,
        [
            RESOURCES + "import_remote_multiple.splitfile", "-a", "TAG", "v1",
            "-o", "output"
        ],
    )
    assert result.exit_code == 0

    # Rerun the output:latest against v2 of the test/pg_mount
    result = runner.invoke(rebuild_c,
                           ["output:latest", "--against", "test/pg_mount:v2"])
    output_v2 = OUTPUT.head
    assert result.exit_code == 0
    v2 = pg_repo_remote_multitag.images["v2"]
    assert output_v2.provenance() == [(pg_repo_remote_multitag, v2.image_hash)]

    # Now rerun the output:latest against the latest version of everything.
    # In this case, this should all resolve to the same version of test/pg_mount (v2) and not produce
    # any extra commits.
    curr_commits = OUTPUT.images()
    result = runner.invoke(rebuild_c, ["output:latest", "-u"])
    assert result.exit_code == 0
    assert output_v2 == OUTPUT.head
    assert OUTPUT.images() == curr_commits
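
The examples in this section drive Splitgraph's Click-based CLI through CliRunner. The sketch below shows that invocation pattern on a hypothetical toy_rebuild command (not part of the Splitgraph CLI), assuming only that click and pytest are installed.

import click
from click.testing import CliRunner


@click.command()
@click.argument("image")
@click.option("--against", default=None, help="Image to rebuild against.")
def toy_rebuild(image, against):
    """Hypothetical stand-in for rebuild_c."""
    click.echo("rebuilding %s against %s" % (image, against))


def test_toy_rebuild():
    runner = CliRunner()
    # invoke() captures stdout and the exit code instead of raising on errors.
    result = runner.invoke(toy_rebuild, ["output:latest", "--against", "test/pg_mount:v2"])
    assert result.exit_code == 0
    assert "output:latest" in result.output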
Example #2
def test_splitfile_cached(pg_repo_local):
    # Check that no new commits/snaps are created if we rerun the same splitfile
    execute_commands(
        load_splitfile("import_local_multiple_with_queries.splitfile"),
        output=OUTPUT)
    images = OUTPUT.images()
    assert len(images) == 4

    execute_commands(
        load_splitfile("import_local_multiple_with_queries.splitfile"),
        output=OUTPUT)
    new_images = OUTPUT.images()
    assert new_images == images
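
A toy sketch of the rerun-and-compare check above, assuming a cached build can be modelled as a builder that only appends an image for inputs it has not seen before; Builder here is hypothetical, not the Splitgraph API.

class Builder:
    """Hypothetical stand-in for a repository that caches Splitfile builds."""

    def __init__(self):
        self.images = []
        self._seen = set()

    def build(self, splitfile_text):
        # Only unseen inputs produce a new image; reruns are served from cache.
        if splitfile_text not in self._seen:
            self._seen.add(splitfile_text)
            self.images.append("image-%d" % len(self.images))


def test_cached_build():
    builder = Builder()
    builder.build("FROM test/pg_mount IMPORT fruits")
    images = list(builder.images)
    builder.build("FROM test/pg_mount IMPORT fruits")  # rerun: no new image
    assert builder.images == images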
Example #3
def test_splitfile_sql_failure(local_engine_empty, pg_repo_remote_multitag):
    assert len(OUTPUT.images()) == 0
    assert _get_table_count(OUTPUT) == 0

    with pytest.raises(psycopg2.errors.UndefinedTable) as e:
        execute_commands(load_splitfile("import_remote_broken_stage_2.splitfile"), output=OUTPUT)
    assert 'relation "nonexistent_fruits_table" does not exist' in str(e.value)

    # Check the execution created the first dummy (000...) image and the second image
    # with IMPORT results
    assert len(OUTPUT.images()) == 2
    assert _get_table_count(OUTPUT) == 2
    assert sorted(OUTPUT.images["latest"].get_tables()) == ["my_fruits", "vegetables"]
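
A minimal, self-contained sketch of the pytest.raises pattern used above, with a toy function standing in for execute_commands and a plain LookupError standing in for psycopg2.errors.UndefinedTable.

import pytest


def toy_execute(table):
    """Hypothetical stand-in for a Splitfile stage that queries a missing table."""
    raise LookupError('relation "%s" does not exist' % table)


def test_toy_execute_failure():
    with pytest.raises(LookupError) as e:
        toy_execute("nonexistent_fruits_table")
    # e.value is the raised exception instance; its message can be inspected.
    assert 'relation "nonexistent_fruits_table" does not exist' in str(e.value)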
Example #4
def test_rollback_on_error(local_engine_empty):
    # For e.g. commit/checkout/other commands, we don't do commits/rollbacks
    # in the library itself and expect the caller to manage transactions. In CLI,
    # we need to make sure that erroneous transactions (e.g. interrupted SG commits)
    # are rolled back correctly instead of being committed.
    runner = CliRunner()

    OUTPUT.init()
    OUTPUT.run_sql(
        "CREATE TABLE test (key INTEGER PRIMARY KEY, value_1 VARCHAR, value_2 INTEGER)"
    )
    for i in range(11):
        OUTPUT.run_sql("INSERT INTO test VALUES (%s, %s, %s)",
                       (i + 1, chr(ord("a") + i), i * 2))
    head = OUTPUT.commit(chunk_size=5,
                         in_fragment_order={"test": ["key", "value_1"]})
    assert len(OUTPUT.images()) == 2
    assert len(OUTPUT.objects.get_all_objects()) == 3

    _alter_diff_splitting_dataset()
    OUTPUT.commit_engines()

    # Simulate the commit getting interrupted by the first object going through and being
    # recorded, then a KeyboardInterrupt being raised.
    called_once = False

    def interrupted_register(*args, **kwargs):
        nonlocal called_once
        if called_once:
            raise BaseException("something went wrong")
        else:
            called_once = True
            return FragmentManager._register_object(*args, **kwargs)

    with patch(
            "splitgraph.core.fragment_manager.FragmentManager._register_object",
            side_effect=interrupted_register,
    ) as ro:
        with pytest.raises(BaseException):
            runner.invoke(cli, ["commit", OUTPUT.to_schema()])

    # Check that no image/object metadata was written
    assert len(OUTPUT.images()) == 2
    assert len(OUTPUT.objects.get_all_objects()) == 3

    assert ro.call_count == 2

    # Check that the data in the audit trigger wasn't deleted
    assert len(
        OUTPUT.engine.get_pending_changes(OUTPUT.to_schema(),
                                          table="test")) == 6
Example #5
def test_splitfile_object_download_failure(local_engine_empty, pg_repo_remote_multitag):
    # Simulate an object download failure (that happens inside of the engine during IMPORT
    # execution) propagating to the caller and not leaving the engine in an inconsistent state.

    object_id = pg_repo_remote_multitag.images["v1"].get_table("fruits").objects[0]
    assert object_id == "o0e742bd2ea4927f5193a2c68f8d4c51ea018b1ef3e3005a50727147d2cf57b"
    tmp_object_id = "o" + "0" * 62

    pg_repo_remote_multitag.engine.run_sql(
        SQL("ALTER TABLE splitgraph_meta.{} RENAME TO {}").format(
            Identifier(object_id), Identifier(tmp_object_id)
        )
    )

    assert len(OUTPUT.images()) == 0
    assert _get_table_count(OUTPUT) == 0

    with pytest.raises(ObjectCacheError) as e:
        execute_commands(
            load_splitfile("import_remote_multiple.splitfile"), params={"TAG": "v1"}, output=OUTPUT
        )
    assert "Missing 1 object (%s)" % object_id in str(e.value)

    # Check the execution didn't create the image
    assert len(OUTPUT.images()) == 0
    assert _get_table_count(OUTPUT) == 0

    # Rename the object back and retry the Splitfile
    pg_repo_remote_multitag.engine.run_sql(
        SQL("ALTER TABLE splitgraph_meta.{} RENAME TO {}").format(
            Identifier(tmp_object_id), Identifier(object_id)
        )
    )

    execute_commands(
        load_splitfile("import_remote_multiple.splitfile"), params={"TAG": "v1"}, output=OUTPUT
    )
    OUTPUT.head.checkout()
    assert OUTPUT.run_sql("SELECT id, fruit, vegetable FROM join_table") == [
        (1, "apple", "potato"),
        (2, "orange", "carrot"),
    ]

    assert len(OUTPUT.images()) == 3
    # 2 tables in the first non-empty image, 3 tables in the second image
    # (previous 2 + joined data).
    assert _get_table_count(OUTPUT) == 5
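
A toy sketch of the break-then-restore pattern above: make an object temporarily unavailable, assert the failure surfaces cleanly and leaves no partial state, then restore the object and retry. Store and its methods are hypothetical stand-ins, not the Splitgraph object cache.

import pytest


class Store:
    """Hypothetical stand-in for an object store backing IMPORT execution."""

    def __init__(self):
        self.objects = {"o" + "0" * 62: b"fragment data"}
        self.images = []

    def build(self, object_id):
        if object_id not in self.objects:
            # Fail before recording any image metadata.
            raise KeyError("Missing 1 object (%s)" % object_id)
        self.images.append(object_id)


def test_break_then_restore():
    object_id = "o" + "0" * 62
    store = Store()

    # "Rename" the object away to simulate a download failure.
    hidden = store.objects.pop(object_id)
    with pytest.raises(KeyError) as e:
        store.build(object_id)
    assert "Missing 1 object" in str(e.value)
    assert store.images == []  # no partial image was recorded

    # Restore the object and retry.
    store.objects[object_id] = hidden
    store.build(object_id)
    assert store.images == [object_id]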
Example #6
def test_provenance_inline_sql(readonly_pg_repo, pg_repo_local):
    prepare_lq_repo(pg_repo_local, commit_after_every=False, include_pk=True)
    pg_repo_local.head.tag("v2")

    execute_commands(
        load_splitfile("inline_sql.splitfile"),
        output=OUTPUT,
    )

    new_head = OUTPUT.head

    remote_input = readonly_pg_repo.images["latest"]
    local_input = pg_repo_local.images["latest"]

    assert set(new_head.provenance()) == {
        (
            readonly_pg_repo,
            remote_input.image_hash,
        ),
        (pg_repo_local, local_input.image_hash),
    }

    assert remote_input.provenance(reverse=True, engine=OUTPUT.engine) == [
        (OUTPUT, OUTPUT.head.image_hash)
    ]

    assert local_input.provenance(reverse=True, engine=OUTPUT.engine) == [
        (OUTPUT, OUTPUT.head.image_hash)
    ]
    expected_sql = ("SQL {{CREATE TABLE balanced_diet\n"
                    "  AS SELECT fruits.fruit_id AS id\n"
                    "          , fruits.name AS fruit\n"
                    "          , my_fruits.timestamp AS timestamp\n"
                    "          , vegetables.name AS vegetable\n"
                    "     FROM "
                    '"otheruser/pg_mount:{0}".fruits AS '
                    "fruits\n"
                    "          INNER JOIN "
                    '"otheruser/pg_mount:{0}".vegetables '
                    "AS vegetables ON fruits.fruit_id = vegetable_id\n"
                    "          LEFT JOIN "
                    '"test/pg_mount:{1}".fruits AS '
                    "my_fruits ON my_fruits.fruit_id = fruits.fruit_id;\n"
                    "\n"
                    "ALTER TABLE balanced_diet ADD PRIMARY KEY (id)}}").format(
                        remote_input.image_hash, local_input.image_hash)

    assert new_head.to_splitfile() == [expected_sql]

    assert new_head.to_splitfile(source_replacement={
        pg_repo_local: "new_local_tag",
        readonly_pg_repo: "new_remote_tag"
    }) == [
        expected_sql.replace(remote_input.image_hash,
                             "new_remote_tag").replace(local_input.image_hash,
                                                       "new_local_tag")
    ]

    assert len(OUTPUT.images()) == 2

    # Try rerunning the Splitfile against the same original data (check caching)
    rebuild_image(
        OUTPUT.head,
        source_replacement={
            pg_repo_local: "latest",
            readonly_pg_repo: "latest"
        },
    )

    assert len(OUTPUT.images()) == 2

    # Change pg_repo_local and rerun the Splitfile against it.
    pg_repo_local.run_sql(
        "UPDATE fruits SET timestamp = '2020-01-01 12:00:00' WHERE fruit_id = 2"
    )
    new_head = pg_repo_local.commit()

    rebuild_image(
        OUTPUT.head,
        source_replacement={
            pg_repo_local: new_head.image_hash,
            readonly_pg_repo: "latest"
        },
    )

    assert len(OUTPUT.images()) == 3
    assert OUTPUT.run_sql("SELECT * FROM balanced_diet") == [
        (1, "apple", None, "potato"),
        (2, "orange", datetime.datetime(2020, 1, 1, 12, 0), "carrot"),
    ]