예제 #1
0
def test_size_mismatch():
    """
    Merging manifests whose rows share an md5 but disagree on size is
    expected to fail with ``csv.Error``.
    """
    # Build the call arguments up front so the ``raises`` block contains
    # only the call that is expected to fail.
    merge_kwargs = {
        "directory": "tests/merge_manifests/size_mismatch/input",
        "output_manifest": "merged-output-test-manifest.tsv",
        "columns_with_arrays": ["extra_data", "more_data", "some_additional_data"],
    }
    with pytest.raises(csv.Error):
        merge_bucket_manifests(**merge_kwargs)
예제 #2
0
def test_multiple_urls():
    """
    Merge the ``multiple_urls`` input directory (a manifest row with multiple
    urls) and compare the result against the expected merged manifest.
    """
    output_path = "merged-output-test-manifest.tsv"
    expected_path = (
        "tests/merge_manifests/multiple_urls/expected-merged-output-manifest.tsv"
    )

    merge_bucket_manifests(
        directory="tests/merge_manifests/multiple_urls/input",
        output_manifest=output_path,
        columns_with_arrays=["extra_data", "more_data", "some_additional_data"],
    )

    # Read the freshly written output first, then the expected fixture.
    actual_rows = _get_tsv_data(output_path)
    expected_rows = _get_tsv_data(expected_path)
    assert actual_rows == expected_rows
예제 #3
0
def test_writing_to_csv():
    """
    Merge the ``regular`` input directory into a ``.csv`` output file and
    check that its contents match the expected (tsv) merged manifest.
    """
    csv_output_path = "merged-output-test-manifest.csv"
    expected_path = "tests/merge_manifests/regular/expected-merged-output-manifest.tsv"

    merge_bucket_manifests(
        directory="tests/merge_manifests/regular/input/",
        output_manifest=csv_output_path,
        columns_with_arrays=["extra_data", "more_data", "some_additional_data"],
    )

    # The csv output is read with "," as the second argument; the expected
    # fixture is read with the helper's default.
    actual_rows = _get_tsv_data(csv_output_path, ",")
    expected_rows = _get_tsv_data(expected_path)
    assert actual_rows == expected_rows
예제 #4
0
def test_duplicate_values():
    """
    Merge the ``duplicate_values`` input directory, where two input manifests
    carry duplicate values ("sushi" in manifest2.tsv and manifest3.tsv), and
    compare the result against the expected merged manifest.
    """
    output_path = "merged-output-test-manifest.tsv"
    expected_path = (
        "tests/merge_manifests/duplicate_values/expected-merged-output-manifest.tsv"
    )
    # "food" is treated as an array column in addition to the usual three.
    array_columns = ["extra_data", "more_data", "some_additional_data", "food"]

    merge_bucket_manifests(
        directory="tests/merge_manifests/duplicate_values/input",
        output_manifest=output_path,
        columns_with_arrays=array_columns,
    )

    merged_rows = _get_tsv_data(output_path)
    expected_rows = _get_tsv_data(expected_path)
    assert merged_rows == expected_rows
예제 #5
0
def test_regular_merge_bucket_manifests():
    """
    Run merge_bucket_manifests on the ``regular`` input directory and check
    that the manifest it writes matches the expected output manifest.
    """
    output_path = "merged-output-test-manifest.tsv"
    expected_path = "tests/merge_manifests/regular/expected-merged-output-manifest.tsv"

    merge_bucket_manifests(
        directory="tests/merge_manifests/regular/input/",
        output_manifest=output_path,
        columns_with_arrays=["extra_data", "more_data", "some_additional_data"],
    )

    produced = _get_tsv_data(output_path)
    expected = _get_tsv_data(expected_path)
    assert produced == expected
예제 #6
0
def test_same_guid_for_same_hash():
    """
    Merge input manifests whose rows have matching guids, md5, and size, with
    multiple guids per hash allowed, and compare against the expected output.
    """
    fixture_root = "tests/merge_manifests/same_guid_for_same_hash"
    output_path = "merged-output-test-manifest.tsv"

    merge_bucket_manifests(
        directory=fixture_root + "/input",
        output_manifest=output_path,
        columns_with_arrays=["extra_data", "more_data", "some_additional_data"],
        allow_mult_guids_per_hash=True,
    )

    merged_rows = _get_tsv_data(output_path)
    expected_rows = _get_tsv_data(
        fixture_root + "/expected-merged-output-manifest.tsv"
    )
    assert merged_rows == expected_rows
예제 #7
0
def test_multiple_guids_per_hash():
    """
    Merge the ``multiple_guids_per_hash`` input directory with multiple guids
    per hash allowed and compare the result against the expected manifest.
    """
    fixture_root = "tests/merge_manifests/multiple_guids_per_hash"
    output_path = "merged-output-test-manifest.tsv"

    merge_bucket_manifests(
        directory=fixture_root + "/input",
        output_manifest=output_path,
        columns_with_arrays=["extra_data", "more_data", "some_additional_data"],
        allow_mult_guids_per_hash=True,
    )

    merged_rows = _get_tsv_data(output_path)
    expected_rows = _get_tsv_data(
        fixture_root + "/expected-merged-output-manifest.tsv"
    )
    assert merged_rows == expected_rows