def test_functional(tmp_dir_fixture, monkeypatch):  # NOQA
    from smarttoolbase import SmartTool

    input_admin_metadata = dtoolcore.generate_admin_metadata(
        "my_input_ds", "testing_bot")
    input_dataset = dtoolcore.generate_proto_dataset(
        admin_metadata=input_admin_metadata,
        prefix=tmp_dir_fixture,
        storage="file")
    input_dataset.create()
    input_dataset.put_readme("")
    input_dataset.freeze()

    output_admin_metadata = dtoolcore.generate_admin_metadata(
        "my_output_ds", "testing_bot")
    output_dataset = dtoolcore.generate_proto_dataset(
        admin_metadata=output_admin_metadata,
        prefix=tmp_dir_fixture,
        storage="file")
    output_dataset.create()
    output_dataset.put_readme("")

    with SmartTool(
            input_uri=input_dataset.uri,
            output_uri=output_dataset.uri,
    ) as smart_tool:

        assert smart_tool.input_dataset.uri == input_dataset.uri
        assert smart_tool.output_proto_dataset.uri == output_dataset.uri

        smart_tool.base_commands = [
            "bowtie2 -x {reference_prefix} -1 {forward_read_fpath} -2 {reverse_read_fpath} -S {output_fpath}",
        ]
        smart_tool.outputs = []

        smart_tool.base_command_props = {
            "reference_prefix": "/tmp/reference/Athaliana",
            "forward_read_fpath": "/tmp/input/data/read1.fq",
            "reverse_read_fpath": "/tmp/input/data/read2.fq",
            "output_fpath": "/tmp/working/output",
        }

        expected_command_list = [
            "bowtie2", "-x", "/tmp/reference/Athaliana", "-1",
            "/tmp/input/data/read1.fq", "-2", "/tmp/input/data/read2.fq", "-S",
            "/tmp/working/output"
        ]

        # assert smart_tool.command_list("identifier") == expected_command_list

        import subprocess
        monkeypatch.setattr(subprocess, "call", MagicMock())

        smart_tool.pre_run = MagicMock()

        smart_tool("identifier")

        subprocess.call.assert_called_once_with(
            expected_command_list, cwd=smart_tool.working_directory)
        smart_tool.pre_run.assert_called_once()
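
The commented-out assertion above hints at how SmartTool is expected to expand a command template into an argv list. Below is a minimal sketch of that expansion (an assumption about what SmartTool.command_list does, not its actual implementation), using str.format substitution followed by shlex splitting:

import shlex

def expand_command(template, props):
    # Substitute the {placeholder} fields, then split into argv tokens.
    return shlex.split(template.format(**props))

assert expand_command(
    "bowtie2 -x {reference_prefix} -1 {forward_read_fpath} "
    "-2 {reverse_read_fpath} -S {output_fpath}",
    {
        "reference_prefix": "/tmp/reference/Athaliana",
        "forward_read_fpath": "/tmp/input/data/read1.fq",
        "reverse_read_fpath": "/tmp/input/data/read2.fq",
        "output_fpath": "/tmp/working/output",
    },
) == [
    "bowtie2", "-x", "/tmp/reference/Athaliana", "-1",
    "/tmp/input/data/read1.fq", "-2", "/tmp/input/data/read2.fq", "-S",
    "/tmp/working/output",
]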
Example #2
def test_diff_sizes(tmp_uri_fixture):  # NOQA

    from dtoolcore import (
        DataSet,
        generate_admin_metadata,
        generate_proto_dataset,
    )
    from dtoolcore.utils import generate_identifier
    from dtoolcore.compare import diff_sizes

    fpaths = create_test_files(tmp_uri_fixture)

    proto_ds_a = generate_proto_dataset(
        admin_metadata=generate_admin_metadata("test_compare_1"),
        base_uri=tmp_uri_fixture)
    proto_ds_a.create()
    proto_ds_a.put_item(fpaths["he"], "file.txt")
    proto_ds_a.freeze()

    proto_ds_b = generate_proto_dataset(
        admin_metadata=generate_admin_metadata("test_compare_2"),
        base_uri=tmp_uri_fixture)
    proto_ds_b.create()
    proto_ds_b.put_item(fpaths["she"], "file.txt")
    proto_ds_b.freeze()

    ds_a = DataSet.from_uri(proto_ds_a.uri)
    ds_b = DataSet.from_uri(proto_ds_b.uri)

    assert diff_sizes(ds_a, ds_a) == []

    expected = [
        (generate_identifier("file.txt"), 2, 3),
    ]
    assert diff_sizes(ds_a, ds_b) == expected
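
The create_test_files helper is not shown in these examples. A plausible stand-in (hypothetical; the keys "he", "she" and "cat" and the 2- and 3-byte sizes are inferred from the assertions in this and later examples) writes small files and returns a name-to-path mapping:

import os

from dtoolcore.utils import generous_parse_uri

def create_test_files(base_uri):
    # Hypothetical helper: write "he" (2 bytes), "she" (3 bytes) and
    # "cat" (3 bytes) under the fixture location and return their paths.
    base_path = generous_parse_uri(base_uri).path
    fpaths = {}
    for content in ("he", "she", "cat"):
        fpath = os.path.join(base_path, content + ".txt")
        with open(fpath, "w") as fh:
            fh.write(content)
        fpaths[content] = fpath
    return fpaths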
Example #3
def generate_dataset(base_uri, name, size, num_files):
    # print(
    #     "Generating dataset in {} with {} files of size {} bytes".format(
    #         base_uri, num_files, size
    #     )
    # )
    admin_metadata = generate_admin_metadata(name=name,
                                             creator_username="******")
    proto_dataset = generate_proto_dataset(admin_metadata, base_uri)
    proto_dataset.create()
    proto_dataset.put_readme("")

    for i in range(num_files):
        handle = "{}.txt".format(i)

        with tempfile.NamedTemporaryFile() as fp:
            fp.write(os.urandom(size))
            fp.flush()
            proto_dataset.put_item(fp.name, handle)
            proto_dataset.add_item_metadata(handle, "number", i)

    start = time.time()
    # cProfile.runctx("proto_dataset.freeze()", {"proto_dataset": proto_dataset}, {}, sort="cumtime")
    proto_dataset.freeze()
    elapsed = time.time() - start

    # print("Freezing {} took: {}s".format(name, elapsed))
    print("{},{}".format(num_files, elapsed))
Example #4
def test_update_name_of_frozen_dataset(tmp_uri_fixture):  # NOQA

    import dtoolcore

    # Create a dataset.
    admin_metadata = dtoolcore.generate_admin_metadata("test_name")
    proto_dataset = dtoolcore.generate_proto_dataset(
        admin_metadata=admin_metadata, base_uri=tmp_uri_fixture)
    proto_dataset.create()
    proto_dataset.freeze()

    dataset = dtoolcore.DataSet.from_uri(proto_dataset.uri)
    assert dataset.name == "test_name"

    dataset.update_name("updated_name")
    assert dataset.name == "updated_name"

    dataset_again = dtoolcore.DataSet.from_uri(proto_dataset.uri)
    assert dataset_again.name == "updated_name"

    # Make sure that none of the other admin metadata has been altered.
    for key, value in admin_metadata.items():
        if key == "name":
            continue
        assert dataset_again._admin_metadata[key] == value
Example #5
def test_http_enable_with_presigned_url(tmp_uuid_and_uri):  # NOQA

    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    local_file_path = os.path.join(TEST_SAMPLE_DATA, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(uri=dest_uri,
                                 admin_metadata=admin_metadata,
                                 config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.put_readme("---\nproject: testing\n")
    proto_dataset.freeze()

    dataset = DataSet.from_uri(dest_uri)

    # Add an annotation.
    dataset.put_annotation("project", "dtool-testing")

    # Add tags.
    dataset.put_tag("amazing")
    dataset.put_tag("stuff")

    with tmp_env_var("DTOOL_S3_PUBLISH_EXPIRY", "120"):
        access_url = dataset._storage_broker.http_enable()
    assert access_url.find("?") != -1  # This is a presigned URL dataset.

    assert access_url.startswith("https://")

    dataset_from_http = DataSet.from_uri(access_url)

    # Assert that the annotation has been copied across.
    assert dataset_from_http.get_annotation("project") == "dtool-testing"

    # Assert that the tags are available.
    assert dataset_from_http.list_tags() == ["amazing", "stuff"]

    from dtoolcore.compare import (diff_identifiers, diff_sizes, diff_content)

    assert len(diff_identifiers(dataset, dataset_from_http)) == 0
    assert len(diff_sizes(dataset, dataset_from_http)) == 0
    assert len(diff_content(dataset_from_http, dataset)) == 0

    # Make sure that all the URLs in the manifest are presigned.
    http_manifest = dataset_from_http._storage_broker.http_manifest
    assert http_manifest["manifest_url"].find("?") != -1
    assert http_manifest["readme_url"].find("?") != -1
    for url in http_manifest["item_urls"].values():
        assert url.find("?") != -1
    for url in http_manifest["annotations"].values():
        assert url.find("?") != -1
Example #6
def test_basic_workflow_on_first_namespace(tmp_uuid_and_uri):  # NOQA

    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.utils import generate_identifier

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    local_file_path = os.path.join(TEST_SAMPLE_DATA, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.freeze()

    # Read in a dataset
    dataset = DataSet.from_uri(dest_uri)

    expected_identifier = generate_identifier('tiny.png')

    assert expected_identifier in dataset.identifiers
    assert len(dataset.identifiers) == 1
Example #7
def test_copy(tmp_dir_fixture):  # NOQA

    import dtoolcore

    admin_metadata = dtoolcore.generate_admin_metadata("test_name")
    proto_dataset = dtoolcore.generate_proto_dataset(
        admin_metadata=admin_metadata,
        prefix=tmp_dir_fixture,
        storage="file")

    assert proto_dataset.name == "test_name"

    proto_dataset.update_name("test_new_name")

    assert proto_dataset.name == "test_new_name"

    proto_dataset.create()

    proto_dataset.update_name("test_another_new_name")

    assert proto_dataset.name == "test_another_new_name"

    read_proto_dataset = dtoolcore.ProtoDataSet.from_uri(proto_dataset.uri)

    assert read_proto_dataset.name == "test_another_new_name"
Example #8
def test_works_if_all_set(tmp_uuid_and_uri):  # NOQA

    from dtoolcore import ProtoDataSet, generate_admin_metadata

    bucket_name = S3_TEST_BASE_URI[5:]
    endpoint_key = "DTOOL_S3_ENDPOINT_{}".format(bucket_name)
    access_key = "DTOOL_S3_ACCESS_KEY_ID_{}".format(bucket_name)
    secret_access_key = "DTOOL_S3_SECRET_ACCESS_KEY_{}".format(bucket_name)

    uuid, dest_uri = tmp_uuid_and_uri
    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    env_vars = {
        endpoint_key: "https://s3.amazonaws.com",
        access_key: S3_TEST_ACCESS_KEY_ID,
        secret_access_key: S3_TEST_SECRET_ACCESS_KEY,
    }

    a, b, c = list(env_vars.keys())
    with tmp_env_var(a, env_vars[a]):
        with tmp_env_var(b, env_vars[b]):
            with tmp_env_var(c, env_vars[c]):
                proto_dataset = ProtoDataSet(dest_uri, admin_metadata)
                proto_dataset.create()
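
A behaviour-equivalent alternative to the triple nesting above, sketched with contextlib.ExitStack (assuming tmp_env_var is an ordinary context manager); this would replace the with-block inside the test:

from contextlib import ExitStack

with ExitStack() as stack:
    # Enter one tmp_env_var context per variable; all are unwound on exit.
    for key, value in env_vars.items():
        stack.enter_context(tmp_env_var(key, value))
    proto_dataset = ProtoDataSet(dest_uri, admin_metadata)
    proto_dataset.create()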
Example #9
def test_basic_workflow(tmp_dir_fixture):  # NOQA

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.utils import generate_identifier
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri=tmp_dir_fixture)

    local_file_path = os.path.join(TEST_SAMPLE_DATA, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')

    proto_dataset.freeze()

    # Read in a dataset
    dataset = DataSet.from_uri(dest_uri)

    expected_identifier = generate_identifier('tiny.png')
    assert expected_identifier in dataset.identifiers
    assert len(dataset.identifiers) == 1
Example #10
def create_proto_dataset(base_uri, name, username):
    admin_metadata = dtoolcore.generate_admin_metadata(name, username)
    proto_dataset = dtoolcore.generate_proto_dataset(
        admin_metadata=admin_metadata, base_uri=base_uri)
    proto_dataset.create()
    proto_dataset.put_readme("")
    return proto_dataset
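
A hedged usage sketch for this helper (the base URI, dataset name, item path and username are illustrative):

proto_ds = create_proto_dataset(
    "file:///tmp/datasets",  # illustrative base URI
    "raw_reads",             # illustrative dataset name
    "testing_bot")           # illustrative creator username
proto_ds.put_item("/tmp/input/data/read1.fq", "read1.fq")
proto_ds.freeze()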
Example #11
def test_copy_resume_fixes_broken_files(tmp_uri_fixture):  # NOQA

    import dtoolcore

    src_dir = os.path.join(uri_to_path(tmp_uri_fixture), "src")
    dest_dir = os.path.join(uri_to_path(tmp_uri_fixture), "dest")
    for directory in [src_dir, dest_dir]:
        os.mkdir(directory)

    # Create the src dataset to be copied.
    admin_metadata = dtoolcore.generate_admin_metadata("test_copy")
    proto_dataset = dtoolcore.generate_proto_dataset(
        admin_metadata=admin_metadata, base_uri=tmp_uri_fixture + "/src")
    proto_dataset.create()
    src_uri = proto_dataset.uri

    proto_dataset.put_readme("---\nproject: exciting\n")

    overlay = "file_extension"
    for fname in os.listdir(TEST_SAMPLE_DATA):
        _, ext = os.path.splitext(fname)
        item_fpath = os.path.join(TEST_SAMPLE_DATA, fname)
        proto_dataset.put_item(item_fpath, fname)
        proto_dataset.add_item_metadata(fname, overlay, ext)

    proto_dataset.freeze()

    # Create a partial copy.
    src_dataset = dtoolcore.DataSet.from_uri(proto_dataset.uri)
    dest_proto_dataset = dtoolcore._copy_create_proto_dataset(
        src_dataset, tmp_uri_fixture + "/dest")
    broken_content_fpath = os.path.join(TEST_SAMPLE_DATA, "another_file.txt")
    dest_proto_dataset.put_item(broken_content_fpath, "random_bytes")

    # Copy resume should work.
    dest_uri = dtoolcore.copy_resume(src_uri, tmp_uri_fixture + "/dest")

    # Compare the two datasets.
    src_ds = dtoolcore.DataSet.from_uri(src_uri)
    dest_ds = dtoolcore.DataSet.from_uri(dest_uri)

    for key, value in src_ds._admin_metadata.items():
        assert dest_ds._admin_metadata[key] == value

    assert src_ds.identifiers == dest_ds.identifiers
    for i in src_ds.identifiers:
        src_item_props = src_ds.item_properties(i)
        dest_item_props = dest_ds.item_properties(i)
        for key, value in src_item_props.items():
            if key == "utc_timestamp":
                tolerance = 2  # seconds (number chosen arbitrarily)
                assert dest_item_props[key] >= value
                assert dest_item_props[key] < value + tolerance
            else:
                assert dest_item_props[key] == value

    assert src_ds.get_readme_content() == dest_ds.get_readme_content()

    assert src_ds.list_overlay_names() == dest_ds.list_overlay_names()
    assert src_ds.get_overlay(overlay) == dest_ds.get_overlay(overlay)
Example #12
def test_annotations(tmp_uuid_and_uri):  # NOQA

    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    local_file_path = os.path.join(TEST_SAMPLE_DATA, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(uri=dest_uri,
                                 admin_metadata=admin_metadata,
                                 config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.freeze()

    # Read in a dataset
    dataset = DataSet.from_uri(dest_uri)

    assert dataset.list_annotation_names() == []

    dataset.put_annotation("project", "demo")
    assert dataset.get_annotation("project") == "demo"

    assert dataset.list_annotation_names() == ["project"]
Example #13
def test_writing_of_dtool_readme_file(tmp_uuid_and_uri):  # NOQA
    from dtoolcore import ProtoDataSet, generate_admin_metadata

    # Create a proto dataset.
    uuid, dest_uri = tmp_uuid_and_uri
    name = "test_dtool_readme_file"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None
    )
    proto_dataset.create()

    # Check that the ".dtool/README.txt" file exists.
    expected_s3_key = uuid + '/README.txt'
    assert _key_exists_in_storage_broker(
        proto_dataset._storage_broker,
        expected_s3_key
    )

    actual_content = _get_unicode_from_key(
        proto_dataset._storage_broker,
        expected_s3_key
    )
    assert actual_content.startswith("README")
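
The _key_exists_in_storage_broker and _get_unicode_from_key helpers are not shown in this example. Plausible sketches follow, assuming an S3-backed broker exposing boto3 s3resource and bucket attributes (both attribute names are assumptions):

def _key_exists_in_storage_broker(storage_broker, key):
    # Hypothetical: look the key up directly in the broker's bucket.
    bucket = storage_broker.s3resource.Bucket(storage_broker.bucket)
    return any(obj.key == key for obj in bucket.objects.filter(Prefix=key))

def _get_unicode_from_key(storage_broker, key):
    # Hypothetical: fetch the object body and decode it as UTF-8.
    obj = storage_broker.s3resource.Object(storage_broker.bucket, key)
    return obj.get()["Body"].read().decode("utf-8")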
Example #14
def test_list_dataset_uris(tmp_uuid_and_uri):  # NOQA

    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import ProtoDataSet, generate_admin_metadata

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    local_file_path = os.path.join(TEST_SAMPLE_DATA, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=CONFIG_PATH)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny with space.png')
    proto_dataset.freeze()

    from dtool_azure.storagebroker import AzureStorageBroker
    assert len(AzureStorageBroker.list_dataset_uris(
        dest_uri,
        CONFIG_PATH)
    ) > 0
Example #15
def test_item_local_abspath_with_clean_cache(tmp_uuid_and_uri):  # NOQA

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.utils import generate_identifier

    uuid, dest_uri = tmp_uuid_and_uri

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    local_file_path = os.path.join(TEST_SAMPLE_DATA, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(uri=dest_uri,
                                 admin_metadata=admin_metadata,
                                 config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.freeze()

    identifier = generate_identifier('tiny.png')

    with tmp_directory() as cache_dir:
        with tmp_env_var("DTOOL_S3_CACHE_DIRECTORY", cache_dir):

            dataset = DataSet.from_uri(dest_uri)
            fpath = dataset.item_content_abspath(identifier)

            assert os.path.isfile(fpath)
Example #16
def test_uri_property_when_using_relpath(chdir_fixture):  # NOQA

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(name=name,
                                              uuid=admin_metadata["uuid"],
                                              prefix=".")

    local_file_path = os.path.join(TEST_SAMPLE_DATA, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(uri=dest_uri,
                                 admin_metadata=admin_metadata,
                                 config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')

    proto_dataset.freeze()

    dataset = DataSet.from_uri("./my_dataset")
    expected_uri = "file://" + os.path.abspath("my_dataset")
    assert dataset.uri == expected_uri
Example #17
def test_fails_if_any_endpoint_is_missing(tmp_uuid_and_uri):  # NOQA

    from dtoolcore import ProtoDataSet, generate_admin_metadata

    bucket_name = S3_TEST_BASE_URI[5:]
    endpoint_key = "DTOOL_S3_ENDPOINT_{}".format(bucket_name)
    access_key = "DTOOL_S3_ACCESS_KEY_ID_{}".format(bucket_name)
    secret_access_key = "DTOOL_S3_SECRET_ACCESS_KEY_{}".format(bucket_name)

    uuid, dest_uri = tmp_uuid_and_uri
    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    env_vars = {
        endpoint_key: "https://s3.amazonaws.com",
        access_key: S3_TEST_ACCESS_KEY_ID,
        secret_access_key: S3_TEST_SECRET_ACCESS_KEY,
    }

    from itertools import combinations
    for a, b in combinations(env_vars.keys(), 2):
        with tmp_env_var(a, env_vars[a]):
            with tmp_env_var(b, env_vars[b]):
                with pytest.raises(RuntimeError):
                    ProtoDataSet(dest_uri, admin_metadata)
Example #18
def test_diff_content(tmp_dir_fixture):  # NOQA

    from dtoolcore import (
        DataSet,
        generate_admin_metadata,
        generate_proto_dataset,
    )
    from dtoolcore.utils import generate_identifier
    from dtoolcore.compare import diff_content
    from dtoolcore.storagebroker import DiskStorageBroker

    fpaths = create_test_files(tmp_dir_fixture)

    proto_ds_a = generate_proto_dataset(
        admin_metadata=generate_admin_metadata("test_compare_1"),
        prefix=tmp_dir_fixture,
        storage="file")
    proto_ds_a.create()
    proto_ds_a.put_item(fpaths["cat"], "file.txt")
    proto_ds_a.freeze()

    proto_ds_b = generate_proto_dataset(
        admin_metadata=generate_admin_metadata("test_compare_2"),
        prefix=tmp_dir_fixture,
        storage="file")
    proto_ds_b.create()
    proto_ds_b.put_item(fpaths["she"], "file.txt")
    proto_ds_b.freeze()

    ds_a = DataSet.from_uri(proto_ds_a.uri)
    ds_b = DataSet.from_uri(proto_ds_b.uri)

    assert diff_content(ds_a, ds_a) == []

    identifier = generate_identifier("file.txt")
    expected = [
        (generate_identifier("file.txt"),
         DiskStorageBroker.hasher(ds_a.item_content_abspath(identifier)),
         DiskStorageBroker.hasher(ds_b.item_content_abspath(identifier)))
    ]
    assert diff_content(ds_a, ds_b) == expected
Example #19
def proto_dataset_from_base_uri(name, base_uri):

    admin_metadata = dtoolcore.generate_admin_metadata(name)
    parsed_base_uri = dtoolcore.utils.generous_parse_uri(base_uri)

    proto_dataset = dtoolcore.generate_proto_dataset(
        admin_metadata=admin_metadata,
        base_uri=dtoolcore.utils.urlunparse(parsed_base_uri))

    proto_dataset.create()

    return proto_dataset
Example #20
def tmp_uuid_and_uri(request):
    admin_metadata = generate_admin_metadata("test_dataset")
    uuid = admin_metadata["uuid"]

    uri = AzureStorageBroker.generate_uri("test_dataset", uuid,
                                          AZURE_TEST_BASE_URI)

    @request.addfinalizer
    def teardown():
        _remove_dataset(uri)

    return (uuid, uri)
Example #21
def tmp_uuid_and_uri(request):
    admin_metadata = generate_admin_metadata("test_dataset")
    uuid = admin_metadata["uuid"]

    uri = IrodsStorageBroker.generate_uri("test_dataset", uuid, TEST_ZONE)

    @request.addfinalizer
    def teardown():
        _, irods_path = uri.split(":", 1)
        _rm_if_exists(irods_path)

    return (uuid, uri)
Example #22
def tmp_uuid_and_uri(request):
    admin_metadata = generate_admin_metadata("test_dataset")
    uuid = admin_metadata["uuid"]

    uri = S3StorageBroker.generate_uri("test_dataset", uuid,
                                       "s3://test-dtool-s3-bucket")

    @request.addfinalizer
    def teardown():
        _remove_dataset(uri)

    return (uuid, uri)
Example #23
def tmp_uuid_and_uri(request):
    admin_metadata = generate_admin_metadata("test_dataset")
    uuid = admin_metadata["uuid"]

    # The namespace needs to be configured in ~/.config/dtool/dtool.json

    uri = ECSStorageBroker.generate_uri("test_dataset", uuid, "ecs://test1")

    @request.addfinalizer
    def teardown():
        _remove_dataset(uri)

    return (uuid, uri)
Example #24
def test_tags_functional(tmp_uuid_and_uri):  # NOQA

    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()

    # Test put_tag on proto dataset.
    proto_dataset.put_tag("testing")

    proto_dataset.freeze()

    dataset = DataSet.from_uri(proto_dataset.uri)
    assert dataset.list_tags() == ["testing"]

    dataset.put_tag("amazing")
    dataset.put_tag("stuff")
    assert dataset.list_tags() == ["amazing", "stuff", "testing"]

    dataset.delete_tag("stuff")
    assert dataset.list_tags() == ["amazing", "testing"]

    # Putting the same tag is idempotent.
    dataset.put_tag("amazing")
    dataset.put_tag("amazing")
    dataset.put_tag("amazing")
    assert dataset.list_tags() == ["amazing", "testing"]

    # Tags can only be strings.
    from dtoolcore import DtoolCoreValueError
    with pytest.raises(DtoolCoreValueError):
        dataset.put_tag(1)

    # Tags need to adhere to the utils.name_is_valid() rules.
    from dtoolcore import DtoolCoreInvalidNameError
    with pytest.raises(DtoolCoreInvalidNameError):
        dataset.put_tag("!invalid")

    # Deleting a non-existing tag does not raise; it silently succeeds.
    dataset.delete_tag("dontexist")
Example #25
def test_status_command_on_proto_dataset_functional(tmp_dir_fixture):  # NOQA
    from dtoolcore import generate_admin_metadata, generate_proto_dataset
    from dtool_info.dataset import status

    admin_metadata = generate_admin_metadata("test_ds")
    proto_dataset = generate_proto_dataset(admin_metadata=admin_metadata,
                                           base_uri=tmp_dir_fixture)
    proto_dataset.create()

    runner = CliRunner()

    result = runner.invoke(status, [proto_dataset.uri])
    assert result.exit_code == 0
    assert result.output.strip() == "proto"
Example #26
def test_basic_workflow_with_nested_handle(tmp_dir_fixture):  # NOQA

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.utils import generate_identifier
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri=tmp_dir_fixture)

    local_file_path = os.path.join(TEST_SAMPLE_DATA, 'tiny.png')
    handle = "subdir/tiny.png"

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, handle)

    proto_dataset.freeze()

    # Read in a dataset
    dataset = DataSet.from_uri(dest_uri)

    expected_identifier = generate_identifier(handle)
    assert expected_identifier in dataset.identifiers
    assert len(dataset.identifiers) == 1

    # Ensure that the file exists in the disk dataset.
    # Particularly on Windows.
    item_abspath = os.path.join(
        tmp_dir_fixture,
        name,
        "data",
        "subdir",
        "tiny.png"
    )
    assert os.path.isfile(item_abspath)
    assert os.path.isfile(dataset.item_content_abspath(expected_identifier))

    # Ensure that the correct abspath is returned.
    # Particularly on Windows.
    assert dataset.item_content_abspath(expected_identifier) == item_abspath  # NOQA
Example #27
def test_diff_identifiers(tmp_dir_fixture):  # NOQA

    from dtoolcore import (
        DataSet,
        generate_admin_metadata,
        generate_proto_dataset,
    )
    from dtoolcore.utils import generate_identifier
    from dtoolcore.compare import diff_identifiers

    fpaths = create_test_files(tmp_dir_fixture)

    proto_ds_a = generate_proto_dataset(
        admin_metadata=generate_admin_metadata("test_compare_1"),
        prefix=tmp_dir_fixture,
        storage="file")
    proto_ds_a.create()
    proto_ds_a.put_item(fpaths["cat"], "a.txt")
    proto_ds_a.freeze()

    proto_ds_b = generate_proto_dataset(
        admin_metadata=generate_admin_metadata("test_compare_2"),
        prefix=tmp_dir_fixture,
        storage="file")
    proto_ds_b.create()
    proto_ds_b.put_item(fpaths["cat"], "b.txt")
    proto_ds_b.freeze()

    ds_a = DataSet.from_uri(proto_ds_a.uri)
    ds_b = DataSet.from_uri(proto_ds_b.uri)

    assert diff_identifiers(ds_a, ds_a) == []

    expected = [(generate_identifier("a.txt"), True, False),
                (generate_identifier("b.txt"), False, True)]
    assert diff_identifiers(ds_a, ds_b) == expected
Example #28
def create_derived_dataset(parent_dataset, dest_location_uri, name_suffix):

    dest_dataset_name = "{}_{}".format(parent_dataset.name, name_suffix)

    admin_metadata = dtoolcore.generate_admin_metadata(dest_dataset_name)
    dest_dataset = dtoolcore.generate_proto_dataset(
        admin_metadata=admin_metadata,
        base_uri=dest_location_uri,
        config_path=CONFIG_PATH)
    try:
        dest_dataset.create()
    except dtoolcore.storagebroker.StorageBrokerOSError as err:
        raise click.UsageError(str(err))

    return dest_dataset
Example #29
def test_fails_if_only_endpoint_is_set(tmp_uuid_and_uri):  # NOQA

    from dtoolcore import ProtoDataSet, generate_admin_metadata

    bucket_name = S3_TEST_BASE_URI[5:]
    endpoint_key = "DTOOL_S3_ENDPOINT_{}".format(bucket_name)

    uuid, dest_uri = tmp_uuid_and_uri
    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    with tmp_env_var(endpoint_key, "https://s3.amazonaws.com"):
        with pytest.raises(RuntimeError):
            ProtoDataSet(dest_uri, admin_metadata)
Example #30
def create(quiet, name, storage, prefix):
    """Create a proto dataset."""
    admin_metadata = dtoolcore.generate_admin_metadata(name)

    # Create the dataset.
    proto_dataset = dtoolcore.generate_proto_dataset(
        admin_metadata=admin_metadata,
        prefix=prefix,
        storage=storage,
        config_path=CONFIG_PATH)
    try:
        proto_dataset.create()
    except dtoolcore.storagebroker.StorageBrokerOSError as err:
        raise click.UsageError(str(err))

    proto_dataset.put_readme("")

    if quiet:
        click.secho(proto_dataset.uri)
    else:
        # Give the user some feedback and hints on what to do next.
        click.secho("Created proto dataset ", nl=False, fg="green")
        click.secho(proto_dataset.uri)
        click.secho("Next steps: ")

        step = 1
        click.secho("{}. Add descriptive metadata, e.g: ".format(step))
        click.secho("   dtool readme interactive {}".format(proto_dataset.uri),
                    fg="cyan")

        if storage != "symlink":
            step = step + 1
            click.secho("{}. Add raw data, eg:".format(step))
            click.secho("   dtool add item my_file.txt {}".format(
                proto_dataset.uri),
                        fg="cyan")

            if storage == "file":
                # Find the abspath of the data directory for user feedback.
                data_path = proto_dataset._storage_broker._data_abspath
                click.secho("   Or use your system commands, e.g: ")
                click.secho("   mv my_data_directory {}/".format(data_path),
                            fg="cyan")

        step = step + 1
        click.secho(
            "{}. Convert the proto dataset into a dataset: ".format(step))
        click.secho("   dtool freeze {}".format(proto_dataset.uri), fg="cyan")