Example 1
def test_store_and_retrieve_item_metadata(tmp_dir_fixture):  # NOQA
    from dtoolcore.storagebroker import DiskStorageBroker

    destination_path = os.path.join(tmp_dir_fixture, 'my_proto_dataset')
    storagebroker = DiskStorageBroker(destination_path)

    storagebroker.create_structure()

    handle = 'dummy'

    # Here we add two sets of metadata with different keys
    storagebroker.add_item_metadata(handle=handle, key='foo', value='bar')
    storagebroker.add_item_metadata(handle=handle,
                                    key='key',
                                    value={
                                        'subkey': 'subval',
                                        'morekey': 'moreval'
                                    })

    # Test metadata retrieval (we get back both sets of metadata)
    metadata = storagebroker.get_item_metadata(handle)
    assert metadata == {
        'foo': 'bar',
        'key': {
            'subkey': 'subval',
            'morekey': 'moreval'
        }
    }
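
All of the examples below are excerpts and assume a shared test-module preamble that the listing does not show. A minimal sketch of what that preamble might look like follows; the fixture name tmp_dir_fixture and the TEST_SAMPLE_DATA constant are taken from the examples themselves, while this particular implementation is an assumption rather than the project's actual conftest.

import datetime
import os
import shutil
import tempfile
import time

import pytest
import pytz

# Hypothetical location of the sample data (tiny.png etc.) used below.
TEST_SAMPLE_DATA = os.path.join(os.path.dirname(__file__), "data")


@pytest.fixture
def tmp_dir_fixture(request):
    # Fresh temporary directory per test, removed again on teardown.
    d = tempfile.mkdtemp()
    request.addfinalizer(lambda: shutil.rmtree(d))
    return d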
Example 2
def test_pre_freeze_hook(tmp_dir_fixture):  # NOQA
    from dtoolcore.storagebroker import DiskStorageBroker

    destination_path = os.path.join(tmp_dir_fixture, 'my_proto_dataset')
    storagebroker = DiskStorageBroker(destination_path)

    storagebroker.create_structure()

    # Add a data file.
    data_fpath = os.path.join(storagebroker._data_abspath, "sample.txt")
    with open(data_fpath, "w") as fh:
        fh.write("some sample data")

    # The call below should not raise a DiskStorageBrokerValidationWarning
    # because the structure is correct.
    storagebroker.pre_freeze_hook()

    # Now we add a rogue file.
    rogue_fpath = os.path.join(destination_path, "rogue.txt")
    with open(rogue_fpath, "w") as fh:
        fh.write("I should not be here")

    from dtoolcore.storagebroker import DiskStorageBrokerValidationWarning
    with pytest.raises(DiskStorageBrokerValidationWarning):
        storagebroker.pre_freeze_hook()
Example 3
def test_store_and_retrieve_manifest(tmp_dir_fixture):  # NOQA
    from dtoolcore.storagebroker import DiskStorageBroker

    destination_path = os.path.join(tmp_dir_fixture, 'my_proto_dataset')
    storagebroker = DiskStorageBroker(destination_path)

    storagebroker.create_structure()

    manifest = {'a': 'b', 'c': [1, 2, 3]}

    storagebroker.put_manifest(manifest)

    retrieved_manifest = storagebroker.get_manifest()

    assert retrieved_manifest == manifest

    # Test the formatting on disk. Older json modules emitted a trailing
    # space after each comma, hence the rstrip() below:
    #   expected = '{\n  "a": "b", \n  "c": [\n    1, \n    2, \n    3\n  ]\n}'
    expected_lines = [
        '{', '  "a": "b",', '  "c": [', '    1,', '    2,', '    3', '  ]', '}'
    ]

    with open(storagebroker.get_manifest_key()) as fh:
        for i, actual in enumerate(fh):
            actual = actual.rstrip()
            expected = expected_lines[i]
            assert actual == expected
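
The layout asserted above is what json.dumps produces with an indent of two, which suggests (though the listing does not show it) that DiskStorageBroker serialises the manifest that way. A quick check under that assumption:

import json

manifest = {'a': 'b', 'c': [1, 2, 3]}
print(json.dumps(manifest, indent=2, sort_keys=True))
# {
#   "a": "b",
#   "c": [
#     1,
#     2,
#     3
#   ]
# }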
Example 4
def test_uri_property_when_using_relpath(chdir_fixture):  # NOQA

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(name=name,
                                              uuid=admin_metadata["uuid"],
                                              prefix=".")

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(uri=dest_uri,
                                 admin_metadata=admin_metadata,
                                 config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')

    proto_dataset.freeze()

    dataset = DataSet.from_uri("./my_dataset")
    expected_uri = "file://" + os.path.abspath("my_dataset")
    assert dataset.uri == expected_uri
Example 5
def test_basic_workflow(tmp_dir_fixture):  # NOQA

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.utils import generate_identifier
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri=tmp_dir_fixture)

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')

    proto_dataset.freeze()

    # Read in a dataset
    dataset = DataSet.from_uri(dest_uri)

    expected_identifier = generate_identifier('tiny.png')
    assert expected_identifier in dataset.identifiers
    assert len(dataset.identifiers) == 1
Example 6
def test_post_freeze_hook(tmp_dir_fixture):  # NOQA
    from dtoolcore.storagebroker import DiskStorageBroker

    destination_path = os.path.join(tmp_dir_fixture, 'my_proto_dataset')
    storagebroker = DiskStorageBroker(destination_path)

    storagebroker.create_structure()

    # The call below should not raise an OSError, even though the
    # .dtool/tmp_fragments directory has not been created yet.
    storagebroker.post_freeze_hook()

    handle = 'dummy'
    storagebroker.add_item_metadata(handle, key='foo', value='bar')

    assert os.path.isdir(storagebroker._metadata_fragments_abspath)
    storagebroker.post_freeze_hook()
    assert not os.path.isdir(storagebroker._metadata_fragments_abspath)
Example 7
def test_unix_relpaths_from_iter_item_handles(tmp_dir_fixture):  # NOQA
    from dtoolcore.storagebroker import DiskStorageBroker

    destination_path = os.path.join(tmp_dir_fixture, 'my_proto_dataset')
    storagebroker = DiskStorageBroker(destination_path)

    storagebroker.create_structure()

    # Add a data file.
    data_subdir = os.path.join(storagebroker._data_abspath, "level")
    os.mkdir(data_subdir)
    data_fpath = os.path.join(data_subdir, "sample.txt")
    with open(data_fpath, "w") as fh:
        fh.write("some sample data")

    handles = [h for h in storagebroker.iter_item_handles()]
    assert len(handles) == 1
    assert handles[0] == "level/sample.txt"
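
The assertion pins down that handles always use forward slashes, even on platforms where os.sep differs. A one-line sketch of the normalisation the broker presumably performs when iterating the data directory (the helper name is hypothetical):

import os

def unix_handle(abspath, data_root):
    # Relative path below the data directory, with platform-specific
    # separators normalised to forward slashes.
    return os.path.relpath(abspath, data_root).replace(os.sep, "/")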
Example 8
def test_has_admin_metadata(tmp_dir_fixture):  # NOQA

    from dtoolcore.storagebroker import DiskStorageBroker

    destination_path = os.path.join(tmp_dir_fixture, 'my_proto_dataset')
    storagebroker = DiskStorageBroker(destination_path)

    assert not storagebroker.has_admin_metadata()

    storagebroker.create_structure()
    assert not storagebroker.has_admin_metadata()

    admin_metadata = {'hello': 'world'}
    storagebroker.put_admin_metadata(admin_metadata)
    assert storagebroker.has_admin_metadata()
Example 9
def test_store_and_retrieve_admin_metadata(tmp_dir_fixture):  # NOQA

    from dtoolcore.storagebroker import DiskStorageBroker

    destination_path = os.path.join(tmp_dir_fixture, 'my_proto_dataset')
    storagebroker = DiskStorageBroker(destination_path)

    storagebroker.create_structure()

    admin_metadata = {'hello': 'world'}
    storagebroker.put_admin_metadata(admin_metadata)

    storagebroker_2 = DiskStorageBroker(destination_path)
    retrieved_admin_metadata = storagebroker_2.get_admin_metadata()
    assert retrieved_admin_metadata == admin_metadata
Example 10
def test_diff_content(tmp_dir_fixture):  # NOQA

    from dtoolcore import (
        DataSet,
        generate_admin_metadata,
        generate_proto_dataset,
    )
    from dtoolcore.utils import generate_identifier
    from dtoolcore.compare import diff_content
    from dtoolcore.storagebroker import DiskStorageBroker

    fpaths = create_test_files(tmp_dir_fixture)

    proto_ds_a = generate_proto_dataset(
        admin_metadata=generate_admin_metadata("test_compare_1"),
        prefix=tmp_dir_fixture,
        storage="file")
    proto_ds_a.create()
    proto_ds_a.put_item(fpaths["cat"], "file.txt")
    proto_ds_a.freeze()

    proto_ds_b = generate_proto_dataset(
        admin_metadata=generate_admin_metadata("test_compare_2"),
        prefix=tmp_dir_fixture,
        storage="file")
    proto_ds_b.create()
    proto_ds_b.put_item(fpaths["she"], "file.txt")
    proto_ds_b.freeze()

    ds_a = DataSet.from_uri(proto_ds_a.uri)
    ds_b = DataSet.from_uri(proto_ds_b.uri)

    assert diff_content(ds_a, ds_a) == []

    identifier = generate_identifier("file.txt")
    expected = [
        (generate_identifier("file.txt"),
         DiskStorageBroker.hasher(ds_a.item_content_abspath(identifier)),
         DiskStorageBroker.hasher(ds_b.item_content_abspath(identifier)))
    ]
    assert diff_content(ds_a, ds_b) == expected
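
create_test_files is a helper from the project's test suite that the listing does not include. Below is a hypothetical stand-in consistent with how it is used above; only the keys "cat" and "she" come from the example, and the file contents are invented:

import os

def create_test_files(directory):
    # Write two small files with different content, so the two datasets
    # built from them end up with items that share a relpath but differ
    # in hash.
    fpaths = {}
    for key, content in [("cat", "A cat had a hat.\n"),
                         ("she", "She sells seashells.\n")]:
        fpath = os.path.join(directory, key + ".txt")
        with open(fpath, "w") as fh:
            fh.write(content)
        fpaths[key] = fpath
    return fpaths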
Example 11
def test_store_and_retrieve_readme(tmp_dir_fixture):  # NOQA

    from dtoolcore.storagebroker import DiskStorageBroker

    destination_path = os.path.join(tmp_dir_fixture, 'my_proto_dataset')
    storagebroker = DiskStorageBroker(destination_path)

    storagebroker.create_structure()

    storagebroker.put_readme('Hello world')
    assert storagebroker.get_readme_content() == 'Hello world'
Example 12
def test_basic_workflow_with_nested_handle(tmp_dir_fixture):  # NOQA

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.utils import generate_identifier
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri=tmp_dir_fixture)

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')
    handle = "subdir/tiny.png"

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, handle)

    proto_dataset.freeze()

    # Read in a dataset
    dataset = DataSet.from_uri(dest_uri)

    expected_identifier = generate_identifier(handle)
    assert expected_identifier in dataset.identifiers
    assert len(dataset.identifiers) == 1

    # Ensure that the file exists in the disk dataset.
    # Particularly on Windows.
    item_abspath = os.path.join(
        tmp_dir_fixture,
        name,
        "data",
        "subdir",
        "tiny.png"
    )
    assert os.path.isfile(item_abspath)
    assert os.path.isfile(dataset.item_content_abspath(expected_identifier))

    # Ensure that the correct abspath is returned.
    # Particularly on Windows.
    assert dataset.item_content_abspath(expected_identifier) == item_abspath  # NOQA
Example 13
def test_list_dataset_uris(tmp_dir_fixture):  # NOQA

    import dtoolcore
    from dtoolcore.storagebroker import DiskStorageBroker

    assert [] == DiskStorageBroker.list_dataset_uris(prefix=tmp_dir_fixture,
                                                     config_path=None)

    # Create two datasets and record their expected URIs.
    expected_uris = []
    for name in ["test_ds_1", "test_ds_2"]:
        admin_metadata = dtoolcore.generate_admin_metadata(name)
        proto_dataset = dtoolcore.generate_proto_dataset(
            admin_metadata=admin_metadata,
            prefix=tmp_dir_fixture,
            storage="file")
        proto_dataset.create()
        expected_uris.append(proto_dataset.uri)

    actual_uris = DiskStorageBroker.list_dataset_uris(prefix=tmp_dir_fixture,
                                                      config_path=None)

    assert set(expected_uris) == set(actual_uris)
Example 14
def test_store_and_retrieve_item_metadata(tmp_dir_fixture):  # NOQA
    from dtoolcore.storagebroker import DiskStorageBroker

    destination_path = os.path.join(tmp_dir_fixture, 'my_proto_dataset')
    storagebroker = DiskStorageBroker(destination_path)

    storagebroker.create_structure()

    example_overlay = {'abcdef': 1, 'ghijkl': 2}

    storagebroker.put_overlay(overlay_name="example", overlay=example_overlay)

    retrieved_overlay = storagebroker.get_overlay('example')

    assert example_overlay == retrieved_overlay
Example 15
def test_put_item(tmp_dir_fixture):  # NOQA

    from dtoolcore.storagebroker import DiskStorageBroker

    destination_path = os.path.join(tmp_dir_fixture, 'my_proto_dataset')
    storagebroker = DiskStorageBroker(destination_path)

    storagebroker.create_structure()

    input_file_path = os.path.join(TEST_SAMPLE_DATA, 'tiny.png')

    storagebroker.put_item(fpath=input_file_path, relpath='tiny.png')

    handles = list(storagebroker.iter_item_handles())

    assert 'tiny.png' in handles
Example 16
def test_item_properties(tmp_dir_fixture):  # NOQA
    from dtoolcore.storagebroker import DiskStorageBroker

    destination_path = os.path.join(tmp_dir_fixture, 'my_proto_dataset')
    storagebroker = DiskStorageBroker(destination_path)

    storagebroker.create_structure()

    input_file_path = os.path.join(TEST_SAMPLE_DATA, 'tiny.png')

    storagebroker.put_item(fpath=input_file_path, relpath='tiny.png')

    handles = list(storagebroker.iter_item_handles())

    handle = handles[0]

    item_properties = storagebroker.item_properties(handle)

    # Check size_in_bytes property
    assert item_properties['size_in_bytes'] == 276

    # Check timestamp property
    assert 'utc_timestamp' in item_properties

    time_from_item = datetime.datetime.fromtimestamp(
        float(item_properties['utc_timestamp']),
        tz=pytz.UTC
    )

    time.sleep(0.1)  # Make tests more robust on Windows.
    time_delta = datetime.datetime.now(tz=pytz.UTC) - time_from_item

    assert time_delta.days == 0
    assert time_delta.seconds < 20

    # Check hash property
    from dtoolcore.filehasher import md5sum_hexdigest
    expected_hash = md5sum_hexdigest(input_file_path)

    assert item_properties['hash'] == expected_hash

    # Check relpath property
    assert item_properties['relpath'] == 'tiny.png'
Example 17
def test_list_overlays_when_dir_missing(chdir_fixture):  # NOQA
    """
    This test simulates checking out a frozen dataset from Git that has no
    overlays written to it, i.e. where the ``.dtool/overlays`` directory is
    missing.

    See also:
    https://github.com/jic-dtool/dtoolcore/issues/3
    """

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(name=name,
                                              uuid=admin_metadata["uuid"],
                                              base_uri="file://.")

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(uri=dest_uri,
                                 admin_metadata=admin_metadata,
                                 config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')

    proto_dataset.freeze()

    # Simulate the missing overlay directory.
    assert os.path.isdir(proto_dataset._storage_broker._overlays_abspath)
    os.rmdir(proto_dataset._storage_broker._overlays_abspath)
    assert not os.path.isdir(proto_dataset._storage_broker._overlays_abspath)

    dataset = DataSet.from_uri(proto_dataset.uri)

    # This call caused the bug.
    overlay_names = dataset.list_overlay_names()
    assert overlay_names == []
Example 18
def test_uri_property_when_using_relpath(chdir_fixture):  # NOQA

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.storagebroker import DiskStorageBroker
    from dtoolcore.utils import (IS_WINDOWS, windows_to_unix_path, urlparse)

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(name=name,
                                              uuid=admin_metadata["uuid"],
                                              base_uri=".")

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(uri=dest_uri,
                                 admin_metadata=admin_metadata,
                                 config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')

    proto_dataset.freeze()

    dataset = DataSet.from_uri("my_dataset")

    abspath = os.path.abspath("my_dataset")
    if IS_WINDOWS:
        abspath = windows_to_unix_path(abspath)
    assert dataset.uri.startswith("file://")
    assert dataset.uri.endswith(abspath)

    parsed = urlparse(dataset.uri)
    if IS_WINDOWS:
        assert parsed.netloc == ""
    else:
        assert parsed.netloc != ""
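
windows_to_unix_path is assumed here to rewrite backslash separators so that an absolute Windows path can be compared against the tail of a file:// URI. A sketch of that assumption:

def windows_to_unix_path(win_path):
    # Assumed behaviour: "C:\\data\\my_dataset" -> "C:/data/my_dataset".
    return win_path.replace("\\", "/")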
Example 19
def test_put_text_creates_missing_subdirectories(tmp_dir_fixture):  # NOQA
    from dtoolcore.storagebroker import DiskStorageBroker

    destination_path = os.path.join(tmp_dir_fixture, 'my_proto_dataset')

    storagebroker = DiskStorageBroker(destination_path)

    assert not os.path.exists(destination_path)
    storagebroker.create_structure()
    assert os.path.isdir(destination_path)

    assert os.path.isdir(storagebroker._annotations_abspath)
    os.rmdir(storagebroker._annotations_abspath)
    assert not os.path.isdir(storagebroker._annotations_abspath)

    annotation_key = os.path.join(storagebroker._annotations_abspath, "a.json")
    storagebroker.put_text(annotation_key, "{}")
    assert os.path.isdir(storagebroker._annotations_abspath)

    assert os.path.isfile(annotation_key)
    assert not os.path.isdir(annotation_key)
Example 20
def test_put_get_annotation(tmp_dir_fixture):  # NOQA
    from dtoolcore.storagebroker import DiskStorageBroker

    destination_path = os.path.join(tmp_dir_fixture, 'my_proto_dataset')
    storagebroker = DiskStorageBroker(destination_path)

    storagebroker.create_structure()

    # Test list annotation names.
    assert storagebroker.list_annotation_names() == []

    # Test various types of values.
    storagebroker.put_annotation("project", "value")
    assert storagebroker.get_annotation("project") == "value"

    storagebroker.put_annotation("project", 1)
    assert storagebroker.get_annotation("project") == 1
    assert type(storagebroker.get_annotation("project")) is int

    storagebroker.put_annotation("project", True)
    assert storagebroker.get_annotation("project") is True

    storagebroker.put_annotation("project", [1, 2, 3])
    assert storagebroker.get_annotation("project") == [1, 2, 3]

    storagebroker.put_annotation("project", {"a": 1})
    assert storagebroker.get_annotation("project") == {"a": 1}

    # Test list annotation names.
    assert storagebroker.list_annotation_names() == ["project"]
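
The type round-tripping checked above is exactly what you would get if each annotation were serialised as JSON, a reasonable assumption given that Example 19 writes annotation files with a .json suffix. Illustrated with the json module directly:

import json

for value in ["value", 1, True, [1, 2, 3], {"a": 1}]:
    # Every value type exercised in the test survives a JSON round trip.
    assert json.loads(json.dumps(value)) == value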
Example 21
def test_update_readme(tmp_dir_fixture):  # NOQA

    from dtoolcore.storagebroker import DiskStorageBroker

    destination_path = os.path.join(tmp_dir_fixture, 'my_proto_dataset')
    storagebroker = DiskStorageBroker(destination_path)

    storagebroker.create_structure()

    storagebroker.put_readme('Hello world')
    assert storagebroker.get_readme_content() == 'Hello world'

    assert len(storagebroker._list_historical_readme_keys()) == 0

    storagebroker.update_readme('Updated')
    assert storagebroker.get_readme_content() == 'Updated'

    assert len(storagebroker._list_historical_readme_keys()) == 1

    with open(storagebroker._list_historical_readme_keys()[0]) as fh:
        assert fh.read() == 'Hello world'

    time.sleep(0.1)

    storagebroker.update_readme('Updated again')
    assert storagebroker.get_readme_content() == 'Updated again'

    assert len(storagebroker._list_historical_readme_keys()) == 2
Example 22
def test_overlays_functional(tmp_dir_fixture):  # NOQA

    from dtoolcore import (
        DataSet,
        ProtoDataSet,
        DtoolCoreKeyError,
        DtoolCoreTypeError,
        DtoolCoreValueError,
        DtoolCoreInvalidNameError,
        generate_admin_metadata,
        copy,
    )

    from dtoolcore.utils import generate_identifier
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(name=name,
                                              uuid=admin_metadata["uuid"],
                                              base_uri=tmp_dir_fixture)

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset.
    proto_dataset = ProtoDataSet(uri=dest_uri,
                                 admin_metadata=admin_metadata,
                                 config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')

    # Freeze the dataset
    proto_dataset.put_readme("")
    proto_dataset.freeze()

    # Load the dataset.
    dataset = DataSet.from_uri(proto_dataset.uri)

    # The overlay has not been added yet.
    with pytest.raises(DtoolCoreKeyError):
        dataset.get_overlay("is_png")

    # Create overlay content.
    expected_identifier = generate_identifier('tiny.png')
    is_png_overlay = {expected_identifier: True}

    with pytest.raises(DtoolCoreTypeError):
        dataset.put_overlay("is_png", "not_a_dict")

    incorrect_identifier_overlay = {"incorrect": True}
    with pytest.raises(DtoolCoreValueError):
        dataset.put_overlay("is_png", incorrect_identifier_overlay)

    invalid_keys = ["with space", "with,comma", "with/slash", "X" * 81]
    for invalid_key in invalid_keys:
        with pytest.raises(DtoolCoreInvalidNameError):
            dataset.put_overlay(invalid_key, is_png_overlay)

    dataset.put_overlay("is_png", is_png_overlay)
    assert dataset.get_overlay("is_png") == is_png_overlay

    # Test copy.
    copy_dataset_directory = os.path.join(tmp_dir_fixture, "copy")
    os.mkdir(copy_dataset_directory)
    dest_uri = dataset.base_uri + "/copy"
    copy_uri = copy(dataset.uri, dest_uri)

    copy_dataset = DataSet.from_uri(copy_uri)
    assert copy_dataset.list_overlay_names() == ["is_png"]
    assert copy_dataset.get_overlay("is_png") == is_png_overlay
Example 23
def test_create_structure(tmp_dir_fixture):  # NOQA

    from dtoolcore.storagebroker import DiskStorageBroker
    from dtoolcore.storagebroker import StorageBrokerOSError

    storagebroker = DiskStorageBroker(tmp_dir_fixture)

    with pytest.raises(StorageBrokerOSError):
        storagebroker.create_structure()

    destination_path = os.path.join(tmp_dir_fixture, 'my_proto_dataset')

    storagebroker = DiskStorageBroker(destination_path)

    assert not os.path.exists(destination_path)
    storagebroker.create_structure()
    assert os.path.isdir(destination_path)

    destination_path = os.path.join(tmp_dir_fixture, 'sub', 'my_proto_dataset')

    storagebroker = DiskStorageBroker(destination_path)

    with pytest.raises(OSError):
        storagebroker.create_structure()
Example 24
def test_notify_route(tmp_app_with_users, tmp_dir_fixture):  # NOQA
    bucket_name = 'bucket'

    # Add local directory as base URI and assign URI to the bucket
    base_uri = sanitise_uri(tmp_dir_fixture)
    register_base_uri(base_uri)
    update_permissions({
        'base_uri': base_uri,
        'users_with_search_permissions': ['snow-white'],
        'users_with_register_permissions': ['snow-white'],
    })
    Config.BUCKET_TO_BASE_URI[bucket_name] = base_uri

    # Create test dataset
    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(name=name,
                                              uuid=admin_metadata["uuid"],
                                              base_uri=tmp_dir_fixture)

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(uri=dest_uri,
                                 admin_metadata=admin_metadata,
                                 config_path=None)
    proto_dataset.create()
    readme = 'abc: def'
    proto_dataset.put_readme(readme)
    proto_dataset.put_item(local_file_path, 'tiny.png')

    proto_dataset.freeze()

    # Read in a dataset
    dataset = DataSet.from_uri(dest_uri)

    expected_identifier = generate_identifier('tiny.png')
    assert expected_identifier in dataset.identifiers
    assert len(dataset.identifiers) == 1

    # Tell plugin that dataset has been created
    r = tmp_app_with_users.post(
        "/elastic-search/notify/all/{}".format(name),
        json={
            'bucket': bucket_name,
            'metadata': dataset._admin_metadata
        },
    )
    assert r.status_code == 200

    # Check that dataset has actually been registered
    datasets = list_datasets_by_user('snow-white')
    assert len(datasets) == 1
    assert datasets[0]['base_uri'] == base_uri
    assert datasets[0]['uri'] == dest_uri
    assert datasets[0]['uuid'] == admin_metadata['uuid']
    assert datasets[0]['name'] == name

    # Check README
    check_readme = get_readme_from_uri_by_user('snow-white', dest_uri)
    assert check_readme == yaml.safe_load(readme)

    # Update README
    new_readme = 'ghi: jkl'
    dataset.put_readme(new_readme)

    # Notify the plugin about the updated README
    r = tmp_app_with_users.post(
        "/elastic-search/notify/all/{}".format(name),
        json={
            'bucket': bucket_name,
            'metadata': dataset._admin_metadata
        },
    )
    assert r.status_code == 200

    # Check dataset
    datasets = list_datasets_by_user('snow-white')
    assert len(datasets) == 1
    assert datasets[0]['base_uri'] == base_uri
    assert datasets[0]['uri'] == dest_uri
    assert datasets[0]['uuid'] == admin_metadata['uuid']
    assert datasets[0]['name'] == name

    # Check that README has actually been changed
    check_readme = get_readme_from_uri_by_user('snow-white', dest_uri)
    assert check_readme == yaml.safe_load(new_readme)

    # Tell plugin that dataset has been deleted
    r = tmp_app_with_users.delete(
        "/elastic-search/notify/all/{}_{}/dtool".format(
            bucket_name, admin_metadata['uuid']))
    assert r.status_code == 200

    # Check that dataset has been deleted
    datasets = list_datasets_by_user('snow-white')
    assert len(datasets) == 0
Example 25
def test_initialise():

    from dtoolcore.storagebroker import DiskStorageBroker

    path = '/a/path'
    storagebroker = DiskStorageBroker(uri=path)  # NOQA
Example 26
def test_proto_dataset_freeze_functional(tmp_dir_fixture):  # NOQA

    from dtoolcore import (
        generate_admin_metadata,
        DataSet,
        ProtoDataSet,
        DtoolCoreTypeError
    )
    from dtoolcore.utils import generate_identifier
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "func_test_dataset_freeze"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri=tmp_dir_fixture)

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)

    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None
    )
    proto_dataset.create()

    filenames = ['tiny.png', 'actually_a_png.txt', 'another_file.txt']
    for filename in filenames:
        local_file_path = os.path.join(sample_data_path, filename)
        proto_dataset.put_item(local_file_path, filename)
        proto_dataset.add_item_metadata(
            filename,
            'namelen',
            len(filename)
        )
        proto_dataset.add_item_metadata(
            filename,
            'firstletter',
            filename[0]
        )

    proto_dataset.put_readme(content='Hello world!')

    # We shouldn't be able to load this as a DataSet
    with pytest.raises(DtoolCoreTypeError):
        DataSet.from_uri(dest_uri)

    proto_dataset.freeze()

    # Freezing removes the temporary metadata fragments directory.
    assert not os.path.isdir(
        proto_dataset._storage_broker._metadata_fragments_abspath)

    # Now we shouldn't be able to load it as a ProtoDataSet
    with pytest.raises(DtoolCoreTypeError):
        ProtoDataSet.from_uri(dest_uri)

    # But we can as a DataSet
    dataset = DataSet.from_uri(dest_uri)
    assert dataset.name == 'func_test_dataset_freeze'

    # Test identifiers
    expected_identifiers = map(generate_identifier, filenames)
    assert set(dataset.identifiers) == set(expected_identifiers)

    # Test readme contents
    assert dataset.get_readme_content() == "Hello world!"

    # Test item
    expected_identifier = generate_identifier('tiny.png')
    item_properties = dataset.item_properties(expected_identifier)
    assert item_properties['relpath'] == 'tiny.png'
    assert item_properties['size_in_bytes'] == 276
    assert item_properties['hash'] == 'dc73192d2f81d7009ce5a1ee7bad5755'

    # Test accessing item
    expected_identifier = generate_identifier('another_file.txt')
    fpath = dataset.item_content_abspath(expected_identifier)

    with open(fpath) as fh:
        contents = fh.read()

    assert contents == "Hello\n"

    # Test overlays have been created properly
    namelen_overlay = dataset.get_overlay('namelen')
    expected_identifier = generate_identifier('another_file.txt')
    assert namelen_overlay[expected_identifier] == len('another_file.txt')
Example 27
def test_creation_and_reading(tmp_dir_fixture):  # NOQA
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "func_test_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri=tmp_dir_fixture)

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)

    # Create a proto dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_readme("")

    assert proto_dataset.name == "func_test_dataset"

    # Test reading from URI.
    proto_dataset = ProtoDataSet.from_uri(dest_uri)
    assert proto_dataset.name == "func_test_dataset"

    # Test get/put readme.
    assert proto_dataset.get_readme_content() == ""
    proto_dataset.put_readme("Hello world!")
    assert proto_dataset.get_readme_content() == "Hello world!"

    # Test putting a local file
    handle = "tiny.png"
    local_file_path = os.path.join(sample_data_path, 'tiny.png')
    proto_dataset.put_item(local_file_path, handle)
    assert handle in list(proto_dataset._storage_broker.iter_item_handles())

    # Test properties of that file
    item_properties = proto_dataset._storage_broker.item_properties(handle)
    assert item_properties['relpath'] == 'tiny.png'
    assert item_properties['size_in_bytes'] == 276
    assert item_properties['hash'] == 'dc73192d2f81d7009ce5a1ee7bad5755'
    assert 'utc_timestamp' in item_properties
    time_from_item = datetime.datetime.fromtimestamp(
        float(item_properties['utc_timestamp']),
        tz=pytz.UTC
    )

    time.sleep(0.1)  # Make tests more robust on Windows.
    time_delta = datetime.datetime.now(tz=pytz.UTC) - time_from_item

    assert time_delta.days == 0
    assert time_delta.seconds < 20

    # Add metadata
    proto_dataset.add_item_metadata(handle, 'foo', 'bar')
    proto_dataset.add_item_metadata(
        handle,
        'key',
        {'subkey': 'subval',
         'morekey': 'moreval'}
    )

    # Test metadata retrieval
    metadata = proto_dataset._storage_broker.get_item_metadata(handle)
    assert metadata == {
        'foo': 'bar',
        'key': {
            'subkey': 'subval',
            'morekey': 'moreval'
        }
    }

    # Add another item and test manifest
    from dtoolcore import __version__
    from dtoolcore.utils import generate_identifier
    second_fname = "random_bytes"
    local_file_path = os.path.join(sample_data_path, second_fname)
    proto_dataset.put_item(local_file_path, second_fname)
    second_handle = second_fname
    generated_manifest = proto_dataset.generate_manifest()
    assert generated_manifest['hash_function'] == 'md5sum_hexdigest'
    assert generated_manifest['dtoolcore_version'] == __version__
    expected_identifier = generate_identifier(second_handle)
    assert expected_identifier in generated_manifest['items']
    assert generated_manifest['items'][expected_identifier]['relpath'] \
        == second_handle
    assert generated_manifest['items'][expected_identifier]['hash'] \
        == '5e5ccafa2018a36f8726398cc6589de8'
Example 28
def test_annotation_functional(tmp_dir_fixture):  # NOQA

    from dtoolcore import (
        DataSet,
        ProtoDataSet,
        DtoolCoreKeyError,
        DtoolCoreInvalidNameError,
        generate_admin_metadata,
        copy,
    )

    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(name=name,
                                              uuid=admin_metadata["uuid"],
                                              base_uri=tmp_dir_fixture)

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset.
    proto_dataset = ProtoDataSet(uri=dest_uri,
                                 admin_metadata=admin_metadata,
                                 config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')

    # Test working on annotations with a ProtoDataSet.
    with pytest.raises(DtoolCoreKeyError):
        proto_dataset.get_annotation(annotation_name="project")

    proto_dataset.put_annotation(annotation_name="project",
                                 annotation="world-peace")
    assert proto_dataset.get_annotation("project") == "world-peace"

    proto_dataset.put_annotation("project", "food-sustainability")
    assert proto_dataset.get_annotation("project") == "food-sustainability"

    assert proto_dataset.list_annotation_names() == ["project"]

    # Freeze the dataset
    proto_dataset.put_readme("")
    proto_dataset.freeze()

    # Test working on annotations with a frozen DataSet.
    dataset = DataSet.from_uri(dest_uri)
    with pytest.raises(DtoolCoreKeyError):
        dataset.get_annotation(annotation_name="stars")

    dataset.put_annotation(annotation_name="stars", annotation=0)
    assert dataset.get_annotation("stars") == 0

    dataset.put_annotation("stars", 5)
    assert dataset.get_annotation("stars") == 5

    assert dataset.list_annotation_names() == ["project", "stars"]

    # Test invalid names: spaces, commas, slashes and over-long names.
    invalid_keys = ["with space", "with,comma", "with/slash", "X" * 81]
    for invalid_key in invalid_keys:
        with pytest.raises(DtoolCoreInvalidNameError):
            dataset.put_annotation(invalid_key, "bad")

    # Test invalid keys, name too long.
    with pytest.raises(DtoolCoreInvalidNameError):
        dataset.put_annotation("x" * 81, "bad")

    # Test copy.
    copy_dataset_directory = os.path.join(tmp_dir_fixture, "copy")
    os.mkdir(copy_dataset_directory)
    dest_uri = dataset.base_uri + "/copy"
    copy_uri = copy(dataset.uri, dest_uri)

    copy_dataset = DataSet.from_uri(copy_uri)
    assert copy_dataset.list_annotation_names() == ["project", "stars"]
    assert copy_dataset.get_annotation("stars") == 5
    assert copy_dataset.get_annotation("project") == "food-sustainability"