def test_store_and_retrieve_item_metadata(tmp_dir_fixture):  # NOQA
    """Item metadata fragments stored under two keys are both returned."""
    from dtoolcore.storagebroker import DiskStorageBroker

    broker = DiskStorageBroker(
        os.path.join(tmp_dir_fixture, 'my_proto_dataset'))
    broker.create_structure()

    handle = 'dummy'

    # Two separate fragments with distinct keys.
    broker.add_item_metadata(handle=handle, key='foo', value='bar')
    broker.add_item_metadata(
        handle=handle,
        key='key',
        value={'subkey': 'subval', 'morekey': 'moreval'})

    # Retrieval merges both fragments into a single dict.
    assert broker.get_item_metadata(handle) == {
        'foo': 'bar',
        'key': {'subkey': 'subval', 'morekey': 'moreval'},
    }
def test_pre_freeze_hook(tmp_dir_fixture):  # NOQA
    """pre_freeze_hook passes on a clean structure and warns on rogue files."""
    from dtoolcore.storagebroker import DiskStorageBroker

    dataset_path = os.path.join(tmp_dir_fixture, 'my_proto_dataset')
    broker = DiskStorageBroker(dataset_path)
    broker.create_structure()

    # Add a legitimate data file.
    with open(os.path.join(broker._data_abspath, "sample.txt"), "w") as fh:
        fh.write("some sample data")

    # A correct structure must not raise.
    broker.pre_freeze_hook()

    # A stray file directly in the dataset root invalidates the structure.
    with open(os.path.join(dataset_path, "rogue.txt"), "w") as fh:
        fh.write("I should not be here")

    from dtoolcore.storagebroker import DiskStorageBrokerValidationWarning
    with pytest.raises(DiskStorageBrokerValidationWarning):
        broker.pre_freeze_hook()
def test_store_and_retrieve_manifest(tmp_dir_fixture):  # NOQA
    """A manifest survives a put/get round trip and is pretty-printed on disk."""
    from dtoolcore.storagebroker import DiskStorageBroker

    broker = DiskStorageBroker(
        os.path.join(tmp_dir_fixture, 'my_proto_dataset'))
    broker.create_structure()

    manifest = {'a': 'b', 'c': [1, 2, 3]}
    broker.put_manifest(manifest)
    assert broker.get_manifest() == manifest

    # Check the on-disk formatting line by line (indented JSON).
    expected_lines = [
        '{',
        ' "a": "b",',
        ' "c": [',
        ' 1,',
        ' 2,',
        ' 3',
        ' ]',
        '}'
    ]
    with open(broker.get_manifest_key()) as fh:
        for i, line in enumerate(fh):
            assert line.rstrip() == expected_lines[i]
def test_uri_property_when_using_relpath_prefix(chdir_fixture):  # NOQA
    """The dataset URI is absolute even when created via a relative prefix.

    Renamed from ``test_uri_property_when_using_relpath``: another test in
    this module uses that exact name, so one of the two definitions was
    shadowed and never collected by pytest.
    """
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    # NOTE(review): sibling tests call generate_uri with ``base_uri=``;
    # confirm ``prefix=`` is still a supported keyword of this API.
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        prefix=".")

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset.
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.freeze()

    # Loading by relative path still yields an absolute file:// URI.
    dataset = DataSet.from_uri("./my_dataset")

    expected_uri = "file://" + os.path.abspath("my_dataset")
    assert dataset.uri == expected_uri
def test_basic_workflow(tmp_dir_fixture):  # NOQA
    """Create, freeze and re-load a one-item dataset."""
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.utils import generate_identifier
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri=tmp_dir_fixture)

    local_file_path = os.path.join(TEST_SAMPLE_DATA, 'tiny.png')

    # Build a minimal proto dataset and freeze it.
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.freeze()

    # Re-load as a frozen dataset; exactly one known identifier.
    dataset = DataSet.from_uri(dest_uri)
    expected_identifier = generate_identifier('tiny.png')
    assert expected_identifier in dataset.identifiers
    assert len(dataset.identifiers) == 1
def test_post_freeze_hook(tmp_dir_fixture):  # NOQA
    """post_freeze_hook removes the temporary metadata fragments directory."""
    from dtoolcore.storagebroker import DiskStorageBroker

    broker = DiskStorageBroker(
        os.path.join(tmp_dir_fixture, 'my_proto_dataset'))
    broker.create_structure()

    # Safe to call before the fragments directory exists.
    broker.post_freeze_hook()

    # Adding item metadata creates the fragments directory ...
    handle = 'dummy'
    broker.add_item_metadata(handle, key='foo', value='bar')
    assert os.path.isdir(broker._metadata_fragments_abspath)

    # ... and the hook cleans it up again.
    broker.post_freeze_hook()
    assert not os.path.isdir(broker._metadata_fragments_abspath)
def test_unix_relpaths_from_iter_item_handles(tmp_dir_fixture):  # NOQA
    """Item handles use forward slashes regardless of platform."""
    from dtoolcore.storagebroker import DiskStorageBroker

    broker = DiskStorageBroker(
        os.path.join(tmp_dir_fixture, 'my_proto_dataset'))
    broker.create_structure()

    # Put a data file one directory level down.
    subdir = os.path.join(broker._data_abspath, "level")
    os.mkdir(subdir)
    with open(os.path.join(subdir, "sample.txt"), "w") as fh:
        fh.write("some sample data")

    handles = list(broker.iter_item_handles())
    assert len(handles) == 1
    # Unix-style separator expected, even on Windows.
    assert handles[0] == "level/sample.txt"
def test_has_admin_metadata(tmp_dir_fixture):  # NOQA
    """has_admin_metadata flips to True only after a put."""
    from dtoolcore.storagebroker import DiskStorageBroker

    broker = DiskStorageBroker(
        os.path.join(tmp_dir_fixture, 'my_proto_dataset'))

    # Neither a missing structure ...
    assert not broker.has_admin_metadata()

    # ... nor a freshly created one carries admin metadata.
    broker.create_structure()
    assert not broker.has_admin_metadata()

    broker.put_admin_metadata({'hello': 'world'})
    assert broker.has_admin_metadata()
def test_store_and_retrieve_admin_metadata(tmp_dir_fixture):  # NOQA
    """Admin metadata written by one broker is visible to a second broker."""
    from dtoolcore.storagebroker import DiskStorageBroker

    path = os.path.join(tmp_dir_fixture, 'my_proto_dataset')

    writer = DiskStorageBroker(path)
    writer.create_structure()
    admin_metadata = {'hello': 'world'}
    writer.put_admin_metadata(admin_metadata)

    # A fresh broker instance pointing at the same path sees the data.
    reader = DiskStorageBroker(path)
    assert reader.get_admin_metadata() == admin_metadata
def test_diff_content(tmp_dir_fixture):  # NOQA
    """diff_content reports per-item hash differences between two datasets."""
    from dtoolcore import (
        DataSet,
        generate_admin_metadata,
        generate_proto_dataset,
    )
    from dtoolcore.utils import generate_identifier
    from dtoolcore.compare import diff_content
    from dtoolcore.storagebroker import DiskStorageBroker

    fpaths = create_test_files(tmp_dir_fixture)

    # Two datasets share the handle "file.txt" but have different content.
    proto_ds_a = generate_proto_dataset(
        admin_metadata=generate_admin_metadata("test_compare_1"),
        prefix=tmp_dir_fixture,
        storage="file")
    proto_ds_a.create()
    proto_ds_a.put_item(fpaths["cat"], "file.txt")
    proto_ds_a.freeze()

    proto_ds_b = generate_proto_dataset(
        admin_metadata=generate_admin_metadata("test_compare_2"),
        prefix=tmp_dir_fixture,
        storage="file")
    proto_ds_b.create()
    proto_ds_b.put_item(fpaths["she"], "file.txt")
    proto_ds_b.freeze()

    ds_a = DataSet.from_uri(proto_ds_a.uri)
    ds_b = DataSet.from_uri(proto_ds_b.uri)

    # A dataset never differs from itself.
    assert diff_content(ds_a, ds_a) == []

    # Differing content shows up as (identifier, hash_a, hash_b).
    identifier = generate_identifier("file.txt")
    expected = [
        (identifier,
         DiskStorageBroker.hasher(ds_a.item_content_abspath(identifier)),
         DiskStorageBroker.hasher(ds_b.item_content_abspath(identifier)))
    ]
    assert diff_content(ds_a, ds_b) == expected
def test_store_and_retrieve_readme(tmp_dir_fixture):  # NOQA
    """README content survives a put/get round trip."""
    from dtoolcore.storagebroker import DiskStorageBroker

    broker = DiskStorageBroker(
        os.path.join(tmp_dir_fixture, 'my_proto_dataset'))
    broker.create_structure()

    broker.put_readme('Hello world')
    assert broker.get_readme_content() == 'Hello world'
def test_basic_workflow_with_nested_handle(tmp_dir_fixture):  # NOQA
    """Items with nested handles resolve to real files on disk."""
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.utils import generate_identifier
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri=tmp_dir_fixture)

    local_file_path = os.path.join(TEST_SAMPLE_DATA, 'tiny.png')
    handle = "subdir/tiny.png"

    # Create a minimal dataset holding one nested item.
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, handle)
    proto_dataset.freeze()

    dataset = DataSet.from_uri(dest_uri)

    expected_identifier = generate_identifier(handle)
    assert expected_identifier in dataset.identifiers
    assert len(dataset.identifiers) == 1

    # The item must exist at the expected nested path; path separator
    # handling matters particularly on Windows.
    item_abspath = os.path.join(
        tmp_dir_fixture, name, "data", "subdir", "tiny.png")
    assert os.path.isfile(item_abspath)
    assert os.path.isfile(dataset.item_content_abspath(expected_identifier))

    # The broker must report exactly that abspath.
    assert dataset.item_content_abspath(expected_identifier) == item_abspath  # NOQA
def test_list_dataset_uris(tmp_dir_fixture):  # NOQA
    """list_dataset_uris finds every dataset under a prefix."""
    import dtoolcore
    from dtoolcore.storagebroker import DiskStorageBroker

    # An empty prefix yields no URIs.
    assert [] == DiskStorageBroker.list_dataset_uris(
        prefix=tmp_dir_fixture,
        config_path=None)

    # Create two datasets that should then be listed.
    expected_uris = []
    for name in ["test_ds_1", "test_ds_2"]:
        admin_metadata = dtoolcore.generate_admin_metadata(name)
        proto_dataset = dtoolcore.generate_proto_dataset(
            admin_metadata=admin_metadata,
            prefix=tmp_dir_fixture,
            storage="file")
        proto_dataset.create()
        expected_uris.append(proto_dataset.uri)

    actual_uris = DiskStorageBroker.list_dataset_uris(
        prefix=tmp_dir_fixture,
        config_path=None)
    assert set(expected_uris) == set(actual_uris)
def test_store_and_retrieve_overlay(tmp_dir_fixture):  # NOQA
    """An overlay survives a put/get round trip.

    Renamed from ``test_store_and_retrieve_item_metadata``: that name
    clashed with the item-metadata test earlier in this module (only the
    last definition of a duplicated name gets collected by pytest), and
    this test exercises overlays, not item metadata.
    """
    from dtoolcore.storagebroker import DiskStorageBroker

    broker = DiskStorageBroker(
        os.path.join(tmp_dir_fixture, 'my_proto_dataset'))
    broker.create_structure()

    example_overlay = {'abcdef': 1, 'ghijkl': 2}
    broker.put_overlay(overlay_name="example", overlay=example_overlay)

    assert broker.get_overlay('example') == example_overlay
def test_put_item(tmp_dir_fixture):  # NOQA
    """put_item makes the item's handle discoverable."""
    from dtoolcore.storagebroker import DiskStorageBroker

    broker = DiskStorageBroker(
        os.path.join(tmp_dir_fixture, 'my_proto_dataset'))
    broker.create_structure()

    broker.put_item(
        fpath=os.path.join(TEST_SAMPLE_DATA, 'tiny.png'),
        relpath='tiny.png')

    assert 'tiny.png' in list(broker.iter_item_handles())
def test_item_properties(tmp_dir_fixture):  # NOQA
    """item_properties reports size, timestamp, hash and relpath."""
    from dtoolcore.storagebroker import DiskStorageBroker

    broker = DiskStorageBroker(
        os.path.join(tmp_dir_fixture, 'my_proto_dataset'))
    broker.create_structure()

    input_file_path = os.path.join(TEST_SAMPLE_DATA, 'tiny.png')
    broker.put_item(fpath=input_file_path, relpath='tiny.png')

    handle = list(broker.iter_item_handles())[0]
    item_properties = broker.item_properties(handle)

    # Known size of the sample PNG.
    assert item_properties['size_in_bytes'] == 276

    # The UTC timestamp should be recent.
    assert 'utc_timestamp' in item_properties
    time_from_item = datetime.datetime.fromtimestamp(
        float(item_properties['utc_timestamp']), tz=pytz.UTC)
    time.sleep(0.1)  # Make tests more robust on Windows.
    time_delta = datetime.datetime.now(tz=pytz.UTC) - time_from_item
    assert time_delta.days == 0
    assert time_delta.seconds < 20

    # Hash matches an independently computed md5 of the input file.
    from dtoolcore.filehasher import md5sum_hexdigest
    assert item_properties['hash'] == md5sum_hexdigest(input_file_path)

    assert item_properties['relpath'] == 'tiny.png'
def test_list_overlays_when_dir_missing(chdir_fixture):  # NOQA
    """Listing overlays is safe when ``.dtool/overlays`` is absent.

    Simulates checking out a frozen dataset from Git that has no overlays
    written to it. See also:
    https://github.com/jic-dtool/dtoolcore/issues/3
    """
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri="file://.")

    local_file_path = os.path.join(TEST_SAMPLE_DATA, 'tiny.png')

    # Create a minimal frozen dataset.
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.freeze()

    # Remove the overlay directory to reproduce the bug scenario.
    overlays_abspath = proto_dataset._storage_broker._overlays_abspath
    assert os.path.isdir(overlays_abspath)
    os.rmdir(overlays_abspath)
    assert not os.path.isdir(overlays_abspath)

    dataset = DataSet.from_uri(proto_dataset.uri)

    # This call caused the bug.
    assert dataset.list_overlay_names() == []
def test_uri_property_when_using_relpath(chdir_fixture):  # NOQA
    """dataset.uri is an absolute file:// URI on all platforms."""
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.storagebroker import DiskStorageBroker
    from dtoolcore.utils import (IS_WINDOWS, windows_to_unix_path, urlparse)

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri=".")

    local_file_path = os.path.join(TEST_SAMPLE_DATA, 'tiny.png')

    # Create a minimal dataset via a relative base URI.
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.freeze()

    dataset = DataSet.from_uri("my_dataset")

    abspath = os.path.abspath("my_dataset")
    if IS_WINDOWS:
        abspath = windows_to_unix_path(abspath)
    assert dataset.uri.startswith("file://")
    assert dataset.uri.endswith(abspath)

    # On Windows the drive letter lives in the path, leaving the netloc empty.
    parsed = urlparse(dataset.uri)
    if IS_WINDOWS:
        assert parsed.netloc == ""
    else:
        assert parsed.netloc != ""
def test_put_text_creates_missing_subdirectories(tmp_dir_fixture):  # NOQA
    """put_text recreates missing parent directories for its key."""
    from dtoolcore.storagebroker import DiskStorageBroker

    destination_path = os.path.join(tmp_dir_fixture, 'my_proto_dataset')
    broker = DiskStorageBroker(destination_path)
    assert not os.path.exists(destination_path)

    broker.create_structure()
    assert os.path.isdir(destination_path)
    assert os.path.isdir(broker._annotations_abspath)

    # Remove the annotations directory to simulate a partial structure.
    os.rmdir(broker._annotations_abspath)
    assert not os.path.isdir(broker._annotations_abspath)

    annotation_key = os.path.join(broker._annotations_abspath, "a.json")
    broker.put_text(annotation_key, "{}")

    # The directory is recreated and the key is a regular file, not a dir.
    assert os.path.isdir(broker._annotations_abspath)
    assert os.path.isfile(annotation_key)
    assert not os.path.isdir(annotation_key)
def test_put_get_annotation(tmp_dir_fixture):  # NOQA
    """Annotations round trip a variety of JSON-serializable values."""
    from dtoolcore.storagebroker import DiskStorageBroker

    broker = DiskStorageBroker(
        os.path.join(tmp_dir_fixture, 'my_proto_dataset'))
    broker.create_structure()

    # No annotations to start with.
    assert broker.list_annotation_names() == []

    # Strings, ints, bools, lists and dicts all round trip.
    broker.put_annotation("project", "value")
    assert broker.get_annotation("project") == "value"

    broker.put_annotation("project", 1)
    assert broker.get_annotation("project") == 1
    # The int must not come back as a bool or string.
    assert type(broker.get_annotation("project")) is int

    broker.put_annotation("project", True)
    assert broker.get_annotation("project") is True

    broker.put_annotation("project", [1, 2, 3])
    assert broker.get_annotation("project") == [1, 2, 3]

    broker.put_annotation("project", {"a": 1})
    assert broker.get_annotation("project") == {"a": 1}

    # Re-putting under the same name does not create duplicate entries.
    assert broker.list_annotation_names() == ["project"]
def test_update_readme(tmp_dir_fixture):  # NOQA
    """update_readme archives each previous README version."""
    from dtoolcore.storagebroker import DiskStorageBroker

    broker = DiskStorageBroker(
        os.path.join(tmp_dir_fixture, 'my_proto_dataset'))
    broker.create_structure()

    broker.put_readme('Hello world')
    assert broker.get_readme_content() == 'Hello world'
    assert len(broker._list_historical_readme_keys()) == 0

    # First update archives the original content.
    broker.update_readme('Updated')
    assert broker.get_readme_content() == 'Updated'
    assert len(broker._list_historical_readme_keys()) == 1
    with open(broker._list_historical_readme_keys()[0]) as fh:
        assert fh.read() == 'Hello world'

    # Sleep so the archived copies get distinct timestamps.
    time.sleep(0.1)
    broker.update_readme('Updated again')
    assert broker.get_readme_content() == 'Updated again'
    assert len(broker._list_historical_readme_keys()) == 2
def test_overlays_functional(tmp_dir_fixture):  # NOQA
    """End-to-end overlay behaviour: validation, round trip and copy."""
    from dtoolcore import (
        DataSet,
        ProtoDataSet,
        DtoolCoreKeyError,
        DtoolCoreTypeError,
        DtoolCoreValueError,
        DtoolCoreInvalidNameError,
        generate_admin_metadata,
        copy,
    )
    from dtoolcore.utils import generate_identifier
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri=tmp_dir_fixture)

    local_file_path = os.path.join(TEST_SAMPLE_DATA, 'tiny.png')

    # Create and freeze a minimal dataset.
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.put_readme("")
    proto_dataset.freeze()

    # Load the frozen dataset.
    dataset = DataSet.from_uri(proto_dataset.uri)

    # Fetching an overlay that was never written raises.
    with pytest.raises(DtoolCoreKeyError):
        dataset.get_overlay("is_png")

    expected_identifier = generate_identifier('tiny.png')
    is_png_overlay = {expected_identifier: True}

    # The overlay must be a dict ...
    with pytest.raises(DtoolCoreTypeError):
        dataset.put_overlay("is_png", "not_a_dict")

    # ... keyed by the dataset's own identifiers ...
    incorrect_identifier_overlay = {"incorrect": True}
    with pytest.raises(DtoolCoreValueError):
        dataset.put_overlay("is_png", incorrect_identifier_overlay)

    # ... and its name must be a valid dtool name.
    invalid_keys = ["with space", "with,comma", "with/slash", "X" * 81]
    for invalid_key in invalid_keys:
        with pytest.raises(DtoolCoreInvalidNameError):
            dataset.put_overlay(invalid_key, is_png_overlay)

    dataset.put_overlay("is_png", is_png_overlay)
    assert dataset.get_overlay("is_png") == is_png_overlay

    # Overlays are preserved by dataset copy.
    copy_dataset_directory = os.path.join(tmp_dir_fixture, "copy")
    os.mkdir(copy_dataset_directory)
    dest_uri = dataset.base_uri + "/copy"
    copy_uri = copy(dataset.uri, dest_uri)
    copy_dataset = DataSet.from_uri(copy_uri)
    assert copy_dataset.list_overlay_names() == ["is_png"]
    assert copy_dataset.get_overlay("is_png") == is_png_overlay
def test_create_structure(tmp_dir_fixture):  # NOQA
    """create_structure fails on existing paths and missing parents."""
    from dtoolcore.storagebroker import DiskStorageBroker
    from dtoolcore.storagebroker import StorageBrokerOSError

    # Refuses to build on top of an existing directory.
    broker = DiskStorageBroker(tmp_dir_fixture)
    with pytest.raises(StorageBrokerOSError):
        broker.create_structure()

    # Creates the dataset directory when only the leaf is missing.
    destination_path = os.path.join(tmp_dir_fixture, 'my_proto_dataset')
    broker = DiskStorageBroker(destination_path)
    assert not os.path.exists(destination_path)
    broker.create_structure()
    assert os.path.isdir(destination_path)

    # Does not create missing intermediate directories.
    destination_path = os.path.join(tmp_dir_fixture, 'sub', 'my_proto_dataset')
    broker = DiskStorageBroker(destination_path)
    with pytest.raises(OSError):
        broker.create_structure()
def test_notify_route(tmp_app_with_users, tmp_dir_fixture):  # NOQA
    """The /elastic-search/notify routes register, update and delete datasets.

    Fix: ``yaml.load`` was called without an explicit ``Loader``, which is
    deprecated since PyYAML 5.1 and unsafe by its historical default;
    replaced with ``yaml.safe_load``, which is sufficient for these plain
    YAML readme strings.
    """
    bucket_name = 'bucket'

    # Add local directory as base URI and assign URI to the bucket.
    base_uri = sanitise_uri(tmp_dir_fixture)
    register_base_uri(base_uri)
    update_permissions({
        'base_uri': base_uri,
        'users_with_search_permissions': ['snow-white'],
        'users_with_register_permissions': ['snow-white'],
    })
    Config.BUCKET_TO_BASE_URI[bucket_name] = base_uri

    # Create test dataset.
    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri=tmp_dir_fixture)

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset.
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    readme = 'abc: def'
    proto_dataset.put_readme(readme)
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.freeze()

    # Read in a dataset.
    dataset = DataSet.from_uri(dest_uri)
    expected_identifier = generate_identifier('tiny.png')
    assert expected_identifier in dataset.identifiers
    assert len(dataset.identifiers) == 1

    # Tell plugin that dataset has been created.
    r = tmp_app_with_users.post(
        "/elastic-search/notify/all/{}".format(name),
        json={
            'bucket': bucket_name,
            'metadata': dataset._admin_metadata
        },
    )
    assert r.status_code == 200

    # Check that dataset has actually been registered.
    datasets = list_datasets_by_user('snow-white')
    assert len(datasets) == 1
    assert datasets[0]['base_uri'] == base_uri
    assert datasets[0]['uri'] == dest_uri
    assert datasets[0]['uuid'] == admin_metadata['uuid']
    assert datasets[0]['name'] == name

    # Check README.
    check_readme = get_readme_from_uri_by_user('snow-white', dest_uri)
    assert check_readme == yaml.safe_load(readme)

    # Update README.
    new_readme = 'ghi: jkl'
    dataset.put_readme(new_readme)

    # Notify plugin about updated name.
    r = tmp_app_with_users.post(
        "/elastic-search/notify/all/{}".format(name),
        json={
            'bucket': bucket_name,
            'metadata': dataset._admin_metadata
        },
    )
    assert r.status_code == 200

    # Check dataset.
    datasets = list_datasets_by_user('snow-white')
    assert len(datasets) == 1
    assert datasets[0]['base_uri'] == base_uri
    assert datasets[0]['uri'] == dest_uri
    assert datasets[0]['uuid'] == admin_metadata['uuid']
    assert datasets[0]['name'] == name

    # Check that README has actually been changed.
    check_readme = get_readme_from_uri_by_user('snow-white', dest_uri)
    assert check_readme == yaml.safe_load(new_readme)

    # Tell plugin that dataset has been deleted.
    r = tmp_app_with_users.delete(
        "/elastic-search/notify/all/{}_{}/dtool".format(
            bucket_name, admin_metadata['uuid']))
    assert r.status_code == 200

    # Check that dataset has been deleted.
    datasets = list_datasets_by_user('snow-white')
    assert len(datasets) == 0
def test_initialise():
    """A DiskStorageBroker can be constructed from a bare path."""
    from dtoolcore.storagebroker import DiskStorageBroker

    broker = DiskStorageBroker(uri='/a/path')  # NOQA
def test_proto_dataset_freeze_functional(tmp_dir_fixture):  # NOQA
    """Freezing turns a ProtoDataSet into a read-only DataSet with overlays."""
    from dtoolcore import (
        generate_admin_metadata,
        DataSet,
        ProtoDataSet,
        DtoolCoreTypeError
    )
    from dtoolcore.utils import generate_identifier
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "func_test_dataset_freeze"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri=tmp_dir_fixture)

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)

    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()

    # Add three items, each carrying two pieces of item metadata.
    filenames = ['tiny.png', 'actually_a_png.txt', 'another_file.txt']
    for filename in filenames:
        local_file_path = os.path.join(sample_data_path, filename)
        proto_dataset.put_item(local_file_path, filename)
        proto_dataset.add_item_metadata(filename, 'namelen', len(filename))
        proto_dataset.add_item_metadata(filename, 'firstletter', filename[0])
    proto_dataset.put_readme(content='Hello world!')

    # Before freezing it cannot be loaded as a frozen DataSet.
    with pytest.raises(DtoolCoreTypeError):
        DataSet.from_uri(dest_uri)

    proto_dataset.freeze()

    # Freezing removes the temporary metadata fragments directory.
    assert not os.path.isdir(
        proto_dataset._storage_broker._metadata_fragments_abspath)

    # After freezing the reverse holds: no longer loadable as a ProtoDataSet ...
    with pytest.raises(DtoolCoreTypeError):
        ProtoDataSet.from_uri(dest_uri)

    # ... but loadable as a DataSet.
    dataset = DataSet.from_uri(dest_uri)
    assert dataset.name == 'func_test_dataset_freeze'

    # Identifiers cover exactly the three items.
    expected_identifiers = map(generate_identifier, filenames)
    assert set(dataset.identifiers) == set(expected_identifiers)

    # README contents survived the freeze.
    assert dataset.get_readme_content() == "Hello world!"

    # Item properties are available by identifier.
    expected_identifier = generate_identifier('tiny.png')
    item_properties = dataset.item_properties(expected_identifier)
    assert item_properties['relpath'] == 'tiny.png'
    assert item_properties['size_in_bytes'] == 276
    assert item_properties['hash'] == 'dc73192d2f81d7009ce5a1ee7bad5755'

    # Item content is accessible via an absolute path.
    expected_identifier = generate_identifier('another_file.txt')
    fpath = dataset.item_content_abspath(expected_identifier)
    with open(fpath) as fh:
        contents = fh.read()
    assert contents == "Hello\n"

    # Item metadata fragments were turned into overlays.
    namelen_overlay = dataset.get_overlay('namelen')
    expected_identifier = generate_identifier('another_file.txt')
    assert namelen_overlay[expected_identifier] == len('another_file.txt')
def test_creation_and_reading(tmp_dir_fixture):  # NOQA
    """ProtoDataSet round trips README, items and manifest generation."""
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "func_test_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri=tmp_dir_fixture)

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)

    # Create a proto dataset.
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_readme("")
    assert proto_dataset.name == "func_test_dataset"

    # Re-read the proto dataset from its URI.
    proto_dataset = ProtoDataSet.from_uri(dest_uri)
    assert proto_dataset.name == "func_test_dataset"

    # README get/put round trip.
    assert proto_dataset.get_readme_content() == ""
    proto_dataset.put_readme("Hello world!")
    assert proto_dataset.get_readme_content() == "Hello world!"

    # Put a local file and check its handle is listed.
    handle = "tiny.png"
    local_file_path = os.path.join(sample_data_path, 'tiny.png')
    proto_dataset.put_item(local_file_path, handle)
    assert handle in list(proto_dataset._storage_broker.iter_item_handles())

    # Check the stored file's properties.
    item_properties = proto_dataset._storage_broker.item_properties(handle)
    assert item_properties['relpath'] == 'tiny.png'
    assert item_properties['size_in_bytes'] == 276
    assert item_properties['hash'] == 'dc73192d2f81d7009ce5a1ee7bad5755'
    assert 'utc_timestamp' in item_properties
    time_from_item = datetime.datetime.fromtimestamp(
        float(item_properties['utc_timestamp']), tz=pytz.UTC)
    time.sleep(0.1)  # Make tests more robust on Windows.
    time_delta = datetime.datetime.now(tz=pytz.UTC) - time_from_item
    assert time_delta.days == 0
    assert time_delta.seconds < 20

    # Add item metadata under two keys.
    proto_dataset.add_item_metadata(handle, 'foo', 'bar')
    proto_dataset.add_item_metadata(
        handle, 'key', {'subkey': 'subval', 'morekey': 'moreval'})

    # Both fragments come back merged.
    metadata = proto_dataset._storage_broker.get_item_metadata(handle)
    assert metadata == {
        'foo': 'bar',
        'key': {'subkey': 'subval', 'morekey': 'moreval'},
    }

    # Add another item and inspect the generated manifest.
    from dtoolcore import __version__
    from dtoolcore.utils import generate_identifier
    second_fname = "random_bytes"
    local_file_path = os.path.join(sample_data_path, second_fname)
    proto_dataset.put_item(local_file_path, second_fname)
    second_handle = second_fname

    generated_manifest = proto_dataset.generate_manifest()
    assert generated_manifest['hash_function'] == 'md5sum_hexdigest'
    assert generated_manifest['dtoolcore_version'] == __version__
    expected_identifier = generate_identifier(second_handle)
    assert expected_identifier in generated_manifest['items']
    manifest_item = generated_manifest['items'][expected_identifier]
    assert manifest_item['relpath'] == second_handle
    assert manifest_item['hash'] == '5e5ccafa2018a36f8726398cc6589de8'
def test_annotation_functional(tmp_dir_fixture):  # NOQA
    """End-to-end annotations on proto, frozen and copied datasets."""
    from dtoolcore import (
        DataSet,
        ProtoDataSet,
        DtoolCoreKeyError,
        DtoolCoreInvalidNameError,
        generate_admin_metadata,
        copy,
    )
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri=tmp_dir_fixture)

    local_file_path = os.path.join(TEST_SAMPLE_DATA, 'tiny.png')

    # Create a minimal proto dataset.
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')

    # Annotations on a ProtoDataSet: missing key raises, puts overwrite.
    with pytest.raises(DtoolCoreKeyError):
        proto_dataset.get_annotation(annotation_name="project")
    proto_dataset.put_annotation(
        annotation_name="project", annotation="world-peace")
    assert proto_dataset.get_annotation("project") == "world-peace"
    proto_dataset.put_annotation("project", "food-sustainability")
    assert proto_dataset.get_annotation("project") == "food-sustainability"
    assert proto_dataset.list_annotation_names() == ["project"]

    # Freeze the dataset.
    proto_dataset.put_readme("")
    proto_dataset.freeze()

    # Annotations on a frozen DataSet behave the same way.
    dataset = DataSet.from_uri(dest_uri)
    with pytest.raises(DtoolCoreKeyError):
        dataset.get_annotation(annotation_name="stars")
    dataset.put_annotation(annotation_name="stars", annotation=0)
    assert dataset.get_annotation("stars") == 0
    dataset.put_annotation("stars", 5)
    assert dataset.get_annotation("stars") == 5
    assert dataset.list_annotation_names() == ["project", "stars"]

    # Invalid annotation names: spaces, commas, slashes, over-long.
    invalid_keys = ["with space", "with,comma", "with/slash", "X" * 81]
    for invalid_key in invalid_keys:
        with pytest.raises(DtoolCoreInvalidNameError):
            dataset.put_annotation(invalid_key, "bad")

    # Over-long name checked explicitly as well.
    with pytest.raises(DtoolCoreInvalidNameError):
        dataset.put_annotation("x" * 81, "bad")

    # Annotations are preserved by dataset copy.
    copy_dataset_directory = os.path.join(tmp_dir_fixture, "copy")
    os.mkdir(copy_dataset_directory)
    dest_uri = dataset.base_uri + "/copy"
    copy_uri = copy(dataset.uri, dest_uri)
    copy_dataset = DataSet.from_uri(copy_uri)
    assert copy_dataset.list_annotation_names() == ["project", "stars"]
    assert copy_dataset.get_annotation("stars") == 5
    assert copy_dataset.get_annotation("project") == "food-sustainability"