def test_uri_property_when_using_relpath(chdir_fixture):  # NOQA
    """The ``uri`` property is absolute even when the dataset was created
    via a relative path.

    NOTE(review): a second test with this exact name is defined later in
    this module and shadows this one at import time; one of the two should
    be renamed or removed.
    """
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    # Fix: use the ``base_uri`` keyword, as every other call to
    # DiskStorageBroker.generate_uri in this module does; ``prefix`` is
    # the obsolete name for this parameter.
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri=".")

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset.
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.freeze()

    dataset = DataSet.from_uri("./my_dataset")

    expected_uri = "file://" + os.path.abspath("my_dataset")
    assert dataset.uri == expected_uri
def test_basic_workflow(tmp_dir_fixture):  # NOQA
    """End-to-end check: create, populate, freeze and re-read a dataset."""
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.utils import generate_identifier
    from dtoolcore.storagebroker import DiskStorageBroker

    dataset_name = "my_dataset"
    admin_metadata = generate_admin_metadata(dataset_name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=dataset_name,
        uuid=admin_metadata["uuid"],
        base_uri=tmp_dir_fixture)

    png_path = os.path.join(os.path.join(TEST_SAMPLE_DATA), 'tiny.png')

    # Build and freeze a dataset containing a single item.
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(png_path, 'tiny.png')
    proto_dataset.freeze()

    # The frozen dataset exposes exactly the one expected identifier.
    dataset = DataSet.from_uri(dest_uri)
    expected_identifier = generate_identifier('tiny.png')
    assert expected_identifier in dataset.identifiers
    assert len(dataset.identifiers) == 1
def test_basic_workflow_with_nested_handle(tmp_dir_fixture):  # NOQA
    """A handle containing a subdirectory round-trips through freeze."""
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.utils import generate_identifier
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri=tmp_dir_fixture)

    handle = "subdir/tiny.png"
    source_file = os.path.join(os.path.join(TEST_SAMPLE_DATA), 'tiny.png')

    # Create, populate and freeze a minimal dataset.
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(source_file, handle)
    proto_dataset.freeze()

    # Re-read and check the identifier derived from the nested handle.
    dataset = DataSet.from_uri(dest_uri)
    expected_identifier = generate_identifier(handle)
    assert expected_identifier in dataset.identifiers
    assert len(dataset.identifiers) == 1

    # The item must exist on disk at the nested location; this matters
    # in particular on Windows where path separators differ.
    item_abspath = os.path.join(
        tmp_dir_fixture,
        name,
        "data",
        "subdir",
        "tiny.png"
    )
    assert os.path.isfile(item_abspath)
    assert os.path.isfile(dataset.item_content_abspath(expected_identifier))

    # item_content_abspath must return exactly that on-disk location.
    assert dataset.item_content_abspath(expected_identifier) == item_abspath  # NOQA
def test_list_overlays_when_dir_missing(chdir_fixture):  # NOQA
    """Listing overlays must work when ``.dtool/overlays`` is absent.

    Simulates checking out a frozen dataset from Git that never had any
    overlays written to it, so the overlays directory does not exist.

    See also: https://github.com/jic-dtool/dtoolcore/issues/3
    """
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri="file://.")

    png_path = os.path.join(os.path.join(TEST_SAMPLE_DATA), 'tiny.png')

    # Create and freeze a minimal dataset.
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(png_path, 'tiny.png')
    proto_dataset.freeze()

    # Remove the (empty) overlays directory to simulate the Git checkout.
    overlays_dir = proto_dataset._storage_broker._overlays_abspath
    assert os.path.isdir(overlays_dir)
    os.rmdir(overlays_dir)
    assert not os.path.isdir(overlays_dir)

    dataset = DataSet.from_uri(proto_dataset.uri)

    # This call triggered the bug being regression-tested here.
    overlay_names = dataset.list_overlay_names()
    assert overlay_names == []
def test_uri_property_when_using_relpath(chdir_fixture):  # NOQA
    """A dataset loaded via a relative path reports an absolute file URI."""
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.storagebroker import DiskStorageBroker
    from dtoolcore.utils import (IS_WINDOWS, windows_to_unix_path, urlparse)

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri=".")

    png_path = os.path.join(os.path.join(TEST_SAMPLE_DATA), 'tiny.png')

    # Create, populate and freeze a minimal dataset.
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(png_path, 'tiny.png')
    proto_dataset.freeze()

    dataset = DataSet.from_uri("my_dataset")

    # The URI must end with the absolute path (unix-style on Windows).
    abspath = os.path.abspath("my_dataset")
    if IS_WINDOWS:
        abspath = windows_to_unix_path(abspath)
    assert dataset.uri.startswith("file://")
    assert dataset.uri.endswith(abspath)

    # The netloc component is populated on POSIX but empty on Windows.
    parsed = urlparse(dataset.uri)
    if IS_WINDOWS:
        assert parsed.netloc == ""
    else:
        assert parsed.netloc != ""
def test_proto_dataset_freeze_functional(tmp_dir_fixture):  # NOQA
    """Full freeze workflow: items, per-item metadata, readme and overlays."""
    from dtoolcore import (
        generate_admin_metadata,
        DataSet,
        ProtoDataSet,
        DtoolCoreTypeError
    )
    from dtoolcore.utils import generate_identifier
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "func_test_dataset_freeze"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri=tmp_dir_fixture)

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)

    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None
    )
    proto_dataset.create()

    # Add three items, each annotated with two metadata fragments.
    filenames = ['tiny.png', 'actually_a_png.txt', 'another_file.txt']
    for fname in filenames:
        proto_dataset.put_item(os.path.join(sample_data_path, fname), fname)
        proto_dataset.add_item_metadata(fname, 'namelen', len(fname))
        proto_dataset.add_item_metadata(fname, 'firstletter', fname[0])

    proto_dataset.put_readme(content='Hello world!')

    # A proto dataset must not load as a frozen DataSet.
    with pytest.raises(DtoolCoreTypeError):
        DataSet.from_uri(dest_uri)

    proto_dataset.freeze()

    # Freezing removes the temporary metadata fragments directory.
    assert not os.path.isdir(
        proto_dataset._storage_broker._metadata_fragments_abspath)

    # Once frozen, it must no longer load as a ProtoDataSet ...
    with pytest.raises(DtoolCoreTypeError):
        ProtoDataSet.from_uri(dest_uri)

    # ... but it does load as a DataSet.
    dataset = DataSet.from_uri(dest_uri)
    assert dataset.name == 'func_test_dataset_freeze'

    # Identifiers cover exactly the items that were added.
    expected_identifiers = map(generate_identifier, filenames)
    assert set(dataset.identifiers) == set(expected_identifiers)

    # Readme contents survived the freeze.
    assert dataset.get_readme_content() == "Hello world!"

    # Properties of a single item.
    expected_identifier = generate_identifier('tiny.png')
    item_properties = dataset.item_properties(expected_identifier)
    assert item_properties['relpath'] == 'tiny.png'
    assert item_properties['size_in_bytes'] == 276
    assert item_properties['hash'] == 'dc73192d2f81d7009ce5a1ee7bad5755'

    # Item content is accessible on disk.
    expected_identifier = generate_identifier('another_file.txt')
    fpath = dataset.item_content_abspath(expected_identifier)
    with open(fpath) as fh:
        contents = fh.read()
    assert contents == "Hello\n"

    # The per-item metadata fragments became overlays on freeze.
    namelen_overlay = dataset.get_overlay('namelen')
    expected_identifier = generate_identifier('another_file.txt')
    assert namelen_overlay[expected_identifier] == len('another_file.txt')
def test_creation_and_reading(tmp_dir_fixture):  # NOQA
    """ProtoDataSet round-trip: readme, items, item metadata and manifest."""
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "func_test_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri=tmp_dir_fixture)

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)

    # Create a proto dataset.
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_readme("")

    assert proto_dataset.name == "func_test_dataset"

    # Reading back from the URI yields an equivalent proto dataset.
    proto_dataset = ProtoDataSet.from_uri(dest_uri)
    assert proto_dataset.name == "func_test_dataset"

    # Readme get/put round-trip.
    assert proto_dataset.get_readme_content() == ""
    proto_dataset.put_readme("Hello world!")
    assert proto_dataset.get_readme_content() == "Hello world!"

    # Put a local file into the dataset.
    handle = "tiny.png"
    proto_dataset.put_item(os.path.join(sample_data_path, 'tiny.png'), handle)
    assert handle in list(proto_dataset._storage_broker.iter_item_handles())

    # Properties of the stored item.
    item_properties = proto_dataset._storage_broker.item_properties(handle)
    assert item_properties['relpath'] == 'tiny.png'
    assert item_properties['size_in_bytes'] == 276
    assert item_properties['hash'] == 'dc73192d2f81d7009ce5a1ee7bad5755'
    assert 'utc_timestamp' in item_properties

    # The recorded timestamp must be recent.
    time_from_item = datetime.datetime.fromtimestamp(
        float(item_properties['utc_timestamp']),
        tz=pytz.UTC
    )
    time.sleep(0.1)  # Make tests more robust on Windows.
    time_delta = datetime.datetime.now(tz=pytz.UTC) - time_from_item
    assert time_delta.days == 0
    assert time_delta.seconds < 20

    # Attach per-item metadata.
    proto_dataset.add_item_metadata(handle, 'foo', 'bar')
    proto_dataset.add_item_metadata(
        handle,
        'key',
        {'subkey': 'subval', 'morekey': 'moreval'}
    )

    # Metadata retrieval.
    metadata = proto_dataset._storage_broker.get_item_metadata(handle)
    assert metadata == {
        'foo': 'bar',
        'key': {
            'subkey': 'subval',
            'morekey': 'moreval'
        }
    }

    # Add a second item and verify the generated manifest.
    from dtoolcore import __version__
    from dtoolcore.utils import generate_identifier
    second_fname = "random_bytes"
    proto_dataset.put_item(
        os.path.join(sample_data_path, second_fname), second_fname)
    second_handle = second_fname
    generated_manifest = proto_dataset.generate_manifest()
    assert generated_manifest['hash_function'] == 'md5sum_hexdigest'
    assert generated_manifest['dtoolcore_version'] == __version__
    expected_identifier = generate_identifier(second_handle)
    assert expected_identifier in generated_manifest['items']
    assert generated_manifest['items'][expected_identifier]['relpath'] \
        == second_handle
    assert generated_manifest['items'][expected_identifier]['hash'] \
        == '5e5ccafa2018a36f8726398cc6589de8'
def test_annotation_functional(tmp_dir_fixture):  # NOQA
    """Annotations on proto and frozen datasets, key validation, and copy."""
    from dtoolcore import (
        DataSet,
        ProtoDataSet,
        DtoolCoreKeyError,
        DtoolCoreInvalidNameError,
        generate_admin_metadata,
        copy,
    )
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri=tmp_dir_fixture)

    png_path = os.path.join(os.path.join(TEST_SAMPLE_DATA), 'tiny.png')

    # Create a minimal proto dataset.
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(png_path, 'tiny.png')

    # Annotations on a ProtoDataSet: missing key raises; put/get/overwrite.
    with pytest.raises(DtoolCoreKeyError):
        proto_dataset.get_annotation(annotation_name="project")

    proto_dataset.put_annotation(
        annotation_name="project", annotation="world-peace")
    assert proto_dataset.get_annotation("project") == "world-peace"

    proto_dataset.put_annotation("project", "food-sustainability")
    assert proto_dataset.get_annotation("project") == "food-sustainability"

    assert proto_dataset.list_annotation_names() == ["project"]

    # Freeze the dataset.
    proto_dataset.put_readme("")
    proto_dataset.freeze()

    # Annotations on a frozen DataSet behave the same way.
    dataset = DataSet.from_uri(dest_uri)
    with pytest.raises(DtoolCoreKeyError):
        dataset.get_annotation(annotation_name="stars")

    dataset.put_annotation(annotation_name="stars", annotation=0)
    assert dataset.get_annotation("stars") == 0

    dataset.put_annotation("stars", 5)
    assert dataset.get_annotation("stars") == 5

    assert dataset.list_annotation_names() == ["project", "stars"]

    # Invalid annotation names: spaces, punctuation, over-long names.
    for bad_name in ["with space", "with,comma", "with/slash", "X" * 81]:
        with pytest.raises(DtoolCoreInvalidNameError):
            dataset.put_annotation(bad_name, "bad")

    # Name too long (explicit boundary case).
    with pytest.raises(DtoolCoreInvalidNameError):
        dataset.put_annotation("x" * 81, "bad")

    # Copying the dataset preserves its annotations.
    copy_dataset_directory = os.path.join(tmp_dir_fixture, "copy")
    os.mkdir(copy_dataset_directory)
    dest_uri = dataset.base_uri + "/copy"
    copy_uri = copy(dataset.uri, dest_uri)
    copy_dataset = DataSet.from_uri(copy_uri)
    assert copy_dataset.list_annotation_names() == ["project", "stars"]
    assert copy_dataset.get_annotation("stars") == 5
    assert copy_dataset.get_annotation("project") == "food-sustainability"
def test_notify_route(tmp_app_with_users, tmp_dir_fixture):  # NOQA
    """Exercise the elastic-search notify routes: create, update, delete."""
    bucket_name = 'bucket'

    # Add local directory as base URI and assign URI to the bucket.
    base_uri = sanitise_uri(tmp_dir_fixture)
    register_base_uri(base_uri)
    update_permissions({
        'base_uri': base_uri,
        'users_with_search_permissions': ['snow-white'],
        'users_with_register_permissions': ['snow-white'],
    })
    Config.BUCKET_TO_BASE_URI[bucket_name] = base_uri

    # Create test dataset.
    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri=tmp_dir_fixture)

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset.
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    readme = 'abc: def'
    proto_dataset.put_readme(readme)
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.freeze()

    # Read in the dataset.
    dataset = DataSet.from_uri(dest_uri)
    expected_identifier = generate_identifier('tiny.png')
    assert expected_identifier in dataset.identifiers
    assert len(dataset.identifiers) == 1

    # Tell plugin that dataset has been created.
    r = tmp_app_with_users.post(
        "/elastic-search/notify/all/{}".format(name),
        json={
            'bucket': bucket_name,
            'metadata': dataset._admin_metadata
        },
    )
    assert r.status_code == 200

    # Check that dataset has actually been registered.
    datasets = list_datasets_by_user('snow-white')
    assert len(datasets) == 1
    assert datasets[0]['base_uri'] == base_uri
    assert datasets[0]['uri'] == dest_uri
    assert datasets[0]['uuid'] == admin_metadata['uuid']
    assert datasets[0]['name'] == name

    # Check README.
    check_readme = get_readme_from_uri_by_user('snow-white', dest_uri)
    # Fix: yaml.load without an explicit Loader is deprecated (and a
    # TypeError with PyYAML >= 6); safe_load is the correct way to parse
    # these plain YAML documents.
    assert check_readme == yaml.safe_load(readme)

    # Update README.
    new_readme = 'ghi: jkl'
    dataset.put_readme(new_readme)

    # Notify plugin about updated name.
    r = tmp_app_with_users.post(
        "/elastic-search/notify/all/{}".format(name),
        json={
            'bucket': bucket_name,
            'metadata': dataset._admin_metadata
        },
    )
    assert r.status_code == 200

    # Check dataset.
    datasets = list_datasets_by_user('snow-white')
    assert len(datasets) == 1
    assert datasets[0]['base_uri'] == base_uri
    assert datasets[0]['uri'] == dest_uri
    assert datasets[0]['uuid'] == admin_metadata['uuid']
    assert datasets[0]['name'] == name

    # Check that README has actually been changed.
    check_readme = get_readme_from_uri_by_user('snow-white', dest_uri)
    assert check_readme == yaml.safe_load(new_readme)

    # Tell plugin that dataset has been deleted.
    r = tmp_app_with_users.delete(
        "/elastic-search/notify/all/{}_{}/dtool".format(
            bucket_name, admin_metadata['uuid']))
    assert r.status_code == 200

    # Check that dataset has been deleted.
    datasets = list_datasets_by_user('snow-white')
    assert len(datasets) == 0
def test_overlays_functional(tmp_dir_fixture):  # NOQA
    """Overlay put/get validation on a frozen dataset, plus copy."""
    from dtoolcore import (
        DataSet,
        ProtoDataSet,
        DtoolCoreKeyError,
        DtoolCoreTypeError,
        DtoolCoreValueError,
        DtoolCoreInvalidNameError,
        generate_admin_metadata,
        copy,
    )
    from dtoolcore.utils import generate_identifier
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri=tmp_dir_fixture)

    png_path = os.path.join(os.path.join(TEST_SAMPLE_DATA), 'tiny.png')

    # Create a minimal dataset and freeze it.
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(png_path, 'tiny.png')
    proto_dataset.put_readme("")
    proto_dataset.freeze()

    # Load the frozen dataset.
    dataset = DataSet.from_uri(proto_dataset.uri)

    # No overlay of that name exists yet.
    with pytest.raises(DtoolCoreKeyError):
        dataset.get_overlay("is_png")

    expected_identifier = generate_identifier('tiny.png')
    is_png_overlay = {expected_identifier: True}

    # Overlay content must be a dict ...
    with pytest.raises(DtoolCoreTypeError):
        dataset.put_overlay("is_png", "not_a_dict")

    # ... keyed exactly by the dataset's identifiers ...
    with pytest.raises(DtoolCoreValueError):
        dataset.put_overlay("is_png", {"incorrect": True})

    # ... and stored under a valid overlay name.
    for bad_name in ["with space", "with,comma", "with/slash", "X" * 81]:
        with pytest.raises(DtoolCoreInvalidNameError):
            dataset.put_overlay(bad_name, is_png_overlay)

    dataset.put_overlay("is_png", is_png_overlay)
    assert dataset.get_overlay("is_png") == is_png_overlay

    # Copying the dataset preserves its overlays.
    copy_dataset_directory = os.path.join(tmp_dir_fixture, "copy")
    os.mkdir(copy_dataset_directory)
    dest_uri = dataset.base_uri + "/copy"
    copy_uri = copy(dataset.uri, dest_uri)
    copy_dataset = DataSet.from_uri(copy_uri)
    assert copy_dataset.list_overlay_names() == ["is_png"]
    assert copy_dataset.get_overlay("is_png") == is_png_overlay