def test_version_rest_publish_zarr(
    api_client,
    user: User,
    draft_version: Version,
    draft_asset_factory,
    zarr_archive_factory,
):
    """Publishing a dandiset that contains a zarr-backed asset must be rejected."""
    assign_perm('owner', user, draft_version.dandiset)
    api_client.force_authenticate(user=user)

    # One zarr-backed asset and one ordinary blob-backed asset
    zarr_asset: Asset = draft_asset_factory(
        zarr=zarr_archive_factory(dandiset=draft_version.dandiset), blob=None
    )
    plain_asset: Asset = draft_asset_factory()
    draft_version.assets.add(zarr_asset)
    draft_version.assets.add(plain_asset)

    # Validate the metadata to mark the assets and version as `VALID`,
    # so the publish attempt fails only because of the zarr.
    for pending_asset in (zarr_asset, plain_asset):
        tasks.validate_asset_metadata(pending_asset.id)
    tasks.validate_version_metadata(draft_version.id)
    draft_version.refresh_from_db()
    assert draft_version.valid

    resp = api_client.post(
        f'/api/dandisets/{draft_version.dandiset.identifier}'
        f'/versions/{draft_version.version}/publish/'
    )
    assert resp.status_code == 400
    assert resp.json() == ['Cannot publish dandisets which contain zarrs']
def revalidate(assets: bool, versions: bool, revalidate_all: bool, dry_run: bool):
    """
    Revalidate all Versions and Assets.

    This script will run the validation immediately in band without dispatching
    tasks to the queue.
    """
    if assets:
        queryset = Asset.objects
        if not revalidate_all:
            # By default only revisit assets that previously failed validation
            queryset = queryset.filter(status=Asset.Status.INVALID)
        click.echo(f'Revalidating {queryset.count()} assets')
        if not dry_run:
            for row in queryset.values('id'):
                validate_asset_metadata(row['id'])

    if versions:
        # Only revalidate draft versions
        queryset = Version.objects.filter(version='draft')
        if not revalidate_all:
            queryset = queryset.filter(status=Version.Status.INVALID)
        click.echo(f'Revalidating {queryset.count()} versions')
        if not dry_run:
            for row in queryset.values('id'):
                validate_version_metadata(row['id'])
def create_dev_dandiset(name: str, owner: str):
    """Create a minimal valid dandiset — a draft version plus one tiny text asset —
    owned by the user whose email is *owner*, then validate it in band."""
    owner_user = User.objects.get(email=owner)

    # Create a new dandiset
    dandiset = Dandiset()
    dandiset.save()
    dandiset.add_owner(owner_user)

    # Create the draft version
    version_metadata = {
        'schemaVersion': settings.DANDI_SCHEMA_VERSION,
        'schemaKey': 'Dandiset',
        'description': 'An informative description',
        'license': ['spdx:CC0-1.0'],
        'contributor': [
            {
                'name': f'{owner_user.last_name}, {owner_user.first_name}',
                'email': owner_user.email,
                'roleName': ['dcite:ContactPerson'],
                'schemaKey': 'Person',
                'affiliation': [],
                'includeInCitation': True,
            },
        ],
    }
    draft_version = Version(
        dandiset=dandiset,
        name=name,
        metadata=version_metadata,
        version='draft',
    )
    draft_version.save()

    # Reuse an existing blob with this etag if one was already created
    uploaded_file = SimpleUploadedFile(name='foo/bar.txt', content=b'A' * 20)
    etag = '76d36e98f312e98ff908c8c82c8dd623-0'
    try:
        asset_blob = AssetBlob.objects.get(etag=etag)
    except AssetBlob.DoesNotExist:
        asset_blob = AssetBlob(
            blob_id=uuid4(),
            blob=uploaded_file,
            etag=etag,
            size=20,
        )
        asset_blob.save()

    asset_metadata = {
        'schemaVersion': settings.DANDI_SCHEMA_VERSION,
        'encodingFormat': 'text/plain',
        'schemaKey': 'Asset',
    }
    asset = Asset(blob=asset_blob, metadata=asset_metadata, path='foo/bar.txt')
    asset.save()
    draft_version.assets.add(asset)

    # Run checksum + validation synchronously so the dandiset is immediately usable
    calculate_sha256(blob_id=asset_blob.blob_id)
    validate_asset_metadata(asset_id=asset.id)
    validate_version_metadata(version_id=draft_version.id)
def test_unembargo_dandiset_existing_blobs(
    dandiset_factory,
    draft_version_factory,
    asset_factory,
    asset_blob_factory,
    embargoed_asset_blob_factory,
    storage_tuple,
    monkeypatch,
):
    """
    Unembargoing an asset whose bytes already exist as an open AssetBlob
    (matched by etag) should reuse the existing blob rather than copying.
    """
    # Pretend like AssetBlob/EmbargoedAssetBlob were defined with the given storage.
    # Patch via monkeypatch — not bare assignment, which would leak the patched
    # storage into every later test — consistent with test_unembargo_dandiset.
    storage, embargoed_storage = storage_tuple
    monkeypatch.setattr(AssetBlob.blob.field, 'storage', storage)
    monkeypatch.setattr(EmbargoedAssetBlob.blob.field, 'storage', embargoed_storage)

    # Create dandiset and version
    dandiset: Dandiset = dandiset_factory(embargo_status=Dandiset.EmbargoStatus.EMBARGOED)
    draft_version: Version = draft_version_factory(dandiset=dandiset)

    # Create embargoed assets
    embargoed_asset_blob: EmbargoedAssetBlob = embargoed_asset_blob_factory()
    embargoed_asset: Asset = asset_factory(embargoed_blob=embargoed_asset_blob, blob=None)
    draft_version.assets.add(embargoed_asset)

    # Create an unembargoed asset blob with identical data (and hence identical etag)
    embargoed_asset_blob_data = embargoed_asset_blob.blob.read()
    embargoed_asset_blob.blob.seek(0)
    existing_asset_blob = asset_blob_factory(
        blob=factory.django.FileField(data=embargoed_asset_blob_data)
    )

    # Assert properties before unembargo
    assert embargoed_asset.embargoed_blob is not None
    assert embargoed_asset.blob is None
    assert embargoed_asset.embargoed_blob.etag != ''
    assert existing_asset_blob.etag != ''
    assert embargoed_asset_blob.etag == existing_asset_blob.etag

    # Run unembargo
    tasks.unembargo_dandiset(dandiset.pk)
    tasks.validate_version_metadata(draft_version.pk)
    dandiset.refresh_from_db()
    draft_version.refresh_from_db()

    # Assert correct changes took place
    assert dandiset.embargo_status == Dandiset.EmbargoStatus.OPEN
    assert draft_version.status == Version.Status.VALID
    assert draft_version.metadata['access'] == [
        {'schemaKey': 'AccessRequirements', 'status': 'dandi:OpenAccess'}
    ]

    # Assert no new asset created
    asset: Asset = draft_version.assets.first()
    assert asset == embargoed_asset

    # Check blobs: the pre-existing open blob was reused, not duplicated
    assert asset.embargoed_blob is None
    assert asset.blob is not None
    assert asset.blob.etag == embargoed_asset_blob.etag
    assert asset.blob == existing_asset_blob
def test_validate_version_metadata(version: Version, asset: Asset):
    """Happy path: a version containing a valid asset validates cleanly."""
    version.assets.add(asset)

    tasks.validate_version_metadata(version.id)
    version.refresh_from_db()

    # Validation succeeded and recorded no errors
    assert version.status == Version.Status.VALID
    assert version.validation_errors == []
def test_validate_version_metadata_malformed_schema_version(version: Version, asset: Asset):
    """An unrecognized schemaVersion yields exactly one descriptive error."""
    version.assets.add(asset)
    version.metadata['schemaVersion'] = 'xxx'
    version.save()

    tasks.validate_version_metadata(version.id)
    version.refresh_from_db()

    assert version.status == Version.Status.INVALID
    errors = version.validation_errors
    assert len(errors) == 1
    assert errors[0]['message'].startswith('Metadata version xxx is not allowed.')
def test_unembargo_dandiset_normal_asset_blob(
    dandiset_factory,
    draft_version_factory,
    asset_factory,
    asset_blob_factory,
    storage,
    monkeypatch,
):
    """
    Unembargoing a dandiset must leave assets that already use a normal
    (non-embargoed) AssetBlob completely untouched.
    """
    # Pretend like AssetBlob was defined with the given storage.
    # Patch via monkeypatch — not bare assignment, which would leak the patched
    # storage into every later test — consistent with test_unembargo_dandiset.
    monkeypatch.setattr(AssetBlob.blob.field, 'storage', storage)

    # Create dandiset and version
    dandiset: Dandiset = dandiset_factory(embargo_status=Dandiset.EmbargoStatus.EMBARGOED)
    draft_version: Version = draft_version_factory(dandiset=dandiset)

    # Create asset backed by a regular (open) blob
    asset_blob: AssetBlob = asset_blob_factory()
    asset: Asset = asset_factory(blob=asset_blob, embargoed_blob=None)
    draft_version.assets.add(asset)

    # Assert properties before unembargo
    assert asset.embargoed_blob is None
    assert asset.blob is not None

    # Run unembargo
    tasks.unembargo_dandiset(dandiset.pk)
    tasks.validate_version_metadata(draft_version.pk)
    dandiset.refresh_from_db()
    draft_version.refresh_from_db()

    # Assert correct changes took place
    assert dandiset.embargo_status == Dandiset.EmbargoStatus.OPEN
    assert draft_version.status == Version.Status.VALID
    assert draft_version.metadata['access'] == [
        {'schemaKey': 'AccessRequirements', 'status': 'dandi:OpenAccess'}
    ]

    # Assert no new asset created
    fetched_asset: Asset = draft_version.assets.first()
    assert asset == fetched_asset

    # Check that the blob is unchanged (dropped the previous duplicate
    # `assert asset.blob` — it was redundant with `asset.blob is not None`)
    assert fetched_asset.blob == asset_blob
    assert asset.embargoed_blob is None
    assert asset.blob is not None
    assert asset.blob.etag
def test_validate_version_metadata_malformed_license(version: Version, asset: Asset):
    """A string (non-array) `license` value produces a single schema error."""
    version.assets.add(asset)
    version.metadata['license'] = 'foo'
    version.save()

    tasks.validate_version_metadata(version.id)
    version.refresh_from_db()

    assert version.status == Version.Status.INVALID
    expected_error = {'field': 'license', 'message': "'foo' is not of type 'array'"}
    assert version.validation_errors == [expected_error]
def test_validate_version_metadata_no_description(version: Version, asset: Asset):
    """Removing the required `description` field produces a single schema error."""
    version.assets.add(asset)
    del version.metadata['description']
    version.save()

    tasks.validate_version_metadata(version.id)
    version.refresh_from_db()

    assert version.status == Version.Status.INVALID
    expected_error = {'field': '', 'message': "'description' is a required property"}
    assert version.validation_errors == [expected_error]
def test_version_rest_publish_assets(
    api_client,
    user: User,
    draft_version: Version,
    draft_asset_factory,
    published_asset_factory,
):
    """
    Publishing a version must publish its draft assets in place while leaving
    already-published assets byte-for-byte unchanged.

    NOTE(review): URN_RE / UTC_ISO_TIMESTAMP_RE appear to be sentinel objects
    whose equality matches any string of the right pattern — confirm their
    definitions elsewhere in this module.
    """
    assign_perm('owner', user, draft_version.dandiset)
    api_client.force_authenticate(user=user)

    # One not-yet-published asset and one that is published ahead of time
    old_draft_asset: Asset = draft_asset_factory()
    old_published_asset: Asset = published_asset_factory()
    old_published_asset.publish()
    old_published_asset.save()
    assert not old_draft_asset.published
    assert old_published_asset.published
    draft_version.assets.add(old_draft_asset)
    draft_version.assets.add(old_published_asset)

    # Validate the metadata to mark the assets and version as `VALID`
    tasks.validate_asset_metadata(old_draft_asset.id)
    tasks.validate_asset_metadata(old_published_asset.id)
    tasks.validate_version_metadata(draft_version.id)
    draft_version.refresh_from_db()
    assert draft_version.valid

    resp = api_client.post(
        f'/api/dandisets/{draft_version.dandiset.identifier}'
        f'/versions/{draft_version.version}/publish/'
    )
    assert resp.status_code == 200
    published_version = Version.objects.get(version=resp.data['version'])

    # Both assets carried over to the newly published version
    assert published_version.assets.count() == 2
    new_draft_asset: Asset = published_version.assets.get(asset_id=old_draft_asset.asset_id)
    new_published_asset: Asset = published_version.assets.get(
        asset_id=old_published_asset.asset_id
    )

    # The former draft asset should have been modified into a published asset:
    # same identity/path/blob, metadata augmented with publish provenance.
    assert new_draft_asset.published
    assert new_draft_asset.asset_id == old_draft_asset.asset_id
    assert new_draft_asset.path == old_draft_asset.path
    assert new_draft_asset.blob == old_draft_asset.blob
    assert new_draft_asset.metadata == {
        **old_draft_asset.metadata,
        'datePublished': UTC_ISO_TIMESTAMP_RE,
        'publishedBy': {
            'id': URN_RE,
            'name': 'DANDI publish',
            'startDate': UTC_ISO_TIMESTAMP_RE,
            # TODO endDate needs to be defined before publish is complete
            'endDate': UTC_ISO_TIMESTAMP_RE,
            'wasAssociatedWith': [
                {
                    'id': URN_RE,
                    'identifier': 'RRID:SCR_017571',
                    'name': 'DANDI API',
                    'version': '0.1.0',
                    'schemaKey': 'Software',
                }
            ],
            'schemaKey': 'PublishActivity',
        },
    }

    # The published_asset should be completely unchanged
    assert new_published_asset.published
    assert new_published_asset.asset_id == old_published_asset.asset_id
    assert new_published_asset.path == old_published_asset.path
    assert new_published_asset.blob == old_published_asset.blob
    assert new_published_asset.metadata == old_published_asset.metadata
def test_version_rest_publish(api_client, user: User, draft_version: Version, asset: Asset):
    """
    Publish a draft version end-to-end and verify the API response, the new
    published Version/Asset records, and the published metadata contents.

    NOTE(review): TIMESTAMP_RE / VERSION_ID_RE / URN_RE / UTC_ISO_TIMESTAMP_RE
    appear to be sentinel objects whose equality matches any string of the
    right pattern — confirm their definitions elsewhere in this module.
    """
    assign_perm('owner', user, draft_version.dandiset)
    api_client.force_authenticate(user=user)
    draft_version.assets.add(asset)

    # Validate the metadata to mark the version and asset as `VALID`
    tasks.validate_version_metadata(draft_version.id)
    tasks.validate_asset_metadata(asset.id)
    draft_version.refresh_from_db()
    assert draft_version.valid

    resp = api_client.post(
        f'/api/dandisets/{draft_version.dandiset.identifier}'
        f'/versions/{draft_version.version}/publish/'
    )
    # The response is the serialized, newly published version
    assert resp.data == {
        'dandiset': {
            'identifier': draft_version.dandiset.identifier,
            'created': TIMESTAMP_RE,
            'modified': TIMESTAMP_RE,
            'contact_person': draft_version.metadata['contributor'][0]['name'],
            'embargo_status': 'OPEN',
        },
        'version': VERSION_ID_RE,
        'name': draft_version.name,
        'created': TIMESTAMP_RE,
        'modified': TIMESTAMP_RE,
        'asset_count': 1,
        'size': draft_version.size,
        'status': 'Valid',
    }
    published_version = Version.objects.get(version=resp.data['version'])
    assert published_version
    # The dandiset now has both the draft and the published version
    assert draft_version.dandiset.versions.count() == 2

    published_asset: Asset = published_version.assets.get()
    assert published_asset.published
    # The asset should be the same after publishing
    assert asset.asset_id == published_asset.asset_id

    # Published metadata = draft metadata + publish provenance and identifiers
    assert published_version.metadata == {
        **draft_version.metadata,
        'publishedBy': {
            'id': URN_RE,
            'name': 'DANDI publish',
            'startDate': UTC_ISO_TIMESTAMP_RE,
            'endDate': UTC_ISO_TIMESTAMP_RE,
            'wasAssociatedWith': [
                {
                    'id': URN_RE,
                    'identifier': 'RRID:SCR_017571',
                    'name': 'DANDI API',
                    # TODO version the API
                    'version': '0.1.0',
                    'schemaKey': 'Software',
                }
            ],
            'schemaKey': 'PublishActivity',
        },
        'datePublished': UTC_ISO_TIMESTAMP_RE,
        'manifestLocation': [
            f'http://{settings.MINIO_STORAGE_ENDPOINT}/test-dandiapi-dandisets/test-prefix/dandisets/{draft_version.dandiset.identifier}/{published_version.version}/assets.yaml',  # noqa: E501
        ],
        'identifier': f'DANDI:{draft_version.dandiset.identifier}',
        'version': published_version.version,
        'id': f'DANDI:{draft_version.dandiset.identifier}/{published_version.version}',
        'url': (
            f'{settings.DANDI_WEB_APP_URL}/dandiset/{draft_version.dandiset.identifier}'
            f'/{published_version.version}'
        ),
        'citation': published_version.citation(published_version.metadata),
        'doi': f'10.80507/dandi.{draft_version.dandiset.identifier}/{published_version.version}',
        # Once the assets are linked, assetsSummary should be computed properly
        'assetsSummary': {
            'schemaKey': 'AssetsSummary',
            'numberOfBytes': 100,
            'numberOfFiles': 1,
            'dataStandard': [
                {
                    'schemaKey': 'StandardsType',
                    'identifier': 'RRID:SCR_015242',
                    'name': 'Neurodata Without Borders (NWB)',
                }
            ],
            'approach': [],
            'measurementTechnique': [],
            'variableMeasured': [],
            'species': [],
        },
    }

    # After publishing, the draft is marked PUBLISHED and no longer reports as valid
    draft_version.refresh_from_db()
    assert draft_version.status == Version.Status.PUBLISHED
    assert not draft_version.valid
def test_unembargo_dandiset(
    dandiset_factory,
    draft_version_factory,
    asset_factory,
    embargoed_asset_blob_factory,
    storage_tuple,
    file_size,
    part_size,
    monkeypatch,
):
    """End-to-end unembargo: a multipart embargoed blob ends up in open storage."""
    # Pretend like AssetBlob/EmbargoedAssetBlob were defined with the given storage
    open_storage, restricted_storage = storage_tuple
    monkeypatch.setattr(AssetBlob.blob.field, 'storage', open_storage)
    monkeypatch.setattr(EmbargoedAssetBlob.blob.field, 'storage', restricted_storage)

    # Monkey patch PartGenerator so that upload and copy use a smaller part size
    monkeypatch.setattr(PartGenerator, 'DEFAULT_PART_SIZE', part_size, raising=True)

    # Create dandiset and version
    dandiset: Dandiset = dandiset_factory(embargo_status=Dandiset.EmbargoStatus.EMBARGOED)
    draft_version: Version = draft_version_factory(dandiset=dandiset)

    # Create an embargoed blob from a random payload that spans several parts
    source_blob: EmbargoedAssetBlob = embargoed_asset_blob_factory(
        size=file_size, blob=SimpleUploadedFile('test', content=os.urandom(file_size))
    )
    expected_parts = math.ceil(file_size / part_size)
    assert source_blob.etag.endswith(f'-{expected_parts}')

    # Create asset from embargoed blob
    embargoed_asset: Asset = asset_factory(embargoed_blob=source_blob, blob=None)
    draft_version.assets.add(embargoed_asset)

    # Sanity checks before the unembargo runs
    assert embargoed_asset.embargoed_blob is not None
    assert embargoed_asset.blob is None
    assert embargoed_asset.embargoed_blob.etag != ''

    # Run unembargo and validate version metadata
    tasks.unembargo_dandiset(dandiset.pk)
    tasks.validate_version_metadata(draft_version.pk)
    dandiset.refresh_from_db()
    draft_version.refresh_from_db()

    # Assert correct changes took place
    assert dandiset.embargo_status == Dandiset.EmbargoStatus.OPEN
    assert draft_version.status == Version.Status.VALID
    assert draft_version.metadata['access'] == [
        {'schemaKey': 'AccessRequirements', 'status': 'dandi:OpenAccess'}
    ]

    # The asset itself is reused, not replaced
    asset: Asset = draft_version.assets.first()
    assert asset == embargoed_asset

    # The blob moved from embargoed to open storage with its content intact
    assert asset.embargoed_blob is None
    assert asset.blob is not None
    assert asset.blob.etag == source_blob.etag
    blob_id = str(asset.blob.blob_id)
    assert asset.blob.blob.name == f'test-prefix/blobs/{blob_id[:3]}/{blob_id[3:6]}/{blob_id}'