def create_dev_dandiset(name: str, owner: str):
    """
    Create a dandiset with a draft version and a single small text asset.

    ``name`` becomes the name of the draft version. ``owner`` is the email
    address used to look up the existing user who is added as the dandiset
    owner and listed as the contact person in the version metadata.

    After the objects are saved, the sha256 calculation and the asset/version
    metadata validation functions are called directly on them.
    """
    owner = User.objects.get(email=owner)

    # Create a new dandiset
    dandiset = Dandiset()
    dandiset.save()
    dandiset.add_owner(owner)

    # Create the draft version
    contact_person = {
        'name': f'{owner.last_name}, {owner.first_name}',
        'email': owner.email,
        'roleName': ['dcite:ContactPerson'],
        'schemaKey': 'Person',
        'affiliation': [],
        'includeInCitation': True,
    }
    version_metadata = {
        'schemaVersion': settings.DANDI_SCHEMA_VERSION,
        'schemaKey': 'Dandiset',
        'description': 'An informative description',
        'license': ['spdx:CC0-1.0'],
        'contributor': [contact_person],
    }
    draft_version = Version(
        dandiset=dandiset,
        name=name,
        metadata=version_metadata,
        version='draft',
    )
    draft_version.save()

    # Reuse the blob with this etag if it already exists, otherwise upload it
    placeholder_file = SimpleUploadedFile(name='foo/bar.txt', content=b'A' * 20)
    etag = '76d36e98f312e98ff908c8c82c8dd623-0'
    try:
        asset_blob = AssetBlob.objects.get(etag=etag)
    except AssetBlob.DoesNotExist:
        asset_blob = AssetBlob(
            blob_id=uuid4(),
            blob=placeholder_file,
            etag=etag,
            size=20,
        )
        asset_blob.save()

    # Create the asset and attach it to the draft version
    asset = Asset(
        blob=asset_blob,
        metadata={
            'schemaVersion': settings.DANDI_SCHEMA_VERSION,
            'encodingFormat': 'text/plain',
            'schemaKey': 'Asset',
        },
        path='foo/bar.txt',
    )
    asset.save()
    draft_version.assets.add(asset)

    # Run the checksum and validation steps on the new objects
    calculate_sha256(blob_id=asset_blob.blob_id)
    validate_asset_metadata(asset_id=asset.id)
    validate_version_metadata(version_id=draft_version.id)
def test_publish_asset(draft_asset: Asset):
    """Check the blob and metadata of an asset after it has been published."""
    # Capture the pre-publish state for comparison
    draft_asset_id = draft_asset.asset_id
    blob_before = draft_asset.blob
    metadata_before = draft_asset.metadata

    draft_asset.publish()
    draft_asset.save()

    # The same instance is inspected after publishing
    published_asset = draft_asset
    assert published_asset.blob == blob_before

    api_software = {
        'id': URN_RE,
        'identifier': 'RRID:SCR_017571',
        'name': 'DANDI API',
        # TODO version the API
        'version': '0.1.0',
        'schemaKey': 'Software',
    }
    publish_activity = {
        'id': URN_RE,
        'name': 'DANDI publish',
        'startDate': UTC_ISO_TIMESTAMP_RE,
        'endDate': UTC_ISO_TIMESTAMP_RE,
        'wasAssociatedWith': [api_software],
        'schemaKey': 'PublishActivity',
    }
    expected_metadata = {
        **metadata_before,
        'id': f'dandiasset:{draft_asset_id}',
        'publishedBy': publish_activity,
        'datePublished': UTC_ISO_TIMESTAMP_RE,
        'identifier': str(draft_asset_id),
        'contentUrl': [HTTP_URL_RE, HTTP_URL_RE],
    }
    assert published_asset.metadata == expected_metadata
def test_validate_asset_metadata(asset: Asset):
    """Validating the unmodified fixture asset yields VALID status and no errors."""
    tasks.validate_asset_metadata(asset.id)

    # Reload so the assertions see what the task stored
    asset.refresh_from_db()

    assert asset.status == Asset.Status.VALID
    no_errors = []
    assert asset.validation_errors == no_errors
def test_validate_asset_metadata_no_schema_version(asset: Asset):
    """Empty metadata is INVALID with a single error about the metadata version."""
    asset.metadata = {}
    asset.save()

    tasks.validate_asset_metadata(asset.id)

    # Reload so the assertions see what the task stored
    asset.refresh_from_db()

    assert asset.status == Asset.Status.INVALID
    assert len(asset.validation_errors) == 1
    only_error = asset.validation_errors[0]
    assert only_error['field'] == ''
    expected_prefix = 'Metadata version None is not allowed.'
    assert only_error['message'].startswith(expected_prefix)
def test_validate_asset_metadata_malformed_keywords(asset: Asset):
    """A string in place of the keywords array is reported against 'keywords'."""
    asset.metadata['keywords'] = 'foo'
    asset.save()

    tasks.validate_asset_metadata(asset.id)

    # Reload so the assertions see what the task stored
    asset.refresh_from_db()

    expected_errors = [
        {'field': 'keywords', 'message': "'foo' is not of type 'array'"},
    ]
    assert asset.status == Asset.Status.INVALID
    assert asset.validation_errors == expected_errors
def test_validate_asset_metadata_no_digest(asset: Asset):
    """An asset whose blob has no sha256 is INVALID with a 'digest' error."""
    blob = asset.blob
    blob.sha256 = None
    blob.save()

    tasks.validate_asset_metadata(asset.id)

    # Reload so the assertions see what the task stored
    asset.refresh_from_db()

    expected_errors = [
        {'field': 'digest', 'message': 'A non-zarr asset must have a sha2_256.'},
    ]
    assert asset.status == Asset.Status.INVALID
    assert asset.validation_errors == expected_errors
def test_validate_asset_metadata_no_encoding_format(asset: Asset):
    """Metadata without 'encodingFormat' is INVALID with a required-property error."""
    asset.metadata.pop('encodingFormat')
    asset.save()

    tasks.validate_asset_metadata(asset.id)

    # Reload so the assertions see what the task stored
    asset.refresh_from_db()

    expected_errors = [
        {'field': '', 'message': "'encodingFormat' is a required property"},
    ]
    assert asset.status == Asset.Status.INVALID
    assert asset.validation_errors == expected_errors
def update(self, request, versions__dandiset__pk, versions__version, **kwargs):
    """
    Update the metadata of an asset.

    The existing asset is not modified. When the submitted blob or metadata
    differs from the current asset, a new Asset is created with ``previous``
    pointing at the old one, and it replaces the old asset in the version.
    When nothing changed, the existing asset is returned as-is.

    Responses:
      * 200 with the serialized asset on success.
      * 400 when the request metadata has no ``path``.
      * 404 when the version or the referenced asset blob cannot be found.
      * Whatever ``get_40x_or_None`` returns when the requester lacks the
        ``owner`` permission on the dandiset.
    """
    old_asset = self.get_object()
    # Use get_object_or_404 so an unknown version produces a 404 response
    # instead of an uncaught Version.DoesNotExist.
    version = get_object_or_404(
        Version,
        dandiset__pk=versions__dandiset__pk,
        version=versions__version,
    )

    # TODO @permission_required doesn't work on methods
    # https://github.com/django-guardian/django-guardian/issues/723
    response = get_40x_or_None(request, ['owner'], version.dandiset, return_403=True)
    if response:
        return response

    serializer = AssetRequestSerializer(data=request.data)
    serializer.is_valid(raise_exception=True)

    asset_blob = get_object_or_404(AssetBlob, blob_id=serializer.validated_data['blob_id'])

    metadata = serializer.validated_data['metadata']
    if 'path' not in metadata:
        # A missing path is a malformed request, not a missing resource
        return Response(
            'No path specified in metadata',
            status=status.HTTP_400_BAD_REQUEST,
        )
    path = metadata['path']

    asset_metadata, created = AssetMetadata.objects.get_or_create(metadata=metadata)
    if created:
        asset_metadata.save()

    if asset_metadata == old_asset.metadata and asset_blob == old_asset.blob:
        # No changes, don't create a new asset
        new_asset = old_asset
    else:
        # Mint a new Asset whenever blob or metadata are modified
        new_asset = Asset(
            path=path,
            blob=asset_blob,
            metadata=asset_metadata,
            previous=old_asset,
        )
        new_asset.save()

        # Replace the old asset with the new one
        version.assets.add(new_asset)
        version.assets.remove(old_asset)

    serializer = AssetDetailSerializer(instance=new_asset)
    return Response(serializer.data, status=status.HTTP_200_OK)
def asset_from_request(self) -> Asset:
    """
    Build an unsaved Asset from the data of the current request.

    The request data is validated with AssetRequestSerializer, and the
    referenced storage object is looked up: an AssetBlob (falling back to an
    EmbargoedAssetBlob) for ``blob_id``, or a ZarrArchive for ``zarr_id``.
    Validation and lookup failures are raised from within this method.
    """
    serializer = AssetRequestSerializer(data=self.request.data)
    serializer.is_valid(raise_exception=True)
    payload = serializer.validated_data

    # Exactly one of these ends up set, depending on the id in the request
    storage = {'blob': None, 'embargoed_blob': None, 'zarr': None}
    if 'blob_id' in payload:
        blob_id = payload['blob_id']
        try:
            storage['blob'] = AssetBlob.objects.get(blob_id=blob_id)
        except AssetBlob.DoesNotExist:
            # Not a regular blob, so it must be an embargoed one (or 404)
            storage['embargoed_blob'] = get_object_or_404(EmbargoedAssetBlob, blob_id=blob_id)
    elif 'zarr_id' in payload:
        storage['zarr'] = get_object_or_404(ZarrArchive, zarr_id=payload['zarr_id'])
    else:
        # This shouldn't ever occur
        raise NotImplementedError('Storage type not handled.')

    # Construct Asset; the path is read before the metadata is stripped
    raw_metadata = payload['metadata']
    path = raw_metadata['path']
    metadata = Asset.strip_metadata(raw_metadata)
    return Asset(
        path=path,
        metadata=metadata,
        status=Asset.Status.PENDING,
        **storage,
    )
def create(self, request, versions__dandiset__pk, versions__version):
    """
    Create an asset from the request data and add it to the given version.

    Responds with the serialized asset and status 200 on success, and with
    status 400 when the metadata has no path or when the version already has
    an asset with the same path, blob and metadata.
    """
    version: Version = get_object_or_404(
        Version,
        dandiset=versions__dandiset__pk,
        version=versions__version,
    )

    # TODO @permission_required doesn't work on methods
    # https://github.com/django-guardian/django-guardian/issues/723
    denied = get_40x_or_None(request, ['owner'], version.dandiset, return_403=True)
    if denied:
        return denied

    request_serializer = AssetRequestSerializer(data=request.data)
    request_serializer.is_valid(raise_exception=True)
    payload = request_serializer.validated_data

    asset_blob = get_object_or_404(AssetBlob, blob_id=payload['blob_id'])

    metadata = payload['metadata']
    if 'path' not in metadata:
        return Response('No path specified in metadata.', status=400)
    path = metadata['path']

    asset_metadata, created = AssetMetadata.objects.get_or_create(metadata=metadata)
    if created:
        asset_metadata.save()

    # The same fields identify a duplicate and define the new asset
    asset_fields = {'path': path, 'blob': asset_blob, 'metadata': asset_metadata}
    if version.assets.filter(**asset_fields).exists():
        return Response('Asset already exists.', status=status.HTTP_400_BAD_REQUEST)

    asset = Asset(**asset_fields)
    asset.save()
    version.assets.add(asset)

    detail = AssetDetailSerializer(instance=asset)
    return Response(detail.data, status=status.HTTP_200_OK)
def stats_view(self):
    """Respond with dandiset, published dandiset and user counts plus the total asset size."""
    stats = {
        'dandiset_count': Dandiset.objects.count(),
        'published_dandiset_count': Dandiset.published_count(),
        'user_count': User.objects.count(),
        'size': Asset.total_size(),
    }
    return Response(stats)
def paths(self, request, **kwargs):
    """
    List the unique files/directories that sit directly under a folder.

    The folder comes from the ``path_prefix`` query parameter. It is treated
    as a folder path: a missing trailing slash is appended, and an absent or
    empty value refers to the root folder.
    """
    folder: str = self.request.query_params.get('path_prefix') or ''

    # Enforce trailing slash
    if folder and not folder.endswith('/'):
        folder += '/'

    matching_assets = self.get_queryset().filter(path__startswith=folder).values()
    return Response(Asset.get_path(folder, matching_assets))
def test_asset_total_size(draft_version_factory, asset_factory, asset_blob_factory):
    """Asset.total_size() equals the size of the one blob that is in a version."""
    # This asset blob should only be counted once,
    # despite belonging to multiple assets and multiple versions.
    shared_blob = asset_blob_factory()
    for _ in range(2):
        linked_asset = asset_factory(blob=shared_blob)
        draft_version_factory().assets.add(linked_asset)

    # These asset blobs should not be counted since they aren't in any versions.
    asset_blob_factory()
    asset_factory()

    expected_size = shared_blob.size
    assert Asset.total_size() == expected_size
def test_asset_blob_and_zarr(asset_blob, zarr_archive):
    """Saving an asset with both a blob and a zarr raises IntegrityError."""
    # A constraint forbids blob and zarr from both being defined
    with pytest.raises(IntegrityError):
        conflicting_asset = Asset(blob=asset_blob, zarr=zarr_archive)
        conflicting_asset.save()
def test_asset_no_blob_zarr():
    """Saving an asset with neither blob nor zarr raises AttributeError."""
    # The error comes from accessing url fields on the missing foreign keys
    with pytest.raises(AttributeError):
        orphan_asset = Asset()
        orphan_asset.save()