Esempio n. 1
0
def create_dev_dandiset(name: str, owner: str):
    """
    Create a dandiset with a draft version and a single sample asset.

    The user whose email equals ``owner`` is looked up and added as owner of a
    freshly saved dandiset. A draft version called ``name`` is then saved with
    placeholder metadata listing that user as contact person. One asset at
    ``foo/bar.txt`` is attached to the draft; its blob is reused when an
    ``AssetBlob`` with the fixed etag already exists and created otherwise.
    Finally the checksum and the two validation routines are invoked.

    Any exception raised by ``User.objects.get`` for an unmatched email propagates.
    """
    sample_path = 'foo/bar.txt'
    sample_content = b'A' * 20
    sample_etag = '76d36e98f312e98ff908c8c82c8dd623-0'

    owner_user = User.objects.get(email=owner)

    # Create a new dandiset owned by that user
    new_dandiset = Dandiset()
    new_dandiset.save()
    new_dandiset.add_owner(owner_user)

    # Create the draft version
    contact_person = {
        'name': f'{owner_user.last_name}, {owner_user.first_name}',
        'email': owner_user.email,
        'roleName': ['dcite:ContactPerson'],
        'schemaKey': 'Person',
        'affiliation': [],
        'includeInCitation': True,
    }
    draft = Version(
        dandiset=new_dandiset,
        name=name,
        metadata={
            'schemaVersion': settings.DANDI_SCHEMA_VERSION,
            'schemaKey': 'Dandiset',
            'description': 'An informative description',
            'license': ['spdx:CC0-1.0'],
            'contributor': [contact_person],
        },
        version='draft',
    )
    draft.save()

    # Reuse the blob for the sample content if it exists, otherwise store it
    upload = SimpleUploadedFile(name=sample_path, content=sample_content)
    try:
        blob = AssetBlob.objects.get(etag=sample_etag)
    except AssetBlob.DoesNotExist:
        blob = AssetBlob(
            blob_id=uuid4(),
            blob=upload,
            etag=sample_etag,
            size=len(sample_content),
        )
        blob.save()

    # Attach the sample asset to the draft version
    sample_asset = Asset(
        blob=blob,
        metadata={
            'schemaVersion': settings.DANDI_SCHEMA_VERSION,
            'encodingFormat': 'text/plain',
            'schemaKey': 'Asset',
        },
        path=sample_path,
    )
    sample_asset.save()
    draft.assets.add(sample_asset)

    # Run the checksum and validation routines directly on the new objects
    calculate_sha256(blob_id=blob.blob_id)
    validate_asset_metadata(asset_id=sample_asset.id)
    validate_version_metadata(version_id=draft.id)
Esempio n. 2
0
def test_publish_asset(draft_asset: Asset):
    """
    Check the metadata of an asset after it has been published.

    The asset id, blob and metadata are captured before publishing; afterwards
    the blob must be unchanged and the metadata must equal the captured
    metadata extended with the publish-related fields.
    """
    original_id = draft_asset.asset_id
    original_blob = draft_asset.blob
    original_metadata = draft_asset.metadata

    draft_asset.publish()
    draft_asset.save()

    # draft_asset has been published, so it is now published_asset
    published_asset = draft_asset

    publishing_software = {
        'id': URN_RE,
        'identifier': 'RRID:SCR_017571',
        'name': 'DANDI API',
        # TODO version the API
        'version': '0.1.0',
        'schemaKey': 'Software',
    }
    publish_activity = {
        'id': URN_RE,
        'name': 'DANDI publish',
        'startDate': UTC_ISO_TIMESTAMP_RE,
        'endDate': UTC_ISO_TIMESTAMP_RE,
        'wasAssociatedWith': [publishing_software],
        'schemaKey': 'PublishActivity',
    }
    expected_metadata = {
        **original_metadata,
        'id': f'dandiasset:{original_id}',
        'publishedBy': publish_activity,
        'datePublished': UTC_ISO_TIMESTAMP_RE,
        'identifier': str(original_id),
        'contentUrl': [HTTP_URL_RE, HTTP_URL_RE],
    }

    assert published_asset.blob == original_blob
    assert published_asset.metadata == expected_metadata
Esempio n. 3
0
def test_validate_asset_metadata(asset: Asset):
    """Validating the fixture asset marks it VALID with no validation errors."""
    tasks.validate_asset_metadata(asset.id)
    asset.refresh_from_db()

    expected_status = Asset.Status.VALID
    expected_errors = []
    assert asset.status == expected_status
    assert asset.validation_errors == expected_errors
Esempio n. 4
0
def test_validate_asset_metadata_no_schema_version(asset: Asset):
    """Empty metadata makes the asset INVALID with one 'version None' error."""
    asset.metadata = {}
    asset.save()

    tasks.validate_asset_metadata(asset.id)
    asset.refresh_from_db()

    assert asset.status == Asset.Status.INVALID

    errors = asset.validation_errors
    assert len(errors) == 1

    # The single error is not attached to a specific field
    only_error = errors[0]
    assert only_error['field'] == ''
    assert only_error['message'].startswith('Metadata version None is not allowed.')
Esempio n. 5
0
def test_validate_asset_metadata_malformed_keywords(asset: Asset):
    """A string-valued ``keywords`` entry makes the asset INVALID with a type error."""
    asset.metadata['keywords'] = 'foo'
    asset.save()

    tasks.validate_asset_metadata(asset.id)
    asset.refresh_from_db()

    expected_error = {'field': 'keywords', 'message': "'foo' is not of type 'array'"}
    assert asset.status == Asset.Status.INVALID
    assert asset.validation_errors == [expected_error]
Esempio n. 6
0
def test_validate_asset_metadata_no_digest(asset: Asset):
    """Clearing the blob's sha256 makes the asset INVALID with a digest error."""
    blob = asset.blob
    blob.sha256 = None
    blob.save()

    tasks.validate_asset_metadata(asset.id)
    asset.refresh_from_db()

    expected_error = {
        'field': 'digest',
        'message': 'A non-zarr asset must have a sha2_256.',
    }
    assert asset.status == Asset.Status.INVALID
    assert asset.validation_errors == [expected_error]
Esempio n. 7
0
def test_validate_asset_metadata_no_encoding_format(asset: Asset):
    """Removing ``encodingFormat`` makes the asset INVALID with a required-property error."""
    asset.metadata.pop('encodingFormat')
    asset.save()

    tasks.validate_asset_metadata(asset.id)
    asset.refresh_from_db()

    expected_error = {
        'field': '',
        'message': "'encodingFormat' is a required property",
    }
    assert asset.status == Asset.Status.INVALID
    assert asset.validation_errors == [expected_error]
Esempio n. 8
0
    def update(self, request, versions__dandiset__pk, versions__version, **kwargs):
        """
        Update the metadata of an asset.

        A new Asset is minted (with ``previous`` pointing at the old one) and
        swapped into the version whenever the blob or the metadata differ from
        the current asset; otherwise the existing asset is returned unchanged.

        Responds with 404 when the version or the referenced blob does not
        exist, with the permission response when the requester is not an owner
        of the dandiset, and with 400 when the metadata carries no ``path``.
        """
        old_asset = self.get_object()
        # An unknown version is answered with 404 rather than an unhandled DoesNotExist.
        version = get_object_or_404(
            Version,
            dandiset__pk=versions__dandiset__pk,
            version=versions__version,
        )

        # TODO @permission_required doesn't work on methods
        # https://github.com/django-guardian/django-guardian/issues/723
        response = get_40x_or_None(request, ['owner'], version.dandiset, return_403=True)
        if response:
            return response

        serializer = AssetRequestSerializer(data=request.data)
        serializer.is_valid(raise_exception=True)

        asset_blob = get_object_or_404(AssetBlob, blob_id=serializer.validated_data['blob_id'])

        metadata = serializer.validated_data['metadata']
        if 'path' not in metadata:
            # A missing path is a malformed request body, not a missing resource.
            return Response(
                'No path specified in metadata',
                status=status.HTTP_400_BAD_REQUEST,
            )
        path = metadata['path']
        # get_or_create already persists a newly created row; no extra save is needed.
        asset_metadata, _ = AssetMetadata.objects.get_or_create(metadata=metadata)

        if asset_metadata == old_asset.metadata and asset_blob == old_asset.blob:
            # No changes, don't create a new asset
            new_asset = old_asset
        else:
            # Mint a new Asset whenever blob or metadata are modified
            new_asset = Asset(
                path=path,
                blob=asset_blob,
                metadata=asset_metadata,
                previous=old_asset,
            )
            new_asset.save()

            # Replace the old asset with the new one
            version.assets.add(new_asset)
            version.assets.remove(old_asset)

        serializer = AssetDetailSerializer(instance=new_asset)
        return Response(serializer.data, status=status.HTTP_200_OK)
Esempio n. 9
0
    def asset_from_request(self) -> Asset:
        """
        Build an unsaved Asset out of the data sent with the current request.

        The request data is validated with ``AssetRequestSerializer``. The
        storage backing the asset is then resolved: a ``blob_id`` is looked up
        as an ``AssetBlob`` first and as an ``EmbargoedAssetBlob`` second, while
        a ``zarr_id`` is looked up as a ``ZarrArchive``. The returned asset has
        status PENDING and is not saved.

        Validation and lookup failures are raised from within this method.
        """
        serializer = AssetRequestSerializer(data=self.request.data)
        serializer.is_valid(raise_exception=True)
        data = serializer.validated_data

        # At most one of these ends up set, depending on the kind of id supplied
        blob = embargoed_blob = zarr = None

        if 'blob_id' in data:
            blob_id = data['blob_id']
            try:
                blob = AssetBlob.objects.get(blob_id=blob_id)
            except AssetBlob.DoesNotExist:
                # Not a regular blob; fall back to the embargoed blobs
                embargoed_blob = get_object_or_404(EmbargoedAssetBlob, blob_id=blob_id)
        elif 'zarr_id' in data:
            zarr = get_object_or_404(ZarrArchive, zarr_id=data['zarr_id'])
        else:
            # This shouldn't ever occur
            raise NotImplementedError('Storage type not handled.')

        # Read the path before the metadata is stripped
        raw_metadata = data['metadata']
        asset_path = raw_metadata['path']
        stripped_metadata = Asset.strip_metadata(raw_metadata)

        return Asset(
            path=asset_path,
            blob=blob,
            embargoed_blob=embargoed_blob,
            zarr=zarr,
            metadata=stripped_metadata,
            status=Asset.Status.PENDING,
        )
Esempio n. 10
0
    def create(self, request, versions__dandiset__pk, versions__version):
        """
        Create an asset in the given version from a blob id and metadata.

        Responds with 404 when the version or the blob cannot be found, with
        the permission response when the requester is not an owner of the
        dandiset, and with 400 when the metadata has no ``path`` or when an
        identical asset (same path, blob and metadata) is already part of the
        version. On success the serialized new asset is returned.
        """
        version: Version = get_object_or_404(
            Version,
            dandiset=versions__dandiset__pk,
            version=versions__version,
        )

        # TODO @permission_required doesn't work on methods
        # https://github.com/django-guardian/django-guardian/issues/723
        denied = get_40x_or_None(request, ['owner'], version.dandiset, return_403=True)
        if denied:
            return denied

        request_serializer = AssetRequestSerializer(data=request.data)
        request_serializer.is_valid(raise_exception=True)
        payload = request_serializer.validated_data

        blob = get_object_or_404(AssetBlob, blob_id=payload['blob_id'])

        raw_metadata = payload['metadata']
        if 'path' not in raw_metadata:
            return Response('No path specified in metadata.', status=400)
        asset_path = raw_metadata['path']

        metadata_row, was_created = AssetMetadata.objects.get_or_create(metadata=raw_metadata)
        if was_created:
            metadata_row.save()

        # Refuse to add an exact duplicate of an asset already in this version
        duplicates = version.assets.filter(path=asset_path, blob=blob, metadata=metadata_row)
        if duplicates.exists():
            return Response('Asset already exists.', status=status.HTTP_400_BAD_REQUEST)

        new_asset = Asset(path=asset_path, blob=blob, metadata=metadata_row)
        new_asset.save()
        version.assets.add(new_asset)

        detail_serializer = AssetDetailSerializer(instance=new_asset)
        return Response(detail_serializer.data, status=status.HTTP_200_OK)
Esempio n. 11
0
def stats_view(self):
    """Return a response with the dandiset, published-dandiset and user counts and the asset size."""
    stats = {
        'dandiset_count': Dandiset.objects.count(),
        'published_dandiset_count': Dandiset.published_count(),
        'user_count': User.objects.count(),
        'size': Asset.total_size(),
    }
    return Response(stats)
Esempio n. 12
0
    def paths(self, request, **kwargs):
        """
        List the unique files/directories located directly under a folder path.

        The folder is taken from the ``path_prefix`` query parameter. It must
        denote a folder: a non-empty value gets a trailing slash appended when
        it lacks one, and a missing or empty value refers to the root folder.
        """
        prefix: str = self.request.query_params.get('path_prefix') or ''

        # Enforce trailing slash
        if prefix and not prefix.endswith('/'):
            prefix = f'{prefix}/'

        matching_assets = self.get_queryset().filter(path__startswith=prefix)
        return Response(Asset.get_path(prefix, matching_assets.values()))
Esempio n. 13
0
def test_asset_total_size(draft_version_factory, asset_factory, asset_blob_factory):
    """``Asset.total_size()`` equals the size of the one blob that is used in versions."""
    # This asset blob should only be counted once,
    # despite belonging to multiple assets and multiple versions.
    shared_blob = asset_blob_factory()

    for _ in range(2):
        asset = asset_factory(blob=shared_blob)
        version = draft_version_factory()
        version.assets.add(asset)

    # These asset blobs should not be counted since they aren't in any versions.
    asset_blob_factory()
    asset_factory()

    assert Asset.total_size() == shared_blob.size
Esempio n. 14
0
def test_asset_blob_and_zarr(asset_blob, zarr_archive):
    """Saving an asset that has both a blob and a zarr raises IntegrityError."""
    # An integrity error is thrown by the constraint that both blob and zarr cannot both be defined
    with pytest.raises(IntegrityError):
        conflicting_asset = Asset(blob=asset_blob, zarr=zarr_archive)
        conflicting_asset.save()
Esempio n. 15
0
def test_asset_no_blob_zarr():
    """Saving an asset that has neither a blob nor a zarr raises AttributeError."""
    # An attribute error is thrown when it tries to access url fields on the missing foreign keys
    with pytest.raises(AttributeError):
        empty_asset = Asset()
        empty_asset.save()