Example #1
0
def test_version_rest_publish_zarr(
    api_client,
    user: User,
    draft_version: Version,
    draft_asset_factory,
    zarr_archive_factory,
):
    """Publishing a draft version that holds a zarr-backed asset must be refused with a 400."""
    owned_dandiset = draft_version.dandiset
    assign_perm('owner', user, owned_dandiset)
    api_client.force_authenticate(user=user)

    # Attach one zarr-backed asset (no blob) and one ordinary asset to the draft
    zarr_archive = zarr_archive_factory(dandiset=owned_dandiset)
    zarr_asset: Asset = draft_asset_factory(zarr=zarr_archive, blob=None)
    normal_asset: Asset = draft_asset_factory()
    draft_assets = (zarr_asset, normal_asset)
    for draft_asset in draft_assets:
        draft_version.assets.add(draft_asset)

    # Run validation so that both assets and the version end up marked `VALID`
    for draft_asset in draft_assets:
        tasks.validate_asset_metadata(draft_asset.id)
    tasks.validate_version_metadata(draft_version.id)
    draft_version.refresh_from_db()
    assert draft_version.valid

    publish_url = (
        f'/api/dandisets/{draft_version.dandiset.identifier}'
        f'/versions/{draft_version.version}/publish/'
    )
    resp = api_client.post(publish_url)

    # The request is rejected and the body carries the explanatory message
    assert resp.status_code == 400
    assert resp.json() == ['Cannot publish dandisets which contain zarrs']
Example #2
0
def revalidate(assets: bool, versions: bool, revalidate_all: bool,
               dry_run: bool):
    """
    Revalidate Assets and/or draft Versions.

    Validation runs immediately, in band, instead of being dispatched as tasks to the queue.

    Only objects whose status is ``INVALID`` are processed unless ``revalidate_all`` is set.
    With ``dry_run`` the number of matching objects is still printed, but nothing is validated.
    """
    if assets:
        if revalidate_all:
            asset_qs = Asset.objects
        else:
            asset_qs = Asset.objects.filter(status=Asset.Status.INVALID)
        click.echo(f'Revalidating {asset_qs.count()} assets')
        if not dry_run:
            for asset_row in asset_qs.values('id'):
                validate_asset_metadata(asset_row['id'])

    if versions:
        # Only draft versions are ever revalidated
        draft_qs = Version.objects.filter(version='draft')
        if revalidate_all:
            version_qs = draft_qs
        else:
            version_qs = draft_qs.filter(status=Version.Status.INVALID)
        click.echo(f'Revalidating {version_qs.count()} versions')
        if not dry_run:
            for version_row in version_qs.values('id'):
                validate_version_metadata(version_row['id'])
Example #3
0
def create_dev_dandiset(name: str, owner: str):
    """
    Create a dandiset with a draft version and a single small text asset, then validate them.

    ``owner`` is the email address used to look up the owning ``User``; ``name`` is stored as
    the name of the draft version.
    """
    # From here on ``owner`` refers to the User record rather than the email string
    owner = User.objects.get(email=owner)

    # Create a new dandiset
    dandiset = Dandiset()
    dandiset.save()
    dandiset.add_owner(owner)

    # Create the draft version, listing the owner as the contact person
    contact_person = {
        'name': f'{owner.last_name}, {owner.first_name}',
        'email': owner.email,
        'roleName': ['dcite:ContactPerson'],
        'schemaKey': 'Person',
        'affiliation': [],
        'includeInCitation': True,
    }
    version_metadata = {
        'schemaVersion': settings.DANDI_SCHEMA_VERSION,
        'schemaKey': 'Dandiset',
        'description': 'An informative description',
        'license': ['spdx:CC0-1.0'],
        'contributor': [contact_person],
    }
    draft_version = Version(
        dandiset=dandiset, name=name, metadata=version_metadata, version='draft'
    )
    draft_version.save()

    # Reuse the blob with this etag when one is already stored, otherwise create it
    asset_path = 'foo/bar.txt'
    uploaded_file = SimpleUploadedFile(name=asset_path, content=b'A' * 20)
    etag = '76d36e98f312e98ff908c8c82c8dd623-0'
    try:
        asset_blob = AssetBlob.objects.get(etag=etag)
    except AssetBlob.DoesNotExist:
        asset_blob = AssetBlob(blob_id=uuid4(), blob=uploaded_file, etag=etag, size=20)
        asset_blob.save()

    # Create the asset and attach it to the draft version
    asset_metadata = {
        'schemaVersion': settings.DANDI_SCHEMA_VERSION,
        'encodingFormat': 'text/plain',
        'schemaKey': 'Asset',
    }
    asset = Asset(blob=asset_blob, metadata=asset_metadata, path=asset_path)
    asset.save()
    draft_version.assets.add(asset)

    # Checksum first, then validate the asset, then the version
    calculate_sha256(blob_id=asset_blob.blob_id)
    validate_asset_metadata(asset_id=asset.id)
    validate_version_metadata(version_id=draft_version.id)
Example #4
0
def test_unembargo_dandiset_existing_blobs(
    dandiset_factory,
    draft_version_factory,
    asset_factory,
    asset_blob_factory,
    embargoed_asset_blob_factory,
    storage_tuple,
    monkeypatch,
):
    """
    Unembargo a dandiset whose embargoed blob has the same contents as an existing open blob.

    After unembargo the asset must reference the pre-existing ``AssetBlob`` rather than its
    former embargoed blob, and the draft version must validate with open-access metadata.
    """
    # Pretend like AssetBlob/EmbargoedAssetBlob were defined with the given storage.
    # Going through ``monkeypatch`` restores the original storages at teardown; assigning the
    # attributes directly would leave the class-level fields patched for every later test.
    storage, embargoed_storage = storage_tuple
    monkeypatch.setattr(AssetBlob.blob.field, 'storage', storage)
    monkeypatch.setattr(EmbargoedAssetBlob.blob.field, 'storage', embargoed_storage)

    # Create dandiset and version
    dandiset: Dandiset = dandiset_factory(embargo_status=Dandiset.EmbargoStatus.EMBARGOED)
    draft_version: Version = draft_version_factory(dandiset=dandiset)

    # Create embargoed assets
    embargoed_asset_blob: EmbargoedAssetBlob = embargoed_asset_blob_factory()
    embargoed_asset: Asset = asset_factory(embargoed_blob=embargoed_asset_blob, blob=None)
    draft_version.assets.add(embargoed_asset)

    # Create unembargoed asset with identical data.
    # The embargoed file is rewound after reading so it can be read again from the start.
    embargoed_asset_blob_data = embargoed_asset_blob.blob.read()
    embargoed_asset_blob.blob.seek(0)
    existing_asset_blob = asset_blob_factory(
        blob=factory.django.FileField(data=embargoed_asset_blob_data)
    )

    # Assert properties before unembargo
    assert embargoed_asset.embargoed_blob is not None
    assert embargoed_asset.blob is None
    assert embargoed_asset.embargoed_blob.etag != ''
    assert existing_asset_blob.etag != ''
    assert embargoed_asset_blob.etag == existing_asset_blob.etag

    # Run unembargo
    tasks.unembargo_dandiset(dandiset.pk)
    tasks.validate_version_metadata(draft_version.pk)
    dandiset.refresh_from_db()
    draft_version.refresh_from_db()

    # Assert correct changes took place
    assert dandiset.embargo_status == Dandiset.EmbargoStatus.OPEN
    assert draft_version.status == Version.Status.VALID
    assert draft_version.metadata['access'] == [
        {'schemaKey': 'AccessRequirements', 'status': 'dandi:OpenAccess'}
    ]

    # Assert no new asset created
    asset: Asset = draft_version.assets.first()
    assert asset == embargoed_asset

    # Check blobs: the asset now points at the blob that already existed
    assert asset.embargoed_blob is None
    assert asset.blob is not None
    assert asset.blob.etag == embargoed_asset_blob.etag
    assert asset.blob == existing_asset_blob
Example #5
0
def test_validate_version_metadata(version: Version, asset: Asset):
    """Validating the fixture version marks it ``VALID`` and leaves no validation errors."""
    version.assets.add(asset)
    tasks.validate_version_metadata(version.id)

    # Reload so the assertions look at what is stored in the database
    version.refresh_from_db()
    resulting_status = version.status
    recorded_errors = version.validation_errors

    assert resulting_status == Version.Status.VALID
    assert recorded_errors == []
Example #6
0
def test_validate_version_metadata_malformed_schema_version(
        version: Version, asset: Asset):
    """A bogus ``schemaVersion`` makes the version ``INVALID`` with exactly one error."""
    version.assets.add(asset)

    # Store a schema version that is not an accepted value
    version.metadata['schemaVersion'] = 'xxx'
    version.save()

    tasks.validate_version_metadata(version.id)
    version.refresh_from_db()

    assert version.status == Version.Status.INVALID

    # A single error is expected; only the start of its message is checked
    recorded_errors = version.validation_errors
    assert len(recorded_errors) == 1
    first_message = recorded_errors[0]['message']
    assert first_message.startswith('Metadata version xxx is not allowed.')
Example #7
0
def test_unembargo_dandiset_normal_asset_blob(
    dandiset_factory,
    draft_version_factory,
    asset_factory,
    asset_blob_factory,
    storage,
    monkeypatch,
):
    """
    Unembargo a dandiset whose only asset already uses a normal (non-embargoed) blob.

    The dandiset must become open and the draft version valid, while the asset keeps the
    same ``AssetBlob`` it had before.
    """
    # Pretend like AssetBlob was defined with the given storage.
    # Going through ``monkeypatch`` restores the original storage at teardown; assigning the
    # attribute directly would leave the class-level field patched for every later test.
    monkeypatch.setattr(AssetBlob.blob.field, 'storage', storage)

    # Create dandiset and version
    dandiset: Dandiset = dandiset_factory(embargo_status=Dandiset.EmbargoStatus.EMBARGOED)
    draft_version: Version = draft_version_factory(dandiset=dandiset)

    # Create asset
    asset_blob: AssetBlob = asset_blob_factory()
    asset: Asset = asset_factory(blob=asset_blob, embargoed_blob=None)
    draft_version.assets.add(asset)

    # Assert properties before unembargo
    assert asset.embargoed_blob is None
    assert asset.blob is not None

    # Run unembargo
    tasks.unembargo_dandiset(dandiset.pk)
    tasks.validate_version_metadata(draft_version.pk)
    dandiset.refresh_from_db()
    draft_version.refresh_from_db()

    # Assert correct changes took place
    assert dandiset.embargo_status == Dandiset.EmbargoStatus.OPEN
    assert draft_version.status == Version.Status.VALID
    assert draft_version.metadata['access'] == [
        {'schemaKey': 'AccessRequirements', 'status': 'dandi:OpenAccess'}
    ]

    # Assert no new asset created
    fetched_asset: Asset = draft_version.assets.first()
    assert asset == fetched_asset

    # Check that blob is unchanged. These checks use the freshly fetched asset: the
    # in-memory ``asset`` was never reloaded, so asserting on it would only repeat the
    # pre-unembargo checks above.
    assert fetched_asset.blob == asset_blob
    assert fetched_asset.embargoed_blob is None
    assert fetched_asset.blob is not None
    assert fetched_asset.blob.etag
Example #8
0
def test_validate_version_metadata_malformed_license(version: Version,
                                                     asset: Asset):
    """A non-list ``license`` value makes the version ``INVALID`` with a type error."""
    version.assets.add(asset)

    # Replace the license with a plain string and persist it
    version.metadata['license'] = 'foo'
    version.save()

    tasks.validate_version_metadata(version.id)
    version.refresh_from_db()

    expected_errors = [
        {'field': 'license', 'message': "'foo' is not of type 'array'"},
    ]
    assert version.status == Version.Status.INVALID
    assert version.validation_errors == expected_errors
Example #9
0
def test_validate_version_metadata_no_description(version: Version,
                                                  asset: Asset):
    """Dropping ``description`` makes the version ``INVALID`` with a required-property error."""
    version.assets.add(asset)

    # Remove the description (raises KeyError if the fixture lacks it) and persist
    version.metadata.pop('description')
    version.save()

    tasks.validate_version_metadata(version.id)
    version.refresh_from_db()

    expected_errors = [
        {'field': '', 'message': "'description' is a required property"},
    ]
    assert version.status == Version.Status.INVALID
    assert version.validation_errors == expected_errors
Example #10
0
def test_version_rest_publish_assets(
    api_client,
    user: User,
    draft_version: Version,
    draft_asset_factory,
    published_asset_factory,
):
    """
    Publish a draft that holds one draft asset and one already-published asset.

    The draft asset must come out published with publish metadata added, while the
    already-published asset must be identical before and after.
    """
    assign_perm('owner', user, draft_version.dandiset)
    api_client.force_authenticate(user=user)

    # Build one unpublished asset and one that has been published and saved
    old_draft_asset: Asset = draft_asset_factory()
    old_published_asset: Asset = published_asset_factory()
    old_published_asset.publish()
    old_published_asset.save()
    assert not old_draft_asset.published
    assert old_published_asset.published
    for old_asset in (old_draft_asset, old_published_asset):
        draft_version.assets.add(old_asset)

    # Validate the metadata to mark the assets and version as `VALID`
    for old_asset in (old_draft_asset, old_published_asset):
        tasks.validate_asset_metadata(old_asset.id)
    tasks.validate_version_metadata(draft_version.id)
    draft_version.refresh_from_db()
    assert draft_version.valid

    publish_url = (
        f'/api/dandisets/{draft_version.dandiset.identifier}'
        f'/versions/{draft_version.version}/publish/'
    )
    resp = api_client.post(publish_url)
    assert resp.status_code == 200
    published_version = Version.objects.get(version=resp.data['version'])

    published_assets = published_version.assets
    assert published_assets.count() == 2
    new_draft_asset: Asset = published_assets.get(asset_id=old_draft_asset.asset_id)
    new_published_asset: Asset = published_assets.get(asset_id=old_published_asset.asset_id)

    # The former draft asset should have been modified into a published asset
    software_agent = {
        'id': URN_RE,
        'identifier': 'RRID:SCR_017571',
        'name': 'DANDI API',
        'version': '0.1.0',
        'schemaKey': 'Software',
    }
    publish_activity = {
        'id': URN_RE,
        'name': 'DANDI publish',
        'startDate': UTC_ISO_TIMESTAMP_RE,
        # TODO endDate needs to be defined before publish is complete
        'endDate': UTC_ISO_TIMESTAMP_RE,
        'wasAssociatedWith': [software_agent],
        'schemaKey': 'PublishActivity',
    }
    expected_draft_metadata = {
        **old_draft_asset.metadata,
        'datePublished': UTC_ISO_TIMESTAMP_RE,
        'publishedBy': publish_activity,
    }
    assert new_draft_asset.published
    assert new_draft_asset.asset_id == old_draft_asset.asset_id
    assert new_draft_asset.path == old_draft_asset.path
    assert new_draft_asset.blob == old_draft_asset.blob
    assert new_draft_asset.metadata == expected_draft_metadata

    # The published_asset should be completely unchanged
    assert new_published_asset.published
    assert new_published_asset.asset_id == old_published_asset.asset_id
    assert new_published_asset.path == old_published_asset.path
    assert new_published_asset.blob == old_published_asset.blob
    assert new_published_asset.metadata == old_published_asset.metadata
Example #11
0
def test_version_rest_publish(api_client, user: User, draft_version: Version, asset: Asset):
    """
    Publish a valid draft version over REST and check the response and resulting records.

    Covers the serialized response, the newly created published version and its asset, the
    metadata of the published version, and the state of the draft afterwards.
    """
    assign_perm('owner', user, draft_version.dandiset)
    api_client.force_authenticate(user=user)
    draft_version.assets.add(asset)

    # Validate the metadata to mark the version and asset as `VALID`
    tasks.validate_version_metadata(draft_version.id)
    tasks.validate_asset_metadata(asset.id)
    draft_version.refresh_from_db()
    assert draft_version.valid

    publish_url = (
        f'/api/dandisets/{draft_version.dandiset.identifier}'
        f'/versions/{draft_version.version}/publish/'
    )
    resp = api_client.post(publish_url)

    # Expected serialization of the published version, built after the request was made
    expected_dandiset = {
        'identifier': draft_version.dandiset.identifier,
        'created': TIMESTAMP_RE,
        'modified': TIMESTAMP_RE,
        'contact_person': draft_version.metadata['contributor'][0]['name'],
        'embargo_status': 'OPEN',
    }
    expected_response = {
        'dandiset': expected_dandiset,
        'version': VERSION_ID_RE,
        'name': draft_version.name,
        'created': TIMESTAMP_RE,
        'modified': TIMESTAMP_RE,
        'asset_count': 1,
        'size': draft_version.size,
        'status': 'Valid',
    }
    assert resp.data == expected_response

    # A second version (the published one) now exists next to the draft
    published_version = Version.objects.get(version=resp.data['version'])
    assert published_version
    assert draft_version.dandiset.versions.count() == 2

    published_asset: Asset = published_version.assets.get()
    assert published_asset.published
    # The asset should be the same after publishing
    assert asset.asset_id == published_asset.asset_id

    software_agent = {
        'id': URN_RE,
        'identifier': 'RRID:SCR_017571',
        'name': 'DANDI API',
        # TODO version the API
        'version': '0.1.0',
        'schemaKey': 'Software',
    }
    publish_activity = {
        'id': URN_RE,
        'name': 'DANDI publish',
        'startDate': UTC_ISO_TIMESTAMP_RE,
        'endDate': UTC_ISO_TIMESTAMP_RE,
        'wasAssociatedWith': [software_agent],
        'schemaKey': 'PublishActivity',
    }
    # Once the assets are linked, assetsSummary should be computed properly
    assets_summary = {
        'schemaKey': 'AssetsSummary',
        'numberOfBytes': 100,
        'numberOfFiles': 1,
        'dataStandard': [
            {
                'schemaKey': 'StandardsType',
                'identifier': 'RRID:SCR_015242',
                'name': 'Neurodata Without Borders (NWB)',
            }
        ],
        'approach': [],
        'measurementTechnique': [],
        'variableMeasured': [],
        'species': [],
    }
    # Draft metadata (as held in memory, before the refresh below) plus the publish fields
    expected_metadata = {
        **draft_version.metadata,
        'publishedBy': publish_activity,
        'datePublished': UTC_ISO_TIMESTAMP_RE,
        'manifestLocation': [
            f'http://{settings.MINIO_STORAGE_ENDPOINT}/test-dandiapi-dandisets/test-prefix/dandisets/{draft_version.dandiset.identifier}/{published_version.version}/assets.yaml',  # noqa: E501
        ],
        'identifier': f'DANDI:{draft_version.dandiset.identifier}',
        'version': published_version.version,
        'id': f'DANDI:{draft_version.dandiset.identifier}/{published_version.version}',
        'url': (
            f'{settings.DANDI_WEB_APP_URL}/dandiset/{draft_version.dandiset.identifier}'
            f'/{published_version.version}'
        ),
        'citation': published_version.citation(published_version.metadata),
        'doi': f'10.80507/dandi.{draft_version.dandiset.identifier}/{published_version.version}',
        'assetsSummary': assets_summary,
    }
    assert published_version.metadata == expected_metadata

    # After publishing, the draft reports PUBLISHED status and is not `valid`
    draft_version.refresh_from_db()
    assert draft_version.status == Version.Status.PUBLISHED
    assert not draft_version.valid
Example #12
0
def test_unembargo_dandiset(
    dandiset_factory,
    draft_version_factory,
    asset_factory,
    embargoed_asset_blob_factory,
    storage_tuple,
    file_size,
    part_size,
    monkeypatch,
):
    """
    Unembargo a dandiset whose asset is backed by a multi-part embargoed blob.

    Afterwards the dandiset must be open, the draft version valid with open-access metadata,
    and the same asset must reference a normal blob with the embargoed blob's etag.
    """
    # Pretend like AssetBlob/EmbargoedAssetBlob were defined with the given storage
    storage, embargoed_storage = storage_tuple
    patched_storages = (
        (AssetBlob, storage),
        (EmbargoedAssetBlob, embargoed_storage),
    )
    for blob_model, blob_storage in patched_storages:
        monkeypatch.setattr(blob_model.blob.field, 'storage', blob_storage)

    # Monkey patch PartGenerator so that upload and copy use a smaller part size
    monkeypatch.setattr(PartGenerator, 'DEFAULT_PART_SIZE', part_size, raising=True)

    # Create dandiset and version
    dandiset: Dandiset = dandiset_factory(embargo_status=Dandiset.EmbargoStatus.EMBARGOED)
    draft_version: Version = draft_version_factory(dandiset=dandiset)

    # Create an embargoed asset blob filled with random bytes
    random_payload = os.urandom(file_size)
    upload = SimpleUploadedFile('test', content=random_payload)
    embargoed_asset_blob: EmbargoedAssetBlob = embargoed_asset_blob_factory(
        size=file_size, blob=upload
    )

    # Assert multiple parts were used: the etag suffix must equal the expected part count
    num_parts = math.ceil(file_size / part_size)
    assert embargoed_asset_blob.etag.endswith(f'-{num_parts}')

    # Create asset from embargoed blob
    embargoed_asset: Asset = asset_factory(embargoed_blob=embargoed_asset_blob, blob=None)
    draft_version.assets.add(embargoed_asset)

    # Assert properties before unembargo
    assert embargoed_asset.embargoed_blob is not None
    assert embargoed_asset.blob is None
    assert embargoed_asset.embargoed_blob.etag != ''

    # Run unembargo and validate version metadata
    tasks.unembargo_dandiset(dandiset.pk)
    tasks.validate_version_metadata(draft_version.pk)
    dandiset.refresh_from_db()
    draft_version.refresh_from_db()

    # Assert correct changes took place
    open_access = {'schemaKey': 'AccessRequirements', 'status': 'dandi:OpenAccess'}
    assert dandiset.embargo_status == Dandiset.EmbargoStatus.OPEN
    assert draft_version.status == Version.Status.VALID
    assert draft_version.metadata['access'] == [open_access]

    # Assert no new asset created
    asset: Asset = draft_version.assets.first()
    assert asset == embargoed_asset

    # Check blobs
    assert asset.embargoed_blob is None
    assert asset.blob is not None
    assert asset.blob.etag == embargoed_asset_blob.etag

    # The stored object name is built from the first two 3-character slices of the blob id
    blob_id = str(asset.blob.blob_id)
    assert asset.blob.blob.name == f'test-prefix/blobs/{blob_id[:3]}/{blob_id[3:6]}/{blob_id}'