Beispiel #1
0
def test_deposit_metadata_swhid(
    swhid,
    authenticated_client,
    deposit_collection,
    atom_dataset,
    swh_storage,
):
    """Posting a swhid reference is stored on raw extrinsic metadata storage"""
    swhid_reference = QualifiedSWHID.from_string(swhid)
    swhid_target = extended_swhid_from_qualified(swhid_reference)

    xml_data = atom_dataset["entry-data-with-swhid"].format(
        swhid=swhid,
        metadata_provenance_url=
        "https://hal-test.archives-ouvertes.fr/hal-abcdefgh",
    )
    deposit_client = authenticated_client.deposit_client

    _insert_object(swh_storage, swhid_reference)

    response = post_atom(
        authenticated_client,
        reverse(COL_IRI, args=[deposit_collection.name]),
        data=xml_data,
    )

    assert response.status_code == status.HTTP_201_CREATED, response.content.decode(
    )
    response_content = ElementTree.fromstring(response.content)

    # Ensure the deposit is finalized
    deposit_id = int(
        response_content.findtext("swh:deposit_id", namespaces=NAMESPACES))
    deposit = Deposit.objects.get(pk=deposit_id)
    assert deposit.swhid == str(swhid_target)
    assert deposit.swhid_context == str(swhid_reference)
    assert deposit.complete_date == deposit.reception_date
    assert deposit.complete_date is not None
    assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS

    # Ensure metadata stored in the metadata storage is consistent
    metadata_authority = MetadataAuthority(
        type=MetadataAuthorityType.DEPOSIT_CLIENT,
        url=deposit_client.provider_url,
    )

    actual_authority = swh_storage.metadata_authority_get(
        MetadataAuthorityType.DEPOSIT_CLIENT, url=deposit_client.provider_url)
    assert actual_authority == metadata_authority

    config = APIConfig()
    metadata_fetcher = MetadataFetcher(
        name=config.tool["name"],
        version=config.tool["version"],
    )

    actual_fetcher = swh_storage.metadata_fetcher_get(config.tool["name"],
                                                      config.tool["version"])
    assert actual_fetcher == metadata_fetcher

    # Get the deposited metadata object and check it:

    page_results = swh_storage.raw_extrinsic_metadata_get(
        swhid_target, metadata_authority)

    assert len(page_results.results) == 1
    assert page_results.next_page_token is None

    metadata_context = compute_metadata_context(swhid_reference)
    metadata = RawExtrinsicMetadata(
        target=swhid_target,
        discovery_date=deposit.complete_date,
        authority=metadata_authority,
        fetcher=metadata_fetcher,
        format="sword-v2-atom-codemeta",
        metadata=xml_data.encode(),
        **metadata_context,
    )
    assert page_results == PagedResult(
        results=[metadata],
        next_page_token=None,
    )

    # Get metadata about the deposited metadata object and check it:
    _assert_deposit_info_on_metadata(swh_storage, metadata.swhid(), deposit,
                                     metadata_fetcher)
Beispiel #2
0
def test_deposit_metadata_origin(
    url,
    authenticated_client,
    deposit_collection,
    atom_dataset,
    swh_storage,
):
    """Posting a swhid reference is stored on raw extrinsic metadata storage"""
    xml_data = atom_dataset["entry-data-with-origin-reference"].format(url=url)
    origin_swhid = Origin(url).swhid()
    deposit_client = authenticated_client.deposit_client
    swh_storage.origin_add([Origin(url)])
    response = post_atom(
        authenticated_client,
        reverse(COL_IRI, args=[deposit_collection.name]),
        data=xml_data,
    )

    assert response.status_code == status.HTTP_201_CREATED, response.content.decode(
    )
    response_content = ElementTree.fromstring(response.content)
    # Ensure the deposit is finalized
    deposit_id = int(
        response_content.findtext("swh:deposit_id", namespaces=NAMESPACES))
    deposit = Deposit.objects.get(pk=deposit_id)
    # we got not swhid as input so we cannot have those
    assert deposit.swhid is None
    assert deposit.swhid_context is None
    assert deposit.complete_date == deposit.reception_date
    assert deposit.complete_date is not None
    assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS

    # Ensure metadata stored in the metadata storage is consistent
    metadata_authority = MetadataAuthority(
        type=MetadataAuthorityType.DEPOSIT_CLIENT,
        url=deposit_client.provider_url,
    )

    actual_authority = swh_storage.metadata_authority_get(
        MetadataAuthorityType.DEPOSIT_CLIENT, url=deposit_client.provider_url)
    assert actual_authority == metadata_authority

    config = APIConfig()
    metadata_fetcher = MetadataFetcher(
        name=config.tool["name"],
        version=config.tool["version"],
    )

    actual_fetcher = swh_storage.metadata_fetcher_get(config.tool["name"],
                                                      config.tool["version"])
    assert actual_fetcher == metadata_fetcher

    # Get the deposited metadata object and check it:

    page_results = swh_storage.raw_extrinsic_metadata_get(
        origin_swhid, metadata_authority)

    assert len(page_results.results) == 1
    assert page_results.next_page_token is None

    metadata = RawExtrinsicMetadata(
        target=origin_swhid,
        discovery_date=deposit.complete_date,
        authority=metadata_authority,
        fetcher=metadata_fetcher,
        format="sword-v2-atom-codemeta",
        metadata=xml_data.encode(),
    )
    assert page_results == PagedResult(
        results=[metadata],
        next_page_token=None,
    )

    # Get metadata about the deposited metadata object and check it:
    _assert_deposit_info_on_metadata(swh_storage, metadata.swhid(), deposit,
                                     metadata_fetcher)
Beispiel #3
0
    def _store_metadata_deposit(
        self,
        deposit: Deposit,
        swhid_reference: Union[str, QualifiedSWHID],
        metadata_tree: ElementTree.Element,
        raw_metadata: bytes,
        deposit_origin: Optional[str] = None,
    ) -> Tuple[ExtendedSWHID, Deposit, DepositRequest]:
        """When all user inputs pass the checks, this associates the raw_metadata to the
           swhid_reference in the raw extrinsic metadata storage. In case of any issues,
           a bad request response is returned to the user with the details.

            Checks:
            - metadata are technically parsable
            - metadata pass the functional checks
            - SWHID (if any) is technically valid

        Args:
            deposit: Deposit reference
            swhid_reference: The swhid or the origin to attach metadata information to
            metadata_tree: Full element tree of metadata to check for validity
              (parsed out of raw_metadata)
            raw_metadata: The actual raw metadata to send in the storage metadata
            deposit_origin: Optional deposit origin url to use if any (e.g. deposit
              update scenario provides one)

        Raises:
            DepositError in case of incorrect inputs from the deposit client
            (e.g. functionally invalid metadata, ...)

        Returns:
            Tuple of target swhid, deposit, and deposit request

        """
        metadata_ok, error_details = check_metadata(metadata_tree)
        if not metadata_ok:
            assert error_details, "Details should be set when a failure occurs"
            raise DepositError(
                BAD_REQUEST,
                "Functional metadata checks failure",
                convert_status_detail(error_details),
            )

        metadata_authority = MetadataAuthority(
            type=MetadataAuthorityType.DEPOSIT_CLIENT,
            url=deposit.client.provider_url,
        )

        metadata_fetcher = self.swh_deposit_fetcher()

        # replace metadata within the deposit backend
        deposit_request_data = {
            RAW_METADATA_KEY: raw_metadata,
        }

        # actually add the metadata to the completed deposit
        deposit_request = self._deposit_request_put(deposit, deposit_request_data)

        target_swhid: ExtendedSWHID  # origin URL or CoreSWHID
        if isinstance(swhid_reference, str):
            target_swhid = Origin(swhid_reference).swhid()
            metadata_context = {}
        else:
            metadata_context = compute_metadata_context(swhid_reference)
            if deposit_origin:  # metadata deposit update on completed deposit
                metadata_context["origin"] = deposit_origin

            target_swhid = extended_swhid_from_qualified(swhid_reference)

        self._check_swhid_in_archive(target_swhid)

        # metadata deposited by the client
        metadata_object = RawExtrinsicMetadata(
            target=target_swhid,  # core swhid or origin
            discovery_date=deposit_request.date,
            authority=metadata_authority,
            fetcher=metadata_fetcher,
            format="sword-v2-atom-codemeta",
            metadata=raw_metadata,
            **metadata_context,
        )

        # metadata on the metadata object
        swh_deposit_authority = self.swh_deposit_authority()
        swh_deposit_fetcher = self.swh_deposit_fetcher()
        metametadata_object = RawExtrinsicMetadata(
            target=metadata_object.swhid(),
            discovery_date=deposit_request.date,
            authority=swh_deposit_authority,
            fetcher=swh_deposit_fetcher,
            format="xml-deposit-info",
            metadata=render_to_string(
                "deposit/deposit_info.xml", context={"deposit": deposit}
            ).encode(),
        )

        # write to metadata storage
        self.storage_metadata.metadata_authority_add(
            [metadata_authority, swh_deposit_authority]
        )
        self.storage_metadata.metadata_fetcher_add(
            [metadata_fetcher, swh_deposit_fetcher]
        )
        self.storage_metadata.raw_extrinsic_metadata_add(
            [metadata_object, metametadata_object]
        )

        return (target_swhid, deposit, deposit_request)