コード例 #1
0
def test_put_atom_with_create_origin_and_external_identifier(
        authenticated_client, deposit_collection, atom_dataset, deposit_user):
    """PUT carrying <atom:external_identifier> on an existing deposit fails.

    <atom:external_identifier> was deprecated before <swh:create_origin> was
    introduced, so a client trying to use both must receive a 400 error.

    """
    slug = "foobar"
    initial_origin = deposit_user.provider_url + slug
    collection_iri = reverse(COL_IRI, args=[deposit_collection.name])

    # given: a deposit left in progress, created from the "entry-data0"
    # template filled in with the origin url
    creation = post_atom(
        authenticated_client,
        collection_iri,
        data=atom_dataset["entry-data0"] % initial_origin,
        HTTP_IN_PROGRESS="true",
    )
    assert creation.status_code == status.HTTP_201_CREATED

    # the creation receipt carries the edit link of the new deposit
    edit_link = parse_xml(creation.content).find("atom:link[@rel='edit']",
                                                 namespaces=NAMESPACES)
    edit_iri = edit_link.attrib["href"]

    # when: the metadata is replaced by a document using the deprecated tag
    update = put_atom(
        authenticated_client,
        edit_iri,
        data=atom_dataset["error-with-external-identifier"] % slug,
        HTTP_IN_PROGRESS="false",
    )

    # then: the request is refused with an explicit deprecation message
    assert b"&lt;external_identifier&gt; is deprecated" in update.content
    assert update.status_code == status.HTTP_400_BAD_REQUEST
コード例 #2
0
def test_put_atom_with_create_origin_and_reference(authenticated_client,
                                                   deposit_collection,
                                                   atom_dataset, deposit_user):
    """<swh:reference> and <swh:create_origin> cannot be mixed on one deposit.

    A deposit is first created from the "entry-data0" template, then a PUT
    with the "entry-data-with-origin-reference" document must yield a 400.

    """
    slug = "foobar"
    target_origin = deposit_user.provider_url + slug
    collection_iri = reverse(COL_IRI, args=[deposit_collection.name])

    # given: a deposit left in progress
    creation = post_atom(
        authenticated_client,
        collection_iri,
        data=atom_dataset["entry-data0"] % target_origin,
        HTTP_IN_PROGRESS="true",
    )
    assert creation.status_code == status.HTTP_201_CREATED

    # the creation receipt carries the edit link of the new deposit
    edit_link = parse_xml(creation.content).find("atom:link[@rel='edit']",
                                                 namespaces=NAMESPACES)
    edit_iri = edit_link.attrib["href"]

    # when: the metadata is replaced by a document referencing an origin
    reference_body = atom_dataset["entry-data-with-origin-reference"].format(
        url=target_origin)
    update = put_atom(
        authenticated_client,
        edit_iri,
        data=reference_body,
        HTTP_IN_PROGRESS="false",
    )

    # then: the request is refused
    assert b"only one may be used on a given deposit" in update.content
    assert update.status_code == status.HTTP_400_BAD_REQUEST
コード例 #3
0
def test_put_update_metadata_done_deposit_failure_empty_xml(
    tmp_path,
    authenticated_client,
    complete_deposit,
    deposit_collection,
    atom_dataset,
    swh_storage,
):
    """Failure: metadata update on a complete deposit with an empty xml body.

    Expected response: 400, with an "Empty body request" error message.

    """
    edit_args = [deposit_collection.name, complete_deposit.id]

    # PUT the empty document on the edit IRI of the complete deposit
    reply = put_atom(
        authenticated_client,
        reverse(EDIT_IRI, args=edit_args),
        data=atom_dataset["entry-data-empty-body"],
        HTTP_X_CHECK_SWHID=complete_deposit.swhid,
    )

    assert reply.status_code == status.HTTP_400_BAD_REQUEST
    assert b"Empty body request is not supported" in reply.content
コード例 #4
0
def test_put_update_metadata_done_deposit_failure_functional_checks(
    tmp_path,
    authenticated_client,
    complete_deposit,
    deposit_collection,
    atom_dataset,
    swh_storage,
):
    """Failure: metadata update on a complete deposit lacking mandatory fields.

    Expected response: 400, with the list of missing mandatory fields.

    """
    edit_args = [deposit_collection.name, complete_deposit.id]

    # the submitted document has no title, nor author, nor name fields
    incomplete_body = atom_dataset["entry-data-fail-metadata-functional-checks"]
    reply = put_atom(
        authenticated_client,
        reverse(EDIT_IRI, args=edit_args),
        data=incomplete_body,
        HTTP_X_CHECK_SWHID=complete_deposit.swhid,
    )

    assert reply.status_code == status.HTTP_400_BAD_REQUEST
    body = reply.content
    assert b"Functional metadata checks failure" in body

    # the response also details which mandatory fields are missing
    expected_detail = (b"- Mandatory fields are missing ("
                       b"atom:name or atom:title or codemeta:name, "
                       b"atom:author or codemeta:author)")
    assert expected_detail in body
コード例 #5
0
def test_put_update_metadata_done_deposit_failure_mismatched_swhid(
    tmp_path,
    authenticated_client,
    complete_deposit,
    deposit_collection,
    atom_dataset,
    swh_storage,
):
    """Failure: metadata update whose SWHID header differs from the deposit's.

    Expected response: 400, with a "Mismatched provided SWHID" message.

    """
    wrong_swhid = "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea"
    # sanity check: the fixture must not already target that SWHID
    assert complete_deposit.swhid != wrong_swhid

    edit_args = [deposit_collection.name, complete_deposit.id]
    reply = put_atom(
        authenticated_client,
        reverse(EDIT_IRI, args=edit_args),
        data=atom_dataset["entry-data1"],
        HTTP_X_CHECK_SWHID=wrong_swhid,
    )

    assert reply.status_code == status.HTTP_400_BAD_REQUEST
    assert b"Mismatched provided SWHID" in reply.content
コード例 #6
0
def test_replace_metadata_to_deposit_is_possible(
    tmp_path,
    authenticated_client,
    partial_deposit_with_metadata,
    deposit_collection,
    atom_dataset,
    deposit_user,
):
    """Replacing all metadata of a deposit answers 204 and swaps the request.

    The single metadata request is replaced by the new document while the
    archive request is left as it was.

    """
    deposit = partial_deposit_with_metadata

    def requests_of(kind):
        # deposit requests of the given type attached to the deposit under test
        return DepositRequest.objects.filter(deposit=deposit, type=kind)

    # given: one metadata request matching the "entry-data0" template ...
    initial_raw = atom_dataset["entry-data0"] % (deposit_user.provider_url +
                                                 deposit.external_id)
    metadata_before = requests_of("metadata")
    assert len(metadata_before) == 1
    initial_request = metadata_before[0]
    assert initial_request.raw_metadata == initial_raw

    # ... and one archive request
    archives_before = requests_of("archive")
    assert len(archives_before) == 1

    # when: the metadata is replaced through the edit IRI
    edit_iri = reverse(EDIT_IRI, args=[deposit_collection.name, deposit.id])
    reply = put_atom(
        authenticated_client,
        edit_iri,
        data=atom_dataset["entry-data1"],
    )

    # then
    assert reply.status_code == status.HTTP_204_NO_CONTENT

    metadata_after = requests_of("metadata")
    assert len(metadata_after) == 1
    replaced_request = metadata_after[0]
    replaced_raw = replaced_request.raw_metadata
    assert replaced_raw == atom_dataset["entry-data1"]
    assert initial_raw != replaced_raw
    assert initial_request != replaced_request

    # the archive part of the deposit must be untouched
    archives_after = requests_of("archive")
    assert len(archives_after) == 1
    assert set(archives_before) == set(archives_after)
コード例 #7
0
def test_put_metadata_to_em_iri_failure(authenticated_client,
                                        deposit_collection, partial_deposit,
                                        atom_dataset):
    """PUT of an atom document on the EM IRI is refused with a 400.

    The error response must mention every supported archive content type.

    """
    # when: an atom entry is sent to the EM IRI of the partial deposit
    em_iri = reverse(EM_IRI,
                     args=[deposit_collection.name, partial_deposit.id])
    reply = put_atom(
        authenticated_client,
        em_iri,
        data=atom_dataset["entry-data1"],
    )

    # then: the content type is rejected and the accepted ones are listed
    assert reply.status_code == status.HTTP_400_BAD_REQUEST
    body = reply.content
    assert b"Packaging format supported is restricted" in body
    for content_type in ACCEPT_ARCHIVE_CONTENT_TYPES:
        assert content_type.encode() in body
コード例 #8
0
def test_replace_metadata_to_unknown_deposit(authenticated_client,
                                             deposit_collection, atom_dataset):
    """Replacing metadata of an unknown deposit should return a 404 response.

    The error summary of the response must name the missing deposit id.

    """
    unknown_deposit_id = 998

    # Precondition: the id really is unknown. This must be an explicit
    # assertion; a try/except that only asserts in the except branch passes
    # silently when the deposit does exist.
    assert not Deposit.objects.filter(pk=unknown_deposit_id).exists()

    url = reverse(EDIT_IRI, args=[deposit_collection.name, unknown_deposit_id])
    response = put_atom(
        authenticated_client,
        url,
        data=atom_dataset["entry-data1"],
    )

    assert response.status_code == status.HTTP_404_NOT_FOUND
    response_content = parse_xml(response.content)
    summary = response_content.findtext("atom:summary", namespaces=NAMESPACES)
    assert summary == "Deposit %s does not exist" % unknown_deposit_id
コード例 #9
0
def test_put_update_metadata_done_deposit_failure_malformed_xml(
    tmp_path,
    authenticated_client,
    complete_deposit,
    deposit_collection,
    atom_dataset,
    swh_storage,
):
    """Failure: metadata update on a complete deposit with a malformed xml.

    Expected response: 400, with a "Malformed xml metadata" message.

    """
    edit_args = [deposit_collection.name, complete_deposit.id]

    # PUT the "entry-data-ko" document on the edit IRI of the deposit
    reply = put_atom(
        authenticated_client,
        reverse(EDIT_IRI, args=edit_args),
        data=atom_dataset["entry-data-ko"],
        HTTP_X_CHECK_SWHID=complete_deposit.swhid,
    )

    assert reply.status_code == status.HTTP_400_BAD_REQUEST
    assert b"Malformed xml metadata" in reply.content
コード例 #10
0
def test_put_update_metadata_done_deposit_nominal(
    tmp_path,
    authenticated_client,
    complete_deposit,
    deposit_collection,
    atom_dataset,
    sample_data,
    swh_storage,
):
    """Nominal scenario: the client sends a metadata update on a complete deposit
    whose swhid exists in the storage. The metadata is updated accordingly both in
    the deposit backend (one more metadata request) and in the metadata storage
    (authority, fetcher and raw extrinsic metadata).

    Response: 204

    """
    # the deposit fixture is expected to target a directory
    deposit_swhid = CoreSWHID.from_string(complete_deposit.swhid)
    assert deposit_swhid.object_type == ObjectType.DIRECTORY
    directory_id = hash_to_bytes(deposit_swhid.object_id)

    # directory targeted by the complete_deposit does not exist in the storage
    assert list(swh_storage.directory_missing([directory_id
                                               ])) == [directory_id]

    # so let's create a directory reference in the storage (current deposit targets an
    # unknown swhid)
    existing_directory = sample_data.directory
    swh_storage.directory_add([existing_directory])
    assert list(swh_storage.directory_missing([existing_directory.id])) == []

    # and patch one complete deposit swhid so it targets said reference
    complete_deposit.swhid = str(existing_directory.swhid())
    complete_deposit.save()

    # record how many archive and metadata requests exist before the update;
    # len() evaluates the querysets here, i.e. before the PUT below
    actual_existing_requests_archive = DepositRequest.objects.filter(
        deposit=complete_deposit, type="archive")
    nb_archives = len(actual_existing_requests_archive)
    actual_existing_requests_metadata = DepositRequest.objects.filter(
        deposit=complete_deposit, type="metadata")
    nb_metadata = len(actual_existing_requests_metadata)

    # when: the metadata update is sent on the edit IRI, with the (patched)
    # swhid of the deposit passed as the X-Check-SWHID header
    update_uri = reverse(EDIT_IRI,
                         args=[deposit_collection.name, complete_deposit.id])
    response = put_atom(
        authenticated_client,
        update_uri,
        data=atom_dataset["entry-data1"],
        HTTP_X_CHECK_SWHID=complete_deposit.swhid,
    )

    assert response.status_code == status.HTTP_204_NO_CONTENT

    # then: exactly one metadata request was added to the deposit
    new_requests_meta = DepositRequest.objects.filter(deposit=complete_deposit,
                                                      type="metadata")
    assert len(new_requests_meta) == nb_metadata + 1
    # NOTE(review): taking index 0 presumably relies on the model's default
    # ordering returning the newest request first -- confirm against the model
    request_meta1 = new_requests_meta[0]
    raw_metadata1 = request_meta1.raw_metadata
    assert raw_metadata1 == atom_dataset["entry-data1"]

    # check we did not touch the other parts
    requests_archive1 = DepositRequest.objects.filter(deposit=complete_deposit,
                                                      type="archive")
    assert len(requests_archive1) == nb_archives
    assert set(actual_existing_requests_archive) == set(requests_archive1)

    # Ensure metadata stored in the metadata storage is consistent
    metadata_authority = MetadataAuthority(
        type=MetadataAuthorityType.DEPOSIT_CLIENT,
        url=complete_deposit.client.provider_url,
    )

    # the authority registered in the storage is the deposit client
    actual_authority = swh_storage.metadata_authority_get(
        MetadataAuthorityType.DEPOSIT_CLIENT,
        url=complete_deposit.client.provider_url)
    assert actual_authority == metadata_authority

    # the fetcher registered in the storage is the tool from the API config
    config = APIConfig()
    metadata_fetcher = MetadataFetcher(
        name=config.tool["name"],
        version=config.tool["version"],
    )

    actual_fetcher = swh_storage.metadata_fetcher_get(config.tool["name"],
                                                      config.tool["version"])
    assert actual_fetcher == metadata_fetcher

    # a single raw extrinsic metadata entry targets the directory; it holds
    # the raw document that was sent and the date of the new deposit request
    directory_swhid = ExtendedSWHID.from_string(complete_deposit.swhid)
    page_results = swh_storage.raw_extrinsic_metadata_get(
        directory_swhid, metadata_authority)
    assert page_results == PagedResult(
        results=[
            RawExtrinsicMetadata(
                target=directory_swhid,
                discovery_date=request_meta1.date,
                authority=metadata_authority,
                fetcher=metadata_fetcher,
                format="sword-v2-atom-codemeta",
                metadata=raw_metadata1.encode(),
                origin=complete_deposit.origin_url,
            )
        ],
        next_page_token=None,
    )
コード例 #11
0
def test_post_deposit_then_update_refused(authenticated_client,
                                          deposit_collection, sample_archive,
                                          atom_dataset, tmp_path):
    """Updating a deposit that is no longer 'partial' should return a 400.

    A deposit is created with HTTP_IN_PROGRESS "false", then every update/add
    endpoint (archive, atom and multipart, through PUT and POST) is checked to
    answer 400 with the "You can only act on deposit with status 'partial'"
    summary.

    """
    tmp_path = str(tmp_path)
    url = reverse(COL_IRI, args=[deposit_collection.name])

    external_id = "some-external-id-1"

    # when: an archive is deposited and the deposit is finalized right away
    response = post_archive(
        authenticated_client,
        url,
        sample_archive,
        HTTP_SLUG=external_id,
        HTTP_IN_PROGRESS="false",
    )

    # then
    assert response.status_code == status.HTTP_201_CREATED

    response_content = parse_xml(response.content)
    deposit_id = response_content.findtext("swh:deposit_id",
                                           namespaces=NAMESPACES)

    # the created deposit is in the "deposited" status and has no swhid yet
    deposit = Deposit.objects.get(pk=deposit_id)
    assert deposit.status == DEPOSIT_STATUS_DEPOSITED
    assert deposit.external_id == external_id
    assert deposit.collection == deposit_collection
    assert deposit.swhid is None

    # .get() also checks that exactly one request is attached to the deposit
    deposit_request = DepositRequest.objects.get(deposit=deposit)
    assert deposit_request.deposit == deposit
    check_archive(sample_archive["name"], deposit_request.archive.name)

    # updating/adding is forbidden

    # uri to update the content
    edit_iri = reverse("edit_iri", args=[deposit_collection.name, deposit_id])
    se_iri = reverse("se_iri", args=[deposit_collection.name, deposit_id])
    em_iri = reverse("em_iri", args=[deposit_collection.name, deposit_id])

    # Testing all update/add endpoint should fail
    # since the status is no longer 'partial'

    archive2 = create_arborescence_archive(tmp_path, "archive2", "file2",
                                           b"some content in file 2")

    # replacing file is no longer possible since the deposit's
    # status is no longer 'partial'
    r = put_archive(
        authenticated_client,
        em_iri,
        archive2,
        HTTP_SLUG=external_id,
        HTTP_IN_PROGRESS="false",
    )

    assert r.status_code == status.HTTP_400_BAD_REQUEST
    assert (ET.fromstring(r.content).findtext("atom:summary",
                                              namespaces=NAMESPACES) ==
            "You can only act on deposit with status 'partial'")

    # adding file is no longer possible since the deposit's status
    # is no longer 'partial'
    r = post_archive(
        authenticated_client,
        em_iri,
        archive2,
        HTTP_SLUG=external_id,
        HTTP_IN_PROGRESS="false",
    )

    assert r.status_code == status.HTTP_400_BAD_REQUEST
    assert (ET.fromstring(r.content).findtext("atom:summary",
                                              namespaces=NAMESPACES) ==
            "You can only act on deposit with status 'partial'")

    # replacing metadata is no longer possible since the deposit's
    # status is no longer 'partial'
    r = put_atom(
        authenticated_client,
        edit_iri,
        data=atom_dataset["entry-data-deposit-binary"],
        CONTENT_LENGTH=len(atom_dataset["entry-data-deposit-binary"]),
        HTTP_SLUG=external_id,
    )

    assert r.status_code == status.HTTP_400_BAD_REQUEST
    assert (ET.fromstring(r.content).findtext("atom:summary",
                                              namespaces=NAMESPACES) ==
            "You can only act on deposit with status 'partial'")

    # adding new metadata is no longer possible since the
    # deposit's status is no longer 'partial'
    r = post_atom(
        authenticated_client,
        se_iri,
        data=atom_dataset["entry-data-deposit-binary"],
        CONTENT_LENGTH=len(atom_dataset["entry-data-deposit-binary"]),
        HTTP_SLUG=external_id,
    )

    assert r.status_code == status.HTTP_400_BAD_REQUEST
    assert (ET.fromstring(r.content).findtext("atom:summary",
                                              namespaces=NAMESPACES) ==
            "You can only act on deposit with status 'partial'")

    # in-memory uploads for the multipart requests below; the same two
    # objects are sent in both the PUT and the POST
    archive_content = b"some content representing archive"
    archive = InMemoryUploadedFile(
        BytesIO(archive_content),
        field_name="archive0",
        name="archive0",
        content_type="application/zip",
        size=len(archive_content),
        charset=None,
    )

    # NOTE(review): size is the length of the str, not of the utf-8 encoded
    # bytes; both only match if the dataset entry is pure ASCII -- confirm
    atom_entry = InMemoryUploadedFile(
        BytesIO(atom_dataset["entry-data-deposit-binary"].encode("utf-8")),
        field_name="atom0",
        name="atom0",
        content_type='application/atom+xml; charset="utf-8"',
        size=len(atom_dataset["entry-data-deposit-binary"]),
        charset="utf-8",
    )

    # replacing multipart metadata is no longer possible since the
    # deposit's status is no longer 'partial'
    r = authenticated_client.put(
        edit_iri,
        format="multipart",
        data={
            "archive": archive,
            "atom_entry": atom_entry,
        },
    )

    assert r.status_code == status.HTTP_400_BAD_REQUEST
    assert (ET.fromstring(r.content).findtext("atom:summary",
                                              namespaces=NAMESPACES) ==
            "You can only act on deposit with status 'partial'")

    # adding new multipart content is no longer possible since the
    # deposit's status is no longer 'partial'
    r = authenticated_client.post(
        se_iri,
        format="multipart",
        data={
            "archive": archive,
            "atom_entry": atom_entry,
        },
    )

    assert r.status_code == status.HTTP_400_BAD_REQUEST
    assert (ET.fromstring(r.content).findtext("atom:summary",
                                              namespaces=NAMESPACES) ==
            "You can only act on deposit with status 'partial'")