def test_put_atom_with_create_origin_and_external_identifier( authenticated_client, deposit_collection, atom_dataset, deposit_user): """<atom:external_identifier> was deprecated before <swh:create_origin> was introduced, clients should get an error when trying to use both """ external_id = "foobar" origin_url = deposit_user.provider_url + external_id url = reverse(COL_IRI, args=[deposit_collection.name]) response = post_atom( authenticated_client, url, data=atom_dataset["entry-data0"] % origin_url, HTTP_IN_PROGRESS="true", ) assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(response.content) edit_iri = response_content.find("atom:link[@rel='edit']", namespaces=NAMESPACES).attrib["href"] # when response = put_atom( authenticated_client, edit_iri, data=atom_dataset["error-with-external-identifier"] % external_id, HTTP_IN_PROGRESS="false", ) assert b"<external_identifier> is deprecated" in response.content assert response.status_code == status.HTTP_400_BAD_REQUEST
def test_put_atom_with_create_origin_and_reference(authenticated_client, deposit_collection, atom_dataset, deposit_user): """<swh:reference> and <swh:create_origin> are mutually exclusive""" external_id = "foobar" origin_url = deposit_user.provider_url + external_id url = reverse(COL_IRI, args=[deposit_collection.name]) response = post_atom( authenticated_client, url, data=atom_dataset["entry-data0"] % origin_url, HTTP_IN_PROGRESS="true", ) assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(response.content) edit_iri = response_content.find("atom:link[@rel='edit']", namespaces=NAMESPACES).attrib["href"] # when response = put_atom( authenticated_client, edit_iri, data=atom_dataset["entry-data-with-origin-reference"].format( url=origin_url), HTTP_IN_PROGRESS="false", ) assert b"only one may be used on a given deposit" in response.content assert response.status_code == status.HTTP_400_BAD_REQUEST
def test_put_update_metadata_done_deposit_failure_empty_xml( tmp_path, authenticated_client, complete_deposit, deposit_collection, atom_dataset, swh_storage, ): """failure: client updates metadata on deposit done with an empty xml. Response: 400 """ update_uri = reverse(EDIT_IRI, args=[deposit_collection.name, complete_deposit.id]) atom_content = atom_dataset["entry-data-empty-body"] response = put_atom( authenticated_client, update_uri, data=atom_content, HTTP_X_CHECK_SWHID=complete_deposit.swhid, ) assert response.status_code == status.HTTP_400_BAD_REQUEST assert b"Empty body request is not supported" in response.content
def test_put_update_metadata_done_deposit_failure_functional_checks( tmp_path, authenticated_client, complete_deposit, deposit_collection, atom_dataset, swh_storage, ): """failure: client updates metadata on deposit done without required incomplete metadata Response: 400 """ update_uri = reverse(EDIT_IRI, args=[deposit_collection.name, complete_deposit.id]) response = put_atom( authenticated_client, update_uri, # no title, nor author, nor name fields data=atom_dataset["entry-data-fail-metadata-functional-checks"], HTTP_X_CHECK_SWHID=complete_deposit.swhid, ) assert response.status_code == status.HTTP_400_BAD_REQUEST assert b"Functional metadata checks failure" in response.content # detail on the errors msg = (b"- Mandatory fields are missing (" b"atom:name or atom:title or codemeta:name, " b"atom:author or codemeta:author)") assert msg in response.content
def test_put_update_metadata_done_deposit_failure_mismatched_swhid( tmp_path, authenticated_client, complete_deposit, deposit_collection, atom_dataset, swh_storage, ): """failure: client updates metadata on deposit with SWHID not matching the deposit's. Response: 400 """ incorrect_swhid = "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea" assert complete_deposit.swhid != incorrect_swhid update_uri = reverse(EDIT_IRI, args=[deposit_collection.name, complete_deposit.id]) response = put_atom( authenticated_client, update_uri, data=atom_dataset["entry-data1"], HTTP_X_CHECK_SWHID=incorrect_swhid, ) assert response.status_code == status.HTTP_400_BAD_REQUEST assert b"Mismatched provided SWHID" in response.content
def test_replace_metadata_to_deposit_is_possible( tmp_path, authenticated_client, partial_deposit_with_metadata, deposit_collection, atom_dataset, deposit_user, ): """Replace all metadata with another one should return a 204 response""" # given deposit = partial_deposit_with_metadata origin_url = deposit_user.provider_url + deposit.external_id raw_metadata0 = atom_dataset["entry-data0"] % origin_url requests_meta = DepositRequest.objects.filter(deposit=deposit, type="metadata") assert len(requests_meta) == 1 request_meta0 = requests_meta[0] assert request_meta0.raw_metadata == raw_metadata0 requests_archive0 = DepositRequest.objects.filter(deposit=deposit, type="archive") assert len(requests_archive0) == 1 update_uri = reverse(EDIT_IRI, args=[deposit_collection.name, deposit.id]) response = put_atom( authenticated_client, update_uri, data=atom_dataset["entry-data1"], ) assert response.status_code == status.HTTP_204_NO_CONTENT requests_meta = DepositRequest.objects.filter(deposit=deposit, type="metadata") assert len(requests_meta) == 1 request_meta1 = requests_meta[0] raw_metadata1 = request_meta1.raw_metadata assert raw_metadata1 == atom_dataset["entry-data1"] assert raw_metadata0 != raw_metadata1 assert request_meta0 != request_meta1 # check we did not touch the other parts requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive") assert len(requests_archive1) == 1 assert set(requests_archive0) == set(requests_archive1)
def test_put_metadata_to_em_iri_failure(authenticated_client, deposit_collection, partial_deposit, atom_dataset): """Update (PUT) archive with wrong content type should return 400""" # given deposit = partial_deposit # when update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id]) response = put_atom( authenticated_client, update_uri, data=atom_dataset["entry-data1"], ) # then assert response.status_code == status.HTTP_400_BAD_REQUEST assert b"Packaging format supported is restricted" in response.content for supported_format in ACCEPT_ARCHIVE_CONTENT_TYPES: assert supported_format.encode() in response.content
def test_replace_metadata_to_unknown_deposit(authenticated_client, deposit_collection, atom_dataset): """Adding metadata to unknown deposit should return a 404 response""" unknown_deposit_id = 998 try: Deposit.objects.get(pk=unknown_deposit_id) except Deposit.DoesNotExist: assert True url = reverse(EDIT_IRI, args=[deposit_collection.name, unknown_deposit_id]) response = put_atom( authenticated_client, url, data=atom_dataset["entry-data1"], ) assert response.status_code == status.HTTP_404_NOT_FOUND response_content = parse_xml(response.content) assert (response_content.findtext( "atom:summary", namespaces=NAMESPACES) == "Deposit %s does not exist" % unknown_deposit_id)
def test_put_update_metadata_done_deposit_failure_malformed_xml( tmp_path, authenticated_client, complete_deposit, deposit_collection, atom_dataset, swh_storage, ): """failure: client updates metadata on deposit done with a malformed xml Response: 400 """ update_uri = reverse(EDIT_IRI, args=[deposit_collection.name, complete_deposit.id]) response = put_atom( authenticated_client, update_uri, data=atom_dataset["entry-data-ko"], HTTP_X_CHECK_SWHID=complete_deposit.swhid, ) assert response.status_code == status.HTTP_400_BAD_REQUEST assert b"Malformed xml metadata" in response.content
def test_put_update_metadata_done_deposit_nominal( tmp_path, authenticated_client, complete_deposit, deposit_collection, atom_dataset, sample_data, swh_storage, ): """Nominal scenario, client send an update of metadata on a deposit with status "done" with an existing swhid. Such swhid has its metadata updated accordingly both in the deposit backend and in the metadata storage. Response: 204 """ deposit_swhid = CoreSWHID.from_string(complete_deposit.swhid) assert deposit_swhid.object_type == ObjectType.DIRECTORY directory_id = hash_to_bytes(deposit_swhid.object_id) # directory targeted by the complete_deposit does not exist in the storage assert list(swh_storage.directory_missing([directory_id ])) == [directory_id] # so let's create a directory reference in the storage (current deposit targets an # unknown swhid) existing_directory = sample_data.directory swh_storage.directory_add([existing_directory]) assert list(swh_storage.directory_missing([existing_directory.id])) == [] # and patch one complete deposit swhid so it targets said reference complete_deposit.swhid = str(existing_directory.swhid()) complete_deposit.save() actual_existing_requests_archive = DepositRequest.objects.filter( deposit=complete_deposit, type="archive") nb_archives = len(actual_existing_requests_archive) actual_existing_requests_metadata = DepositRequest.objects.filter( deposit=complete_deposit, type="metadata") nb_metadata = len(actual_existing_requests_metadata) update_uri = reverse(EDIT_IRI, args=[deposit_collection.name, complete_deposit.id]) response = put_atom( authenticated_client, update_uri, data=atom_dataset["entry-data1"], HTTP_X_CHECK_SWHID=complete_deposit.swhid, ) assert response.status_code == status.HTTP_204_NO_CONTENT new_requests_meta = DepositRequest.objects.filter(deposit=complete_deposit, type="metadata") assert len(new_requests_meta) == nb_metadata + 1 request_meta1 = new_requests_meta[0] raw_metadata1 = request_meta1.raw_metadata assert raw_metadata1 == atom_dataset["entry-data1"] # check we did not touch the other parts requests_archive1 = DepositRequest.objects.filter(deposit=complete_deposit, type="archive") assert len(requests_archive1) == nb_archives assert set(actual_existing_requests_archive) == set(requests_archive1) # Ensure metadata stored in the metadata storage is consistent metadata_authority = MetadataAuthority( type=MetadataAuthorityType.DEPOSIT_CLIENT, url=complete_deposit.client.provider_url, ) actual_authority = swh_storage.metadata_authority_get( MetadataAuthorityType.DEPOSIT_CLIENT, url=complete_deposit.client.provider_url) assert actual_authority == metadata_authority config = APIConfig() metadata_fetcher = MetadataFetcher( name=config.tool["name"], version=config.tool["version"], ) actual_fetcher = swh_storage.metadata_fetcher_get(config.tool["name"], config.tool["version"]) assert actual_fetcher == metadata_fetcher directory_swhid = ExtendedSWHID.from_string(complete_deposit.swhid) page_results = swh_storage.raw_extrinsic_metadata_get( directory_swhid, metadata_authority) assert page_results == PagedResult( results=[ RawExtrinsicMetadata( target=directory_swhid, discovery_date=request_meta1.date, authority=metadata_authority, fetcher=metadata_fetcher, format="sword-v2-atom-codemeta", metadata=raw_metadata1.encode(), origin=complete_deposit.origin_url, ) ], next_page_token=None, )
def test_post_deposit_then_update_refused(authenticated_client, deposit_collection, sample_archive, atom_dataset, tmp_path): """Updating a deposit with status 'ready' should return a 400""" tmp_path = str(tmp_path) url = reverse(COL_IRI, args=[deposit_collection.name]) external_id = "some-external-id-1" # when response = post_archive( authenticated_client, url, sample_archive, HTTP_SLUG=external_id, HTTP_IN_PROGRESS="false", ) # then assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(response.content) deposit_id = response_content.findtext("swh:deposit_id", namespaces=NAMESPACES) deposit = Deposit.objects.get(pk=deposit_id) assert deposit.status == DEPOSIT_STATUS_DEPOSITED assert deposit.external_id == external_id assert deposit.collection == deposit_collection assert deposit.swhid is None deposit_request = DepositRequest.objects.get(deposit=deposit) assert deposit_request.deposit == deposit check_archive(sample_archive["name"], deposit_request.archive.name) # updating/adding is forbidden # uri to update the content edit_iri = reverse("edit_iri", args=[deposit_collection.name, deposit_id]) se_iri = reverse("se_iri", args=[deposit_collection.name, deposit_id]) em_iri = reverse("em_iri", args=[deposit_collection.name, deposit_id]) # Testing all update/add endpoint should fail # since the status is ready archive2 = create_arborescence_archive(tmp_path, "archive2", "file2", b"some content in file 2") # replacing file is no longer possible since the deposit's # status is ready r = put_archive( authenticated_client, em_iri, archive2, HTTP_SLUG=external_id, HTTP_IN_PROGRESS="false", ) assert r.status_code == status.HTTP_400_BAD_REQUEST assert (ET.fromstring(r.content).findtext("atom:summary", namespaces=NAMESPACES) == "You can only act on deposit with status 'partial'") # adding file is no longer possible since the deposit's status # is ready r = post_archive( authenticated_client, em_iri, archive2, HTTP_SLUG=external_id, HTTP_IN_PROGRESS="false", ) assert r.status_code == status.HTTP_400_BAD_REQUEST assert (ET.fromstring(r.content).findtext("atom:summary", namespaces=NAMESPACES) == "You can only act on deposit with status 'partial'") # replacing metadata is no longer possible since the deposit's # status is ready r = put_atom( authenticated_client, edit_iri, data=atom_dataset["entry-data-deposit-binary"], CONTENT_LENGTH=len(atom_dataset["entry-data-deposit-binary"]), HTTP_SLUG=external_id, ) assert r.status_code == status.HTTP_400_BAD_REQUEST assert (ET.fromstring(r.content).findtext("atom:summary", namespaces=NAMESPACES) == "You can only act on deposit with status 'partial'") # adding new metadata is no longer possible since the # deposit's status is ready r = post_atom( authenticated_client, se_iri, data=atom_dataset["entry-data-deposit-binary"], CONTENT_LENGTH=len(atom_dataset["entry-data-deposit-binary"]), HTTP_SLUG=external_id, ) assert r.status_code == status.HTTP_400_BAD_REQUEST assert (ET.fromstring(r.content).findtext("atom:summary", namespaces=NAMESPACES) == "You can only act on deposit with status 'partial'") archive_content = b"some content representing archive" archive = InMemoryUploadedFile( BytesIO(archive_content), field_name="archive0", name="archive0", content_type="application/zip", size=len(archive_content), charset=None, ) atom_entry = InMemoryUploadedFile( BytesIO(atom_dataset["entry-data-deposit-binary"].encode("utf-8")), field_name="atom0", name="atom0", content_type='application/atom+xml; charset="utf-8"', size=len(atom_dataset["entry-data-deposit-binary"]), charset="utf-8", ) # replacing multipart metadata is no longer possible since the # deposit's status is ready r = authenticated_client.put( edit_iri, format="multipart", data={ "archive": archive, "atom_entry": atom_entry, }, ) assert r.status_code == status.HTTP_400_BAD_REQUEST assert (ET.fromstring(r.content).findtext("atom:summary", namespaces=NAMESPACES) == "You can only act on deposit with status 'partial'") # adding new metadata is no longer possible since the # deposit's status is ready r = authenticated_client.post( se_iri, format="multipart", data={ "archive": archive, "atom_entry": atom_entry, }, ) assert r.status_code == status.HTTP_400_BAD_REQUEST assert (ET.fromstring(r.content).findtext("atom:summary", namespaces=NAMESPACES) == "You can only act on deposit with status 'partial'")