def test_QualifiedSWHID_parse_serialize_qualifiers(string, parsed):
    """Round-trip check between a SWHID string and its parsed form.

    When ``parsed`` is None, the string is expected to be invalid and parsing
    must raise ValidationError; otherwise parsing must yield ``parsed`` and
    serializing ``parsed`` must give back the exact input string.
    """
    if parsed is None:
        # Invalid string: parsing must fail.  The print/repr makes pytest show
        # what was (wrongly) produced if no exception is raised.
        with pytest.raises(ValidationError):
            print(repr(QualifiedSWHID.from_string(string)))
        return

    round_tripped = QualifiedSWHID.from_string(string)
    assert round_tripped == parsed
    assert string == str(parsed)
def complete_deposit(sample_archive, deposit_collection, authenticated_client):
    """Returns a completed deposit (load success)"""
    deposit = create_deposit(
        authenticated_client,
        deposit_collection.name,
        sample_archive,
        external_id="external-id-complete",
        deposit_status=DEPOSIT_STATUS_LOAD_SUCCESS,
    )

    # Hard-coded results mimicking a successful ingestion of the deposit.
    origin = "https://hal.archives-ouvertes.fr/hal-01727745"
    directory_id = "42a13fc721c8716ff695d0d62fc851d641f3a12b"
    anchor_swhid = CoreSWHID(
        object_type=ObjectType.RELEASE,
        object_id=hash_to_bytes("548b3c0a2bb43e1fca191e24b5803ff6b3bc7c10"),
    )
    visit_swhid = CoreSWHID(
        object_type=ObjectType.SNAPSHOT,
        object_id=hash_to_bytes("e5e82d064a9c3df7464223042e0c55d72ccff7f0"),
    )

    deposit.swhid = f"swh:1:dir:{directory_id}"
    deposit.swhid_context = str(
        QualifiedSWHID(
            object_type=ObjectType.DIRECTORY,
            object_id=hash_to_bytes(directory_id),
            origin=origin,
            visit=visit_swhid,
            anchor=anchor_swhid,
            path=b"/",
        )
    )
    deposit.save()
    return deposit
def test_QualifiedSWHID_init(object_type, qualifiers, expected):
    """Tests validation and converters of qualifiers"""
    if isinstance(expected, type):
        # ``expected`` is an exception class: construction must fail.
        assert issubclass(expected, Exception)
        with pytest.raises(expected):
            QualifiedSWHID(object_type=object_type, object_id=_x(HASH), **qualifiers)
        return

    # Otherwise ``expected`` is the canonical string serialization.
    assert isinstance(expected, str)
    swhid = QualifiedSWHID(object_type=object_type, object_id=_x(HASH), **qualifiers)

    # Check the built object has the right serialization
    assert str(swhid) == expected

    # Check the internal state of the object is the same as if parsed from a string
    assert swhid == QualifiedSWHID.from_string(expected)
def test_QualifiedSWHID_serialize_origin():
    """Checks that semicolon in origins are escaped."""
    # The raw ';' must be escaped to %3B on serialization, while the
    # pre-encoded '%25' must be preserved as-is.
    origin = "https://example.org/foo;bar%25baz"
    expected = f"swh:1:cnt:{HASH};origin=https://example.org/foo%3Bbar%25baz"
    swhid = QualifiedSWHID(
        object_type=ObjectType.CONTENT,
        object_id=_x(HASH),
        origin=origin,
    )
    assert str(swhid) == expected
def test_QualifiedSWHID_validation_error(ns, version, type, id, qualifiers):
    """Constructing a QualifiedSWHID from invalid components must raise."""
    with pytest.raises(ValidationError):
        core_kwargs = dict(
            namespace=ns,
            scheme_version=version,
            object_type=type,
            object_id=_x(id),
        )
        QualifiedSWHID(**core_kwargs, **qualifiers)
def test_QualifiedSWHID_eq():
    """Equality of QualifiedSWHID depends on the core fields AND the qualifiers."""
    object_id = _x("94a9ed024d3859793618152ea559a168bbcbb5e2")

    # Same core identifier, no qualifiers: equal
    assert QualifiedSWHID(
        object_type=ObjectType.DIRECTORY, object_id=object_id
    ) == QualifiedSWHID(object_type=ObjectType.DIRECTORY, object_id=object_id)

    # Same core identifier with identical qualifiers: equal
    assert QualifiedSWHID(
        object_type=ObjectType.DIRECTORY,
        object_id=object_id,
        **dummy_qualifiers,
    ) == QualifiedSWHID(
        object_type=ObjectType.DIRECTORY,
        object_id=object_id,
        **dummy_qualifiers,
    )

    # The original third assertion was a byte-for-byte duplicate of the one
    # above; make it meaningful instead: a qualified SWHID must NOT compare
    # equal to its unqualified counterpart.
    assert QualifiedSWHID(
        object_type=ObjectType.DIRECTORY,
        object_id=object_id,
        **dummy_qualifiers,
    ) != QualifiedSWHID(
        object_type=ObjectType.DIRECTORY,
        object_id=object_id,
    )
def process_put(
    self,
    request,
    headers: ParsedRequestHeaders,
    collection_name: str,
    deposit: Deposit,
) -> None:
    """Update the deposit with status and SWHIDs

    Returns:
        204 No content
        400 Bad request if checks fail

    """
    data = request.data

    status = data["status"]
    # Record the new status unconditionally; it covers both the success and
    # the rejected cases (the previous version redundantly re-assigned it in
    # the else branch).
    deposit.status = status
    if status == DEPOSIT_STATUS_LOAD_SUCCESS:
        origin_url = data["origin_url"]
        directory_id = data["directory_id"]
        release_id = data["release_id"]
        dir_id = CoreSWHID(
            object_type=ObjectType.DIRECTORY, object_id=hash_to_bytes(directory_id)
        )
        snp_id = CoreSWHID(
            object_type=ObjectType.SNAPSHOT,
            object_id=hash_to_bytes(data["snapshot_id"]),
        )
        rel_id = CoreSWHID(
            object_type=ObjectType.RELEASE, object_id=hash_to_bytes(release_id)
        )

        deposit.swhid = str(dir_id)
        # new id with contextual information
        deposit.swhid_context = str(
            QualifiedSWHID(
                object_type=ObjectType.DIRECTORY,
                object_id=hash_to_bytes(directory_id),
                origin=origin_url,
                visit=snp_id,
                anchor=rel_id,
                # QualifiedSWHID's path qualifier is bytes; the rest of the
                # codebase consistently passes b"/" (str "/" would be rejected
                # by the attribute type validation).
                path=b"/",
            )
        )

    if "status_detail" in data:
        deposit.status_detail = data["status_detail"]

    deposit.save()
def compute_metadata_context(swhid_reference: QualifiedSWHID) -> Dict[str, Any]:
    """Given a SWHID object, determine the context as a dict."""
    # Unqualified SWHID: no context beyond an unset origin.
    if not swhid_reference.qualifiers():
        return {"origin": None}

    context: Dict[str, Any] = {
        "origin": swhid_reference.origin,
        "path": swhid_reference.path,
    }

    visit = swhid_reference.visit
    if visit:
        context["snapshot"] = visit

    anchor = swhid_reference.anchor
    if anchor:
        # Key the anchor by its own object type ("revision", "release", ...)
        context[anchor.object_type.name.lower()] = anchor

    return context
def test_QualifiedSWHID_hash():
    """hash() must agree for equal QualifiedSWHIDs, whatever the qualifier order."""
    object_id = _x("94a9ed024d3859793618152ea559a168bbcbb5e2")

    def dir_swhid_hash(**qualifiers):
        # Helper: hash of a directory SWHID built with the given qualifiers,
        # in the keyword order they were passed.
        return hash(
            QualifiedSWHID(
                object_type=ObjectType.DIRECTORY,
                object_id=object_id,
                **qualifiers,
            )
        )

    # No qualifiers
    assert dir_swhid_hash() == dir_swhid_hash()

    # Identical qualifiers
    assert dir_swhid_hash(**dummy_qualifiers) == dir_swhid_hash(**dummy_qualifiers)

    # Different order of the dictionary, so the underlying order of the tuple in
    # ImmutableDict is different.
    assert dir_swhid_hash(
        origin="https://example.com", lines=(42, None)
    ) == dir_swhid_hash(lines=(42, None), origin="https://example.com")
def parse_swh_reference(
    metadata: ElementTree.Element,
) -> Optional[Union[QualifiedSWHID, str]]:
    """Parse <swh:reference> within the metadata document, if any.

    .. code-block:: xml

       <swh:deposit>
         <swh:reference>
           <swh:origin url='https://github.com/user/repo'/>
         </swh:reference>
       </swh:deposit>

    or:

    .. code-block:: xml

       <swh:deposit>
         <swh:reference>
           <swh:object swhid="swh:1:dir:31b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=https://hal.archives-ouvertes.fr/hal-01243573;visit=swh:1:snp:4fc1e36fca86b2070204bedd51106014a614f321;anchor=swh:1:rev:9c5de20cfb54682370a398fcc733e829903c8cba;path=/moranegg-AffectationRO-df7f68b/" />
         </swh:reference>
       </swh:deposit>

    Args:
        metadata: result of parsing an Atom document

    Raises:
        ValidationError in case the swhid referenced (if any) is invalid

    Returns:
        Either swhid or origin reference if any. None otherwise.

    """  # noqa
    # An origin reference takes precedence over an object reference.
    ref_origin = metadata.find(
        "swh:deposit/swh:reference/swh:origin[@url]", namespaces=NAMESPACES
    )
    if ref_origin is not None:
        return ref_origin.attrib["url"]

    ref_object = metadata.find(
        "swh:deposit/swh:reference/swh:object[@swhid]", namespaces=NAMESPACES
    )
    if ref_object is None:
        return None
    swhid = ref_object.attrib["swhid"]
    if not swhid:
        return None
    swhid_reference = QualifiedSWHID.from_string(swhid)

    if swhid_reference.qualifiers():
        anchor = swhid_reference.anchor
        if anchor:
            if anchor.object_type not in ALLOWED_QUALIFIERS_NODE_TYPE:
                error_msg = (
                    "anchor qualifier should be a core SWHID with type one of "
                    f"{', '.join(t.name.lower() for t in ALLOWED_QUALIFIERS_NODE_TYPE)}"
                )
                raise ValidationError(error_msg)

        visit = swhid_reference.visit
        if visit:
            if visit.object_type != ObjectType.SNAPSHOT:
                raise ValidationError(
                    f"visit qualifier should be a core SWHID with type snp, "
                    f"not {visit.object_type.value}"
                )

        if (
            visit
            and anchor
            and visit.object_type == ObjectType.SNAPSHOT
            and anchor.object_type == ObjectType.SNAPSHOT
        ):
            # Log before rejecting, to keep a trace of clients sending this
            # unsupported combination.  Use warning(): Logger.warn is a
            # deprecated alias of Logger.warning.
            logger.warning(
                "SWHID use of both anchor and visit targeting "
                f"a snapshot: {swhid_reference}"
            )
            raise ValidationError(
                "'anchor=swh:1:snp:' is not supported when 'visit' is also provided."
            )

    return swhid_reference
def test_deposit_metadata_swhid(
    swhid,
    authenticated_client,
    deposit_collection,
    atom_dataset,
    swh_storage,
):
    """Posting a swhid reference is stored on raw extrinsic metadata storage"""
    swhid_reference = QualifiedSWHID.from_string(swhid)
    # The metadata target is the extended (unqualified) form of the reference
    swhid_target = extended_swhid_from_qualified(swhid_reference)

    xml_data = atom_dataset["entry-data-with-swhid"].format(
        swhid=swhid,
        metadata_provenance_url="https://hal-test.archives-ouvertes.fr/hal-abcdefgh",
    )
    deposit_client = authenticated_client.deposit_client
    # The referenced object must exist in the archive before posting metadata
    _insert_object(swh_storage, swhid_reference)

    response = post_atom(
        authenticated_client,
        reverse(COL_IRI, args=[deposit_collection.name]),
        data=xml_data,
    )

    assert response.status_code == status.HTTP_201_CREATED, response.content.decode()
    response_content = ElementTree.fromstring(response.content)

    # Ensure the deposit is finalized
    deposit_id = int(
        response_content.findtext("swh:deposit_id", namespaces=NAMESPACES)
    )
    deposit = Deposit.objects.get(pk=deposit_id)
    # A metadata-only deposit completes immediately upon reception
    assert deposit.swhid == str(swhid_target)
    assert deposit.swhid_context == str(swhid_reference)
    assert deposit.complete_date == deposit.reception_date
    assert deposit.complete_date is not None
    assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS

    # Ensure metadata stored in the metadata storage is consistent
    metadata_authority = MetadataAuthority(
        type=MetadataAuthorityType.DEPOSIT_CLIENT,
        url=deposit_client.provider_url,
    )

    actual_authority = swh_storage.metadata_authority_get(
        MetadataAuthorityType.DEPOSIT_CLIENT, url=deposit_client.provider_url
    )
    assert actual_authority == metadata_authority

    config = APIConfig()
    metadata_fetcher = MetadataFetcher(
        name=config.tool["name"],
        version=config.tool["version"],
    )

    actual_fetcher = swh_storage.metadata_fetcher_get(
        config.tool["name"], config.tool["version"]
    )
    assert actual_fetcher == metadata_fetcher

    # Get the deposited metadata object and check it:

    page_results = swh_storage.raw_extrinsic_metadata_get(
        swhid_target, metadata_authority
    )

    assert len(page_results.results) == 1
    assert page_results.next_page_token is None

    # The expected stored object: raw XML payload plus the context derived
    # from the qualified SWHID's qualifiers (origin/path/snapshot/anchor)
    metadata_context = compute_metadata_context(swhid_reference)
    metadata = RawExtrinsicMetadata(
        target=swhid_target,
        discovery_date=deposit.complete_date,
        authority=metadata_authority,
        fetcher=metadata_fetcher,
        format="sword-v2-atom-codemeta",
        metadata=xml_data.encode(),
        **metadata_context,
    )
    assert page_results == PagedResult(
        results=[metadata],
        next_page_token=None,
    )

    # Get metadata about the deposited metadata object and check it:
    _assert_deposit_info_on_metadata(
        swh_storage, metadata.swhid(), deposit, metadata_fetcher
    )
with pytest.raises(ValidationError): swhid_class.from_string(invalid_swhid) # string SWHIDs, and how they should be parsed by each of the classes, # or None if the class does not support it HASH = "94a9ed024d3859793618152ea559a168bbcbb5e2" VALID_SWHIDS = [ ( f"swh:1:cnt:{HASH}", CoreSWHID( object_type=ObjectType.CONTENT, object_id=_x(HASH), ), QualifiedSWHID( object_type=ObjectType.CONTENT, object_id=_x(HASH), ), ExtendedSWHID( object_type=ExtendedObjectType.CONTENT, object_id=_x(HASH), ), ), ( f"swh:1:dir:{HASH}", CoreSWHID( object_type=ObjectType.DIRECTORY, object_id=_x(HASH), ), QualifiedSWHID( object_type=ObjectType.DIRECTORY, object_id=_x(HASH),
def handle_deposit_row(
    row,
    discovery_date: Optional[datetime.datetime],
    origin,
    storage,
    deposit_cur,
    dry_run: bool,
):
    """Loads metadata from the deposit database (which is more reliable as the
    metadata on the revision object, as some versions of the deposit loader were
    a bit lossy; and they used very different format for the field in the
    revision table).
    """
    # The revision message encodes deposit id / collection / client; it is the
    # key used to find the deposit rows back in the deposit database.
    parsed_message = deposit_revision_message_re.match(row["message"])
    assert parsed_message is not None, row["message"]

    deposit_id = int(parsed_message.group("deposit_id"))
    collection = parsed_message.group("collection").decode()
    client_name = parsed_message.group("client").decode()

    deposit_cur.execute(
        f"SELECT {', '.join(DEPOSIT_COLS)} FROM deposit "
        f"INNER JOIN deposit_collection "
        f" ON (deposit.collection_id=deposit_collection.id) "
        f"INNER JOIN deposit_client ON (deposit.client_id=deposit_client.user_ptr_id) "
        f"INNER JOIN auth_user ON (deposit.client_id=auth_user.id) "
        f"INNER JOIN deposit_request ON (deposit.id=deposit_request.deposit_id) "
        f"WHERE deposit.id = %s",
        (deposit_id,),
    )

    # Accumulators over all deposit_request rows of this deposit; the sets are
    # used below to assert the rows are mutually consistent.
    provider_urls = set()
    swhids = set()
    metadata_entries = []
    dates = set()
    external_identifiers = set()
    for deposit_request_row in deposit_cur:
        deposit_request = dict(zip(DEPOSIT_COLS, deposit_request_row))

        # Sanity checks to make sure we selected the right deposit
        assert deposit_request["deposit.id"] == deposit_id
        assert deposit_request["deposit_collection.name"] == collection, deposit_request
        if client_name != "":
            # Sometimes it's missing from the commit message
            assert deposit_request["auth_user.username"] == client_name

        # Date of the deposit request (either the initial request, of subsequent ones)
        date = deposit_request["deposit_request.date"]
        dates.add(date)

        if deposit_request["deposit.external_id"] == "hal-02355563":
            # Failed deposit: hardcode the SWHID its swhid_context should have had
            swhids.add(
                "swh:1:rev:9293f230baca9814490d4fff7ac53d487a20edb6"
                ";origin=https://hal.archives-ouvertes.fr/hal-02355563"
            )
        else:
            assert deposit_request["deposit.swhid_context"], deposit_request
            swhids.add(deposit_request["deposit.swhid_context"])

        external_identifiers.add(deposit_request["deposit.external_id"])

        # Client of the deposit
        provider_urls.add(deposit_request["deposit_client.provider_url"])

        metadata = deposit_request["deposit_request.metadata"]
        if metadata is not None:
            json.dumps(metadata).encode()  # check it's valid

            # Detect which of the historical deposit metadata formats this
            # request uses, from the shape of its keys.
            if "@xmlns" in metadata:
                assert metadata["@xmlns"] == ATOM_NS
                assert metadata["@xmlns:codemeta"] in (CODEMETA_NS, [CODEMETA_NS])
                format = NEW_DEPOSIT_FORMAT
            elif "{http://www.w3.org/2005/Atom}id" in metadata:
                assert (
                    "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}author" in metadata
                    or "{http://www.w3.org/2005/Atom}author" in metadata
                )
                format = OLD_DEPOSIT_FORMAT
            else:
                # new format introduced in
                # https://forge.softwareheritage.org/D4065
                # it's the same as the first case, but with the @xmlns
                # declarations stripped
                # Most of them should have the "id", but some revisions,
                # like 4d3890004fade1f4ec3bf7004a4af0c490605128, are missing
                # this field
                assert "id" in metadata or "title" in metadata
                assert "codemeta:author" in metadata
                format = NEW_DEPOSIT_FORMAT
            metadata_entries.append((date, format, metadata))

    if discovery_date is None:
        # Fall back to the latest deposit request date
        discovery_date = max(dates)

    # Sanity checks to make sure deposit requests are consistent with each other
    assert len(metadata_entries) >= 1, deposit_id
    assert len(provider_urls) == 1, f"expected 1 provider url, got {provider_urls}"
    (provider_url,) = provider_urls
    assert len(swhids) == 1
    (swhid,) = swhids
    assert (
        len(external_identifiers) == 1
    ), f"expected 1 external identifier, got {external_identifiers}"
    (external_identifier,) = external_identifiers

    # computed the origin from the external_identifier if we don't have one
    if origin is None:
        origin = f"{provider_url.strip('/')}/{external_identifier}"

    # explicit list of mistakes that happened in the past, but shouldn't
    # happen again:
    if origin == "https://hal.archives-ouvertes.fr/hal-01588781":
        # deposit id 75
        origin = "https://inria.halpreprod.archives-ouvertes.fr/hal-01588781"
    elif origin == "https://hal.archives-ouvertes.fr/hal-01588782":
        # deposit id 76
        origin = "https://inria.halpreprod.archives-ouvertes.fr/hal-01588782"
    elif origin == "https://hal.archives-ouvertes.fr/hal-01592430":
        # deposit id 143
        origin = "https://hal-preprod.archives-ouvertes.fr/hal-01592430"
    elif origin == "https://hal.archives-ouvertes.fr/hal-01588927":
        origin = "https://inria.halpreprod.archives-ouvertes.fr/hal-01588927"
    elif origin == "https://hal.archives-ouvertes.fr/hal-01593875":
        # deposit id 175
        origin = "https://hal-preprod.archives-ouvertes.fr/hal-01593875"
    elif deposit_id == 160:
        assert origin == "https://www.softwareheritage.org/je-suis-gpl", origin
        origin = "https://forge.softwareheritage.org/source/jesuisgpl/"
    elif origin == "https://hal.archives-ouvertes.fr/hal-01588942":
        # deposit id 90
        origin = "https://inria.halpreprod.archives-ouvertes.fr/hal-01588942"
    elif origin == "https://hal.archives-ouvertes.fr/hal-01592499":
        # deposit id 162
        origin = "https://hal-preprod.archives-ouvertes.fr/hal-01592499"
    elif origin == "https://hal.archives-ouvertes.fr/hal-01588935":
        # deposit id 89
        origin = "https://hal-preprod.archives-ouvertes.fr/hal-01588935"

    assert_origin_exists(storage, origin)

    # check the origin we computed matches the one in the deposit db
    swhid_origin = QualifiedSWHID.from_string(swhid).origin
    # NOTE(review): origin is always non-None here (computed above if missing);
    # the guard looks redundant but is kept as-is.
    if origin is not None:
        # explicit list of mistakes that happened in the past, but shouldn't
        # happen again:
        exceptions = [
            (
                # deposit id 229
                "https://hal.archives-ouvertes.fr/hal-01243573",
                "https://hal-test.archives-ouvertes.fr/hal-01243573",
            ),
            (
                # deposit id 199
                "https://hal.archives-ouvertes.fr/hal-01243065",
                "https://hal-test.archives-ouvertes.fr/hal-01243065",
            ),
            (
                # deposit id 164
                "https://hal.archives-ouvertes.fr/hal-01593855",
                "https://hal-preprod.archives-ouvertes.fr/hal-01593855",
            ),
        ]
        if (origin, swhid_origin) not in exceptions:
            assert origin == swhid_origin, (
                f"the origin we guessed from the deposit db or revision ({origin}) "
                f"doesn't match the one in the deposit db's SWHID ({swhid})"
            )

    authority = MetadataAuthority(
        type=MetadataAuthorityType.DEPOSIT_CLIENT,
        url=provider_url,
        metadata={},
    )

    # Load one metadata object per deposit request, all sharing the same
    # authority/origin but each keeping its own date and format.
    for (date, format, metadata) in metadata_entries:
        load_metadata(
            storage,
            row["id"],
            row["directory"],
            date,
            metadata,
            format,
            authority=authority,
            origin=origin,
            dry_run=dry_run,
        )

    return (origin, discovery_date)
def deposit_list_datatables(request: Request) -> JsonResponse:
    """Special API view to list and filter deposits, produced responses are intended
    to be consumed by datatables js framework used in deposits admin Web UI."""
    table_data: Dict[str, Any] = {}
    # "draw" is echoed back so datatables can match responses to requests
    table_data["draw"] = int(request.GET.get("draw", 1))
    try:
        username = request.GET.get("username")
        if username:
            deposits = Deposit.objects.select_related("client").filter(
                client__username=username
            )
        else:
            deposits = Deposit.objects.all()

        # Total count before any search/exclude filtering is applied
        deposits_count = deposits.count()
        search_value = request.GET.get("search[value]")
        if search_value:
            deposits = deposits.filter(_deposit_search_query(search_value))

        exclude_pattern = request.GET.get("excludePattern")
        if exclude_pattern:
            deposits = deposits.exclude(_deposit_search_query(exclude_pattern))

        # Ordering parameters follow the datatables request protocol
        column_order = request.GET.get("order[0][column]")
        field_order = request.GET.get("columns[%s][name]" % column_order, "id")
        order_dir = request.GET.get("order[0][dir]", "desc")
        if order_dir == "desc":
            field_order = "-" + field_order

        deposits = deposits.order_by(field_order)

        length = int(request.GET.get("length", 10))
        # "start" is a row offset; convert to a 1-based page number
        page = int(request.GET.get("start", 0)) // length + 1
        paginator = Paginator(deposits, length)
        data = [
            DepositSerializer(_enrich_deposit_with_metadata(d)).data
            for d in paginator.page(page).object_list
        ]

        table_data["recordsTotal"] = deposits_count
        table_data["recordsFiltered"] = deposits.count()
        data_list = []
        for d in data:
            data_dict = {
                "id": d["id"],
                "type": d["type"],
                "external_id": d["external_id"],
                "raw_metadata": d["raw_metadata"],
                "reception_date": d["reception_date"],
                "status": d["status"],
                "status_detail": d["status_detail"],
                "swhid": d["swhid"],
                "swhid_context": d["swhid_context"],
            }
            provenance = None
            raw_metadata = d["raw_metadata"]
            # for meta deposit, the uri should be the url provenance
            if raw_metadata and d["type"] == "meta":  # metadata provenance
                provenance = parse_swh_metadata_provenance(
                    ElementTree.fromstring(raw_metadata)
                )
            # For code deposits the uri is the origin
            # First, trying to determine it out of the raw metadata associated with the
            # deposit
            elif raw_metadata and d["type"] == "code":
                create_origin_url, add_to_origin_url = parse_swh_deposit_origin(
                    ElementTree.fromstring(raw_metadata)
                )
                provenance = create_origin_url or add_to_origin_url

            # For code deposits, if not provided, use the origin_url
            if not provenance and d["type"] == "code":
                if d["origin_url"]:
                    provenance = d["origin_url"]

            # If still not found, fallback using the swhid context
            if not provenance and d["swhid_context"]:
                swhid = QualifiedSWHID.from_string(d["swhid_context"])
                provenance = swhid.origin

            data_dict["uri"] = provenance  # could be None

            data_list.append(data_dict)

        table_data["data"] = data_list

    except Exception as exc:
        # Report to Sentry but still return a well-formed datatables payload
        sentry_sdk.capture_exception(exc)
        table_data["error"] = "An error occurred while retrieving the list of deposits !"
        if settings.DEBUG:
            table_data["error"] += "\n" + str(exc)

    return JsonResponse(table_data)
def test_migrations_22_add_deposit_type_column_model_and_data(migrator):
    """22 migration should add the type column and migrate old values with new type"""
    from swh.deposit.models import (
        DEPOSIT_CODE,
        DEPOSIT_METADATA_ONLY,
        Deposit,
        DepositClient,
        DepositCollection,
    )

    # Apply the schema as it was *before* the migration under test
    old_state = migrator.apply_initial_migration(
        ("deposit", "0021_deposit_origin_url_20201124_1438")
    )
    old_deposit = old_state.apps.get_model("deposit", "Deposit")

    collection = DepositCollection.objects.create(name="hello")

    client = DepositClient.objects.create(
        username="******", collections=[collection.id]
    )

    # Create old deposits to make sure they are migrated properly
    deposit1 = old_deposit.objects.create(
        status="partial", client_id=client.id, collection_id=collection.id
    )
    deposit2 = old_deposit.objects.create(
        status="verified", client_id=client.id, collection_id=collection.id
    )

    origin = "https://hal.archives-ouvertes.fr/hal-01727745"
    directory_id = "42a13fc721c8716ff695d0d62fc851d641f3a12b"
    release_id = hash_to_bytes("548b3c0a2bb43e1fca191e24b5803ff6b3bc7c10")
    snapshot_id = hash_to_bytes("e5e82d064a9c3df7464223042e0c55d72ccff7f0")

    date_now = now()
    # metadata deposit: load-success with SWHIDs set (migration must detect it
    # as metadata-only from its shape)
    deposit3 = old_deposit.objects.create(
        status=DEPOSIT_STATUS_LOAD_SUCCESS,
        client_id=client.id,
        collection_id=collection.id,
        swhid=CoreSWHID(
            object_type=ObjectType.DIRECTORY,
            object_id=hash_to_bytes(directory_id),
        ),
        swhid_context=QualifiedSWHID(
            object_type=ObjectType.DIRECTORY,
            object_id=hash_to_bytes(directory_id),
            origin=origin,
            visit=CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=snapshot_id),
            anchor=CoreSWHID(object_type=ObjectType.RELEASE, object_id=release_id),
            path=b"/",
        ),
    )
    # work around (complete date is installed on creation)
    deposit3.complete_date = date_now
    deposit3.reception_date = date_now
    deposit3.save()

    # Before the migration, the model must not have the "type" field
    assert hasattr(old_deposit, "type") is False

    # Migrate to the latest schema
    new_state = migrator.apply_tested_migration(("deposit", "0022_auto_20220223_1542"))
    new_deposit = new_state.apps.get_model("deposit", "Deposit")

    assert hasattr(new_deposit, "type") is True
    # New deposits default to the "code" type
    assert Deposit().type == DEPOSIT_CODE

    all_deposits = Deposit.objects.all()
    assert len(all_deposits) == 3
    for deposit in all_deposits:
        if deposit.id in (deposit1.id, deposit2.id):
            assert deposit.type == DEPOSIT_CODE
        else:
            assert deposit.id == deposit3.id and deposit.type == DEPOSIT_METADATA_ONLY
def process_put(
    self,
    request,
    headers: ParsedRequestHeaders,
    collection_name: str,
    deposit: Deposit,
) -> None:
    """This allows the following scenarios:

    - multipart: replace all the deposit (status partial) metadata and archive
      with the provided ones.
    - atom: replace all the deposit (status partial) metadata with the
      provided ones.
    - with swhid, atom: Add new metatada to deposit (status done) with provided
      ones and push such metadata to the metadata storage directly.

       source:
       - http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_editingcontent_metadata
       - http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_editingcontent_multipart

    Raises:
        400 if any of the following occur:
        - the swhid provided and the deposit swhid do not match
        - the provided metadata xml file is malformed
        - the provided xml atom entry is empty
        - the provided swhid does not exist in the archive

    """  # noqa
    swhid = headers.swhid
    if swhid is None:
        # No SWHID header: this is a plain replacement of a partial deposit's
        # content, dispatched on the request content type.
        if request.content_type.startswith("multipart/"):
            self._multipart_upload(
                request,
                headers,
                collection_name,
                deposit=deposit,
                replace_archives=True,
                replace_metadata=True,
            )
        else:
            # standard metadata update (replace all metadata already provided to the
            # deposit by the new ones)
            self._atom_entry(
                request,
                headers,
                collection_name,
                deposit=deposit,
                replace_metadata=True,
            )
        return

    # Update metadata on a deposit already ingested
    # Write to the metadata storage (and the deposit backend)
    # no ingestion triggered

    assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS
    if swhid != deposit.swhid:
        raise DepositError(
            BAD_REQUEST,
            f"Mismatched provided SWHID {swhid} with deposit's {deposit.swhid}.",
            "The provided SWHID does not match the deposit to update. "
            "Please ensure you send the correct deposit SWHID.",
        )

    try:
        raw_metadata, metadata_tree = self._read_metadata(request.data)
    except ParserError:
        raise DepositError(
            BAD_REQUEST,
            "Malformed xml metadata",
            "The xml received is malformed. "
            "Please ensure your metadata file is correctly formatted.",
        )

    if len(metadata_tree) == 0:
        # An atom entry with no children carries no metadata to store
        raise DepositError(
            BAD_REQUEST,
            "Empty body request is not supported",
            "Atom entry deposit is supposed to send for metadata. "
            "If the body is empty, there is no metadata.",
        )

    _, deposit, deposit_request = self._store_metadata_deposit(
        deposit,
        QualifiedSWHID.from_string(swhid),
        metadata_tree,
        raw_metadata,
        deposit.origin_url,
    )