예제 #1
0
def test_QualifiedSWHID_parse_serialize_qualifiers(string, parsed):
    """Round-trip a SWHID string through ``QualifiedSWHID``.

    When ``parsed`` is ``None`` the string must be rejected with a
    ``ValidationError``. Otherwise parsing ``string`` must give ``parsed`` and
    serializing ``parsed`` must give back ``string``.
    """
    if parsed is not None:
        assert QualifiedSWHID.from_string(string) == parsed
        assert str(parsed) == string
        return

    with pytest.raises(ValidationError):
        # The print only runs if parsing unexpectedly succeeds; it then shows
        # the value that was wrongly accepted.
        print(repr(QualifiedSWHID.from_string(string)))
예제 #2
0
def complete_deposit(sample_archive, deposit_collection, authenticated_client):
    """Create, annotate and save a deposit in the "load success" status.

    The deposit gets a core directory SWHID in ``swhid`` and the matching
    qualified SWHID (origin, visit, anchor and path) in ``swhid_context``.

    Returns:
        The saved deposit.
    """
    loaded = create_deposit(
        authenticated_client,
        deposit_collection.name,
        sample_archive,
        external_id="external-id-complete",
        deposit_status=DEPOSIT_STATUS_LOAD_SUCCESS,
    )

    directory_hex = "42a13fc721c8716ff695d0d62fc851d641f3a12b"
    release = CoreSWHID(
        object_type=ObjectType.RELEASE,
        object_id=hash_to_bytes("548b3c0a2bb43e1fca191e24b5803ff6b3bc7c10"),
    )
    snapshot = CoreSWHID(
        object_type=ObjectType.SNAPSHOT,
        object_id=hash_to_bytes("e5e82d064a9c3df7464223042e0c55d72ccff7f0"),
    )

    loaded.swhid = f"swh:1:dir:{directory_hex}"
    context = QualifiedSWHID(
        object_type=ObjectType.DIRECTORY,
        object_id=hash_to_bytes(directory_hex),
        origin="https://hal.archives-ouvertes.fr/hal-01727745",
        visit=snapshot,
        anchor=release,
        path=b"/",
    )
    loaded.swhid_context = str(context)
    loaded.save()
    return loaded
예제 #3
0
def test_QualifiedSWHID_init(object_type, qualifiers, expected):
    """Tests validation and converters of qualifiers.

    ``expected`` is either an exception class that the constructor must raise,
    or the string the constructed SWHID must serialize to.
    """

    def build():
        return QualifiedSWHID(
            object_type=object_type, object_id=_x(HASH), **qualifiers
        )

    if not isinstance(expected, type):
        assert isinstance(expected, str)
        built = build()

        # The constructed object serializes to the expected string...
        assert expected == str(built)

        # ...and parsing that string yields an object equal to the built one.
        assert QualifiedSWHID.from_string(expected) == built
        return

    assert issubclass(expected, Exception)
    with pytest.raises(expected):
        build()
예제 #4
0
def test_QualifiedSWHID_serialize_origin():
    """Checks that a semicolon in the origin is escaped on serialization.

    The ``;`` of the origin must come out as ``%3B``, while the ``%25``
    already present in the origin is expected unchanged.
    """
    raw_origin = "https://example.org/foo;bar%25baz"
    expected = f"swh:1:cnt:{HASH};origin=https://example.org/foo%3Bbar%25baz"

    serialized = str(
        QualifiedSWHID(
            object_type=ObjectType.CONTENT,
            object_id=_x(HASH),
            origin=raw_origin,
        )
    )

    assert serialized == expected
예제 #5
0
def test_QualifiedSWHID_validation_error(ns, version, type, id, qualifiers):
    """Checks that an invalid combination of fields raises ``ValidationError``."""
    with pytest.raises(ValidationError):
        # Arguments are assembled inside the ``raises`` block so that every
        # expression is still evaluated within it.
        init_kwargs = dict(
            namespace=ns,
            scheme_version=version,
            object_type=type,
            object_id=_x(id),
            **qualifiers,
        )
        QualifiedSWHID(**init_kwargs)
예제 #6
0
def test_QualifiedSWHID_eq():
    """Checks that two QualifiedSWHIDs built from equal arguments compare equal."""
    object_id = _x("94a9ed024d3859793618152ea559a168bbcbb5e2")

    def directory_swhid(**qualifiers):
        return QualifiedSWHID(
            object_type=ObjectType.DIRECTORY,
            object_id=object_id,
            **qualifiers,
        )

    # One comparison without qualifiers, then the dummy_qualifiers comparison
    # run twice.
    for qualifiers in ({}, dummy_qualifiers, dummy_qualifiers):
        assert directory_swhid(**qualifiers) == directory_swhid(**qualifiers)
    def process_put(
        self,
        request,
        headers: ParsedRequestHeaders,
        collection_name: str,
        deposit: Deposit,
    ) -> None:
        """Record the loading outcome (status and SWHIDs) on the deposit.

        Reads ``status`` from the request data. On load success it also reads
        ``origin_url``, ``directory_id``, ``release_id`` and ``snapshot_id``
        and stores the core directory SWHID plus its qualified form on the
        deposit. An optional ``status_detail`` is copied over, then the
        deposit is saved.

        Returns:
            204 No content
            400 Bad request if checks fail

        """
        payload = request.data

        new_status = payload["status"]
        deposit.status = new_status

        if new_status != DEPOSIT_STATUS_LOAD_SUCCESS:
            # rejected
            deposit.status = new_status
        else:
            origin_url = payload["origin_url"]
            directory_id = payload["directory_id"]
            release_id = payload["release_id"]

            directory_bytes = hash_to_bytes(directory_id)
            directory_swhid = CoreSWHID(
                object_type=ObjectType.DIRECTORY, object_id=directory_bytes
            )
            snapshot_swhid = CoreSWHID(
                object_type=ObjectType.SNAPSHOT,
                object_id=hash_to_bytes(payload["snapshot_id"]),
            )
            release_swhid = CoreSWHID(
                object_type=ObjectType.RELEASE,
                object_id=hash_to_bytes(release_id),
            )

            deposit.swhid = str(directory_swhid)
            # Same directory, with contextual information attached
            qualified = QualifiedSWHID(
                object_type=ObjectType.DIRECTORY,
                object_id=directory_bytes,
                origin=origin_url,
                visit=snapshot_swhid,
                anchor=release_swhid,
                path="/",
            )
            deposit.swhid_context = str(qualified)

        if "status_detail" in payload:
            deposit.status_detail = payload["status_detail"]

        deposit.save()
예제 #8
0
def compute_metadata_context(swhid_reference: QualifiedSWHID) -> Dict[str, Any]:
    """Build the context dict for a SWHID reference.

    Without qualifiers the result is ``{"origin": None}``. With qualifiers it
    holds ``origin`` and ``path``, plus ``snapshot`` when a visit is set, and
    the anchor stored under the lower-cased name of its object type.
    """
    if not swhid_reference.qualifiers():
        return {"origin": None}

    context: Dict[str, Any] = {
        "origin": swhid_reference.origin,
        "path": swhid_reference.path,
    }

    visit = swhid_reference.visit
    if visit:
        context["snapshot"] = visit

    anchor = swhid_reference.anchor
    if anchor:
        anchor_key = anchor.object_type.name.lower()
        context[anchor_key] = anchor

    return context
예제 #9
0
def test_QualifiedSWHID_hash():
    """Checks that QualifiedSWHIDs built from equal arguments hash equally."""
    object_id = _x("94a9ed024d3859793618152ea559a168bbcbb5e2")

    def directory_hash(**qualifiers):
        return hash(
            QualifiedSWHID(
                object_type=ObjectType.DIRECTORY,
                object_id=object_id,
                **qualifiers,
            )
        )

    assert directory_hash() == directory_hash()

    assert directory_hash(**dummy_qualifiers) == directory_hash(**dummy_qualifiers)

    # Different order of the dictionary, so the underlying order of the tuple in
    # ImmutableDict is different.
    origin_first = directory_hash(origin="https://example.com", lines=(42, None))
    lines_first = directory_hash(lines=(42, None), origin="https://example.com")
    assert origin_first == lines_first
예제 #10
0
def parse_swh_reference(
    metadata: ElementTree.Element,
) -> Optional[Union[QualifiedSWHID, str]]:
    """Parse <swh:reference> within the metadata document, if any.

    .. code-block:: xml

       <swh:deposit>
         <swh:reference>
           <swh:origin url='https://github.com/user/repo'/>
         </swh:reference>
       </swh:deposit>

    or:

    .. code-block:: xml

       <swh:deposit>
         <swh:reference>
           <swh:object swhid="swh:1:dir:31b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=https://hal.archives-ouvertes.fr/hal-01243573;visit=swh:1:snp:4fc1e36fca86b2070204bedd51106014a614f321;anchor=swh:1:rev:9c5de20cfb54682370a398fcc733e829903c8cba;path=/moranegg-AffectationRO-df7f68b/" />
         </swh:reference>
       </swh:deposit>

    An origin reference takes precedence over an object reference when both
    are present.

    Args:
        metadata: result of parsing an Atom document

    Raises:
        ValidationError in case the swhid referenced (if any) is invalid:
        the anchor type is not in ``ALLOWED_QUALIFIERS_NODE_TYPE``, the visit
        is not a snapshot, or both visit and anchor target a snapshot.

    Returns:
        Either swhid or origin reference if any. None otherwise (including
        when the ``swhid`` attribute is empty).

    """  # noqa
    ref_origin = metadata.find(
        "swh:deposit/swh:reference/swh:origin[@url]", namespaces=NAMESPACES
    )
    if ref_origin is not None:
        return ref_origin.attrib["url"]

    ref_object = metadata.find(
        "swh:deposit/swh:reference/swh:object[@swhid]", namespaces=NAMESPACES
    )
    if ref_object is None:
        return None
    swhid = ref_object.attrib["swhid"]
    if not swhid:
        return None

    swhid_reference = QualifiedSWHID.from_string(swhid)

    # A SWHID without qualifiers needs no further validation.
    if not swhid_reference.qualifiers():
        return swhid_reference

    anchor = swhid_reference.anchor
    if anchor and anchor.object_type not in ALLOWED_QUALIFIERS_NODE_TYPE:
        error_msg = (
            "anchor qualifier should be a core SWHID with type one of "
            f"{', '.join(t.name.lower() for t in ALLOWED_QUALIFIERS_NODE_TYPE)}"
        )
        raise ValidationError(error_msg)

    visit = swhid_reference.visit
    if visit and visit.object_type != ObjectType.SNAPSHOT:
        raise ValidationError(
            f"visit qualifier should be a core SWHID with type snp, "
            f"not {visit.object_type.value}"
        )

    # From here on a visit, when present, is known to be a snapshot, so only
    # the anchor type still has to be checked.
    if visit and anchor and anchor.object_type == ObjectType.SNAPSHOT:
        # Logger.warn is a deprecated alias; use warning() with lazy %-args.
        logger.warning(
            "SWHID use of both anchor and visit targeting a snapshot: %s",
            swhid_reference,
        )
        raise ValidationError(
            "'anchor=swh:1:snp:' is not supported when 'visit' is also provided."
        )

    return swhid_reference
예제 #11
0
def test_deposit_metadata_swhid(
    swhid,
    authenticated_client,
    deposit_collection,
    atom_dataset,
    swh_storage,
):
    """Posting a swhid reference is stored on raw extrinsic metadata storage"""
    reference = QualifiedSWHID.from_string(swhid)
    target = extended_swhid_from_qualified(reference)

    atom_entry = atom_dataset["entry-data-with-swhid"].format(
        swhid=swhid,
        metadata_provenance_url="https://hal-test.archives-ouvertes.fr/hal-abcdefgh",
    )
    client = authenticated_client.deposit_client

    # Insert the referenced object into storage before posting the metadata
    _insert_object(swh_storage, reference)

    collection_url = reverse(COL_IRI, args=[deposit_collection.name])
    response = post_atom(authenticated_client, collection_url, data=atom_entry)

    assert response.status_code == status.HTTP_201_CREATED, response.content.decode()
    body = ElementTree.fromstring(response.content)

    # The deposit must already be finalized
    deposit_pk = int(body.findtext("swh:deposit_id", namespaces=NAMESPACES))
    deposit = Deposit.objects.get(pk=deposit_pk)
    assert deposit.swhid == str(target)
    assert deposit.swhid_context == str(reference)
    assert deposit.complete_date == deposit.reception_date
    assert deposit.complete_date is not None
    assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS

    # Authority recorded in the metadata storage matches the deposit client
    expected_authority = MetadataAuthority(
        type=MetadataAuthorityType.DEPOSIT_CLIENT,
        url=client.provider_url,
    )
    stored_authority = swh_storage.metadata_authority_get(
        MetadataAuthorityType.DEPOSIT_CLIENT, url=client.provider_url
    )
    assert stored_authority == expected_authority

    # Fetcher recorded in the metadata storage matches the configured tool
    config = APIConfig()
    expected_fetcher = MetadataFetcher(
        name=config.tool["name"],
        version=config.tool["version"],
    )
    stored_fetcher = swh_storage.metadata_fetcher_get(
        config.tool["name"], config.tool["version"]
    )
    assert stored_fetcher == expected_fetcher

    # Exactly one metadata object is stored for the target, on a single page
    page = swh_storage.raw_extrinsic_metadata_get(target, expected_authority)

    assert len(page.results) == 1
    assert page.next_page_token is None

    context = compute_metadata_context(reference)
    expected_metadata = RawExtrinsicMetadata(
        target=target,
        discovery_date=deposit.complete_date,
        authority=expected_authority,
        fetcher=expected_fetcher,
        format="sword-v2-atom-codemeta",
        metadata=atom_entry.encode(),
        **context,
    )
    assert page == PagedResult(results=[expected_metadata], next_page_token=None)

    # Check the metadata stored about the deposited metadata object itself
    _assert_deposit_info_on_metadata(
        swh_storage, expected_metadata.swhid(), deposit, expected_fetcher
    )
예제 #12
0
    with pytest.raises(ValidationError):
        swhid_class.from_string(invalid_swhid)


# string SWHIDs, and how they should be parsed by each of the classes,
# or None if the class does not support it
HASH = "94a9ed024d3859793618152ea559a168bbcbb5e2"
VALID_SWHIDS = [
    (
        f"swh:1:cnt:{HASH}",
        CoreSWHID(
            object_type=ObjectType.CONTENT,
            object_id=_x(HASH),
        ),
        QualifiedSWHID(
            object_type=ObjectType.CONTENT,
            object_id=_x(HASH),
        ),
        ExtendedSWHID(
            object_type=ExtendedObjectType.CONTENT,
            object_id=_x(HASH),
        ),
    ),
    (
        f"swh:1:dir:{HASH}",
        CoreSWHID(
            object_type=ObjectType.DIRECTORY,
            object_id=_x(HASH),
        ),
        QualifiedSWHID(
            object_type=ObjectType.DIRECTORY,
            object_id=_x(HASH),
예제 #13
0
def handle_deposit_row(
    row,
    discovery_date: Optional[datetime.datetime],
    origin,
    storage,
    deposit_cur,
    dry_run: bool,
):
    """Loads metadata from the deposit database (which is more reliable than the
    metadata on the revision object, as some versions of the deposit loader were
    a bit lossy; and they used very different formats for the field in the
    revision table).

    Args:
        row: mapping from which ``message``, ``id`` and ``directory`` are read
            (presumably a revision row -- confirm with the caller).
        discovery_date: discovery date to return; when ``None`` it is replaced
            by the most recent deposit request date.
        origin: origin URL, or ``None`` to derive it from the deposit client's
            provider URL and the deposit's external identifier.
        storage: passed to ``assert_origin_exists`` and ``load_metadata``.
        deposit_cur: cursor on the deposit database; used to run the query and
            then iterated for its result rows.
        dry_run: forwarded as-is to ``load_metadata``.

    Returns:
        The ``(origin, discovery_date)`` pair, after the defaults described
        above have been applied.

    Raises:
        AssertionError: if any sanity check on the message or on the deposit
            requests fails.
    """
    # The message is expected to encode the deposit id, collection and client.
    parsed_message = deposit_revision_message_re.match(row["message"])
    assert parsed_message is not None, row["message"]

    deposit_id = int(parsed_message.group("deposit_id"))
    collection = parsed_message.group("collection").decode()
    client_name = parsed_message.group("client").decode()

    # Fetch every deposit request of that deposit, joined with its collection,
    # client and user. The deposit id is passed as a query parameter.
    deposit_cur.execute(
        f"SELECT {', '.join(DEPOSIT_COLS)} FROM deposit "
        f"INNER JOIN deposit_collection "
        f" ON (deposit.collection_id=deposit_collection.id) "
        f"INNER JOIN deposit_client ON (deposit.client_id=deposit_client.user_ptr_id) "
        f"INNER JOIN auth_user ON (deposit.client_id=auth_user.id) "
        f"INNER JOIN deposit_request ON (deposit.id=deposit_request.deposit_id) "
        f"WHERE deposit.id = %s",
        (deposit_id, ),
    )

    # Values collected across all deposit requests; the sets are later required
    # to hold exactly one element each (except ``dates``).
    provider_urls = set()
    swhids = set()
    metadata_entries = []
    dates = set()
    external_identifiers = set()
    for deposit_request_row in deposit_cur:
        deposit_request = dict(zip(DEPOSIT_COLS, deposit_request_row))

        # Sanity checks to make sure we selected the right deposit
        assert deposit_request["deposit.id"] == deposit_id
        assert deposit_request[
            "deposit_collection.name"] == collection, deposit_request
        if client_name != "":
            # Sometimes it's missing from the commit message
            assert deposit_request["auth_user.username"] == client_name

        # Date of the deposit request (either the initial request, or subsequent ones)
        date = deposit_request["deposit_request.date"]
        dates.add(date)

        if deposit_request["deposit.external_id"] == "hal-02355563":
            # Failed deposit: use a hard-coded SWHID instead of the database's
            swhids.add("swh:1:rev:9293f230baca9814490d4fff7ac53d487a20edb6"
                       ";origin=https://hal.archives-ouvertes.fr/hal-02355563")
        else:
            assert deposit_request["deposit.swhid_context"], deposit_request
            swhids.add(deposit_request["deposit.swhid_context"])
        external_identifiers.add(deposit_request["deposit.external_id"])

        # Client of the deposit
        provider_urls.add(deposit_request["deposit_client.provider_url"])

        # Classify the metadata format from the keys present in the document.
        # NOTE(review): ``format`` shadows the builtin of the same name.
        metadata = deposit_request["deposit_request.metadata"]
        if metadata is not None:
            json.dumps(metadata).encode()  # check it's valid
            if "@xmlns" in metadata:
                assert metadata["@xmlns"] == ATOM_NS
                assert metadata["@xmlns:codemeta"] in (CODEMETA_NS,
                                                       [CODEMETA_NS])
                format = NEW_DEPOSIT_FORMAT
            elif "{http://www.w3.org/2005/Atom}id" in metadata:
                assert ("{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}author"
                        in metadata
                        or "{http://www.w3.org/2005/Atom}author" in metadata)
                format = OLD_DEPOSIT_FORMAT
            else:
                # new format introduced in
                # https://forge.softwareheritage.org/D4065
                # it's the same as the first case, but with the @xmlns
                # declarations stripped
                # Most of them should have the "id", but some revisions,
                # like 4d3890004fade1f4ec3bf7004a4af0c490605128, are missing
                # this field
                assert "id" in metadata or "title" in metadata
                assert "codemeta:author" in metadata
                format = NEW_DEPOSIT_FORMAT
            metadata_entries.append((date, format, metadata))

    # Default the discovery date to the most recent deposit request date
    if discovery_date is None:
        discovery_date = max(dates)

    # Sanity checks to make sure deposit requests are consistent with each other
    assert len(metadata_entries) >= 1, deposit_id
    assert len(
        provider_urls) == 1, f"expected 1 provider url, got {provider_urls}"
    (provider_url, ) = provider_urls
    assert len(swhids) == 1
    (swhid, ) = swhids
    assert (len(external_identifiers) == 1
            ), f"expected 1 external identifier, got {external_identifiers}"
    (external_identifier, ) = external_identifiers

    # compute the origin from the external_identifier if we don't have one
    if origin is None:
        origin = f"{provider_url.strip('/')}/{external_identifier}"

        # explicit list of mistakes that happened in the past, but shouldn't
        # happen again:
        if origin == "https://hal.archives-ouvertes.fr/hal-01588781":
            # deposit id 75
            origin = "https://inria.halpreprod.archives-ouvertes.fr/hal-01588781"
        elif origin == "https://hal.archives-ouvertes.fr/hal-01588782":
            # deposit id 76
            origin = "https://inria.halpreprod.archives-ouvertes.fr/hal-01588782"
        elif origin == "https://hal.archives-ouvertes.fr/hal-01592430":
            # deposit id 143
            origin = "https://hal-preprod.archives-ouvertes.fr/hal-01592430"
        elif origin == "https://hal.archives-ouvertes.fr/hal-01588927":
            origin = "https://inria.halpreprod.archives-ouvertes.fr/hal-01588927"
        elif origin == "https://hal.archives-ouvertes.fr/hal-01593875":
            # deposit id 175
            origin = "https://hal-preprod.archives-ouvertes.fr/hal-01593875"
        elif deposit_id == 160:
            assert origin == "https://www.softwareheritage.org/je-suis-gpl", origin
            origin = "https://forge.softwareheritage.org/source/jesuisgpl/"
        elif origin == "https://hal.archives-ouvertes.fr/hal-01588942":
            # deposit id 90
            origin = "https://inria.halpreprod.archives-ouvertes.fr/hal-01588942"
        elif origin == "https://hal.archives-ouvertes.fr/hal-01592499":
            # deposit id 162
            origin = "https://hal-preprod.archives-ouvertes.fr/hal-01592499"
        elif origin == "https://hal.archives-ouvertes.fr/hal-01588935":
            # deposit id 89
            origin = "https://hal-preprod.archives-ouvertes.fr/hal-01588935"

        # Only a computed origin is checked for existence
        assert_origin_exists(storage, origin)

    # check the origin we computed matches the one in the deposit db
    swhid_origin = QualifiedSWHID.from_string(swhid).origin
    # NOTE(review): origin is always set at this point (it is computed above
    # when None), so this condition looks always true -- confirm.
    if origin is not None:
        # explicit list of mistakes that happened in the past, but shouldn't
        # happen again; each entry is an (origin, SWHID origin) pair:
        exceptions = [
            (
                # deposit id 229
                "https://hal.archives-ouvertes.fr/hal-01243573",
                "https://hal-test.archives-ouvertes.fr/hal-01243573",
            ),
            (
                # deposit id 199
                "https://hal.archives-ouvertes.fr/hal-01243065",
                "https://hal-test.archives-ouvertes.fr/hal-01243065",
            ),
            (
                # deposit id 164
                "https://hal.archives-ouvertes.fr/hal-01593855",
                "https://hal-preprod.archives-ouvertes.fr/hal-01593855",
            ),
        ]
        if (origin, swhid_origin) not in exceptions:
            assert origin == swhid_origin, (
                f"the origin we guessed from the deposit db or revision ({origin}) "
                f"doesn't match the one in the deposit db's SWHID ({swhid})")

    authority = MetadataAuthority(
        type=MetadataAuthorityType.DEPOSIT_CLIENT,
        url=provider_url,
        metadata={},
    )

    # Load every collected metadata entry, each with its own request date
    for (date, format, metadata) in metadata_entries:
        load_metadata(
            storage,
            row["id"],
            row["directory"],
            date,
            metadata,
            format,
            authority=authority,
            origin=origin,
            dry_run=dry_run,
        )

    return (origin, discovery_date)
예제 #14
0
def deposit_list_datatables(request: Request) -> JsonResponse:
    """Special API view to list and filter deposits, produced responses are intended
    to be consumed by datatables js framework used in deposits admin Web UI.

    Query parameters read: ``draw``, ``username``, ``search[value]``,
    ``excludePattern``, ``order[0][column]``, ``columns[<n>][name]``,
    ``order[0][dir]``, ``length`` and ``start``.

    Any exception raised while building the listing is sent to sentry and
    reported through the ``error`` key of the response; the message carries
    the exception text only when ``settings.DEBUG`` is set.
    """
    params = request.GET
    table_data: Dict[str, Any] = {"draw": int(params.get("draw", 1))}

    # Fields copied verbatim from the serialized deposit, in output order
    copied_fields = (
        "id",
        "type",
        "external_id",
        "raw_metadata",
        "reception_date",
        "status",
        "status_detail",
        "swhid",
        "swhid_context",
    )

    def resolve_uri(entry):
        """Return the provenance/origin URI of a serialized deposit, or None."""
        raw_metadata = entry["raw_metadata"]
        deposit_type = entry["type"]
        uri = None

        if raw_metadata and deposit_type == "meta":
            # for meta deposit, the uri should be the url provenance
            uri = parse_swh_metadata_provenance(ElementTree.fromstring(raw_metadata))
        elif raw_metadata and deposit_type == "code":
            # For code deposits the uri is the origin; first try the raw
            # metadata associated with the deposit
            create_origin_url, add_to_origin_url = parse_swh_deposit_origin(
                ElementTree.fromstring(raw_metadata)
            )
            uri = create_origin_url or add_to_origin_url

        if not uri and deposit_type == "code":
            # For code deposits, if not provided, use the origin_url
            if entry["origin_url"]:
                uri = entry["origin_url"]

            # If still not found, fallback using the swhid context
            if not uri and entry["swhid_context"]:
                uri = QualifiedSWHID.from_string(entry["swhid_context"]).origin

        return uri

    try:
        username = params.get("username")
        if username:
            deposits = Deposit.objects.select_related("client").filter(
                client__username=username
            )
        else:
            deposits = Deposit.objects.all()

        # Total is counted before the search/exclude filters are applied
        total_count = deposits.count()

        search_value = params.get("search[value]")
        if search_value:
            deposits = deposits.filter(_deposit_search_query(search_value))

        exclude_pattern = params.get("excludePattern")
        if exclude_pattern:
            deposits = deposits.exclude(_deposit_search_query(exclude_pattern))

        column_order = params.get("order[0][column]")
        field_order = params.get("columns[%s][name]" % column_order, "id")
        if params.get("order[0][dir]", "desc") == "desc":
            field_order = "-" + field_order
        deposits = deposits.order_by(field_order)

        length = int(params.get("length", 10))
        page = int(params.get("start", 0)) // length + 1
        page_items = Paginator(deposits, length).page(page).object_list

        serialized = [
            DepositSerializer(_enrich_deposit_with_metadata(item)).data
            for item in page_items
        ]

        table_data["recordsTotal"] = total_count
        table_data["recordsFiltered"] = deposits.count()

        rows = []
        for entry in serialized:
            row = {field: entry[field] for field in copied_fields}
            row["uri"] = resolve_uri(entry)  # could be None
            rows.append(row)

        table_data["data"] = rows

    except Exception as exc:
        sentry_sdk.capture_exception(exc)
        table_data[
            "error"] = "An error occurred while retrieving the list of deposits !"
        if settings.DEBUG:
            table_data["error"] += "\n" + str(exc)

    return JsonResponse(table_data)
예제 #15
0
def test_migrations_22_add_deposit_type_column_model_and_data(migrator):
    """22 migration should add the type column and migrate old values with new type"""
    from swh.deposit.models import (
        DEPOSIT_CODE,
        DEPOSIT_METADATA_ONLY,
        Deposit,
        DepositClient,
        DepositCollection,
    )

    state_before = migrator.apply_initial_migration(
        ("deposit", "0021_deposit_origin_url_20201124_1438")
    )
    legacy_model = state_before.apps.get_model("deposit", "Deposit")

    collection = DepositCollection.objects.create(name="hello")
    client = DepositClient.objects.create(
        username="******", collections=[collection.id]
    )

    # Two deposits created with the pre-migration model; they are expected to
    # come out of the migration typed as code deposits
    partial_deposit = legacy_model.objects.create(
        status="partial", client_id=client.id, collection_id=collection.id
    )
    verified_deposit = legacy_model.objects.create(
        status="verified", client_id=client.id, collection_id=collection.id
    )

    origin = "https://hal.archives-ouvertes.fr/hal-01727745"
    directory_bytes = hash_to_bytes("42a13fc721c8716ff695d0d62fc851d641f3a12b")
    release = CoreSWHID(
        object_type=ObjectType.RELEASE,
        object_id=hash_to_bytes("548b3c0a2bb43e1fca191e24b5803ff6b3bc7c10"),
    )
    snapshot = CoreSWHID(
        object_type=ObjectType.SNAPSHOT,
        object_id=hash_to_bytes("e5e82d064a9c3df7464223042e0c55d72ccff7f0"),
    )

    timestamp = now()
    # metadata deposit
    metadata_deposit = legacy_model.objects.create(
        status=DEPOSIT_STATUS_LOAD_SUCCESS,
        client_id=client.id,
        collection_id=collection.id,
        swhid=CoreSWHID(
            object_type=ObjectType.DIRECTORY,
            object_id=directory_bytes,
        ),
        swhid_context=QualifiedSWHID(
            object_type=ObjectType.DIRECTORY,
            object_id=directory_bytes,
            origin=origin,
            visit=snapshot,
            anchor=release,
            path=b"/",
        ),
    )
    # work around (complete date is installed on creation)
    metadata_deposit.complete_date = timestamp
    metadata_deposit.reception_date = timestamp
    metadata_deposit.save()

    assert not hasattr(legacy_model, "type")

    # Migrate to the latest schema
    state_after = migrator.apply_tested_migration(
        ("deposit", "0022_auto_20220223_1542")
    )
    migrated_model = state_after.apps.get_model("deposit", "Deposit")

    assert hasattr(migrated_model, "type")

    assert Deposit().type == DEPOSIT_CODE

    migrated_deposits = Deposit.objects.all()
    assert len(migrated_deposits) == 3

    code_ids = (partial_deposit.id, verified_deposit.id)
    for migrated in migrated_deposits:
        if migrated.id in code_ids:
            assert migrated.type == DEPOSIT_CODE
        else:
            assert (
                migrated.id == metadata_deposit.id
                and migrated.type == DEPOSIT_METADATA_ONLY
            )
예제 #16
0
    def process_put(
        self,
        request,
        headers: ParsedRequestHeaders,
        collection_name: str,
        deposit: Deposit,
    ) -> None:
        """This allows the following scenarios:

        - multipart: replace all the deposit (status partial) metadata and archive
          with the provided ones.
        - atom: replace all the deposit (status partial) metadata with the
          provided ones.
        - with swhid, atom: Add new metadata to deposit (status done) with provided ones
          and push such metadata to the metadata storage directly.

           source:
           - http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_editingcontent_metadata
           - http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_editingcontent_multipart

        Args:
            request: incoming request; its ``content_type`` and ``data`` are read
            headers: parsed request headers; only ``headers.swhid`` is read here
            collection_name: forwarded to the multipart/atom update helpers
            deposit: the deposit to update

        Raises:
            400 if any of the following occur:
            - the swhid provided and the deposit swhid do not match
            - the provided metadata xml file is malformed
            - the provided xml atom entry is empty
            - the provided swhid does not exist in the archive

        """  # noqa
        swhid = headers.swhid
        if swhid is None:
            # No SWHID header: plain replacement of the deposit's content
            if request.content_type.startswith("multipart/"):
                self._multipart_upload(
                    request,
                    headers,
                    collection_name,
                    deposit=deposit,
                    replace_archives=True,
                    replace_metadata=True,
                )
            else:
                # standard metadata update (replace all metadata already provided to the
                # deposit by the new ones)
                self._atom_entry(
                    request,
                    headers,
                    collection_name,
                    deposit=deposit,
                    replace_metadata=True,
                )
            return

        # Update metadata on a deposit already ingested
        # Write to the metadata storage (and the deposit backend)
        # no ingestion triggered

        # NOTE(review): this check is an assert, so it is skipped when Python
        # runs with -O -- confirm that is acceptable.
        assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS

        if swhid != deposit.swhid:
            raise DepositError(
                BAD_REQUEST,
                f"Mismatched provided SWHID {swhid} with deposit's {deposit.swhid}.",
                "The provided SWHID does not match the deposit to update. "
                "Please ensure you send the correct deposit SWHID.",
            )

        try:
            raw_metadata, metadata_tree = self._read_metadata(request.data)
        except ParserError:
            # Report unparsable XML as a bad request
            raise DepositError(
                BAD_REQUEST,
                "Malformed xml metadata",
                "The xml received is malformed. "
                "Please ensure your metadata file is correctly formatted.",
            )

        # A parsed tree of length 0 is treated as an empty request body
        if len(metadata_tree) == 0:
            raise DepositError(
                BAD_REQUEST,
                "Empty body request is not supported",
                "Atom entry deposit is supposed to send for metadata. "
                "If the body is empty, there is no metadata.",
            )

        # The returned values are bound but not used in the visible code
        _, deposit, deposit_request = self._store_metadata_deposit(
            deposit,
            QualifiedSWHID.from_string(swhid),
            metadata_tree,
            raw_metadata,
            deposit.origin_url,
        )