Beispiel #1
0
    def test_timezone_insensitive(self):
        """The timezone of ``discovery_date`` must not change the git object.

        ``self.minimal`` is copied with its discovery date replaced by
        12:27:51 expressed at UTC+01:00; the git object and id built from
        that copy must equal those built from ``self.minimal`` itself.
        """
        plus_one_hour = datetime.timezone(datetime.timedelta(hours=1))
        shifted_date = datetime.datetime(
            2021, 1, 25, 12, 27, 51, tzinfo=plus_one_hour)

        metadata = dict(self.minimal)
        metadata["discovery_date"] = shifted_date

        reference = RawExtrinsicMetadata.from_dict(self.minimal)
        shifted = RawExtrinsicMetadata.from_dict(metadata)

        # Same serialized object, hence same identifier.
        self.assertEqual(
            git_objects.raw_extrinsic_metadata_git_object(reference),
            git_objects.raw_extrinsic_metadata_git_object(shifted),
        )
        self.assertEqual(reference.id, shifted.id)

        # Pin the identifier so accidental format changes are caught.
        self.assertEqual(
            shifted.id,
            _x("5c13f20ba336e44549baf3d7b9305b027ec9f43d"),
        )
Beispiel #2
0
    def test_microsecond_insensitive(self):
        """Microseconds in ``discovery_date`` must not change the git object.

        ``self.minimal`` is copied with a discovery date carrying 123456
        microseconds; the git object and id built from that copy must equal
        those built from ``self.minimal`` itself.
        """
        date_with_micros = datetime.datetime(
            2021, 1, 25, 11, 27, 51,
            microsecond=123456,
            tzinfo=datetime.timezone.utc,
        )

        metadata = dict(self.minimal)
        metadata["discovery_date"] = date_with_micros

        reference = RawExtrinsicMetadata.from_dict(self.minimal)
        with_micros = RawExtrinsicMetadata.from_dict(metadata)

        self.assertEqual(
            git_objects.raw_extrinsic_metadata_git_object(reference),
            git_objects.raw_extrinsic_metadata_git_object(with_micros),
        )
        self.assertEqual(reference.id, with_micros.id)

        # Pin the identifier so accidental format changes are caught.
        self.assertEqual(
            with_micros.id,
            _x("5c13f20ba336e44549baf3d7b9305b027ec9f43d"),
        )
Beispiel #3
0
    def test_maximal(self):
        """Pin the git object and id produced from ``self.maximal``.

        The expected object carries, besides the fields also present in the
        minimal case, the origin, visit, snapshot, release, revision, path
        and directory context lines.
        """
        git_object = b"".join((
            b"raw_extrinsic_metadata 533\0",
            b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n",
            b"discovery_date 1611574071\n",
            b"authority forge https://forge.softwareheritage.org/\n",
            b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n",
            b"format json\n",
            b"origin https://forge.softwareheritage.org/source/swh-model/\n",
            b"visit 42\n",
            b"snapshot swh:1:snp:0000000000000000000000000000000000000000\n",
            b"release swh:1:rel:0101010101010101010101010101010101010101\n",
            b"revision swh:1:rev:0202020202020202020202020202020202020202\n",
            b"path /abc/def\n",
            b"directory swh:1:dir:0303030303030303030303030303030303030303\n",
            b"\n",
            b'{"foo": "bar"}',
        ))

        rem = RawExtrinsicMetadata.from_dict(self.maximal)

        self.assertEqual(
            git_objects.raw_extrinsic_metadata_git_object(rem),
            git_object,
        )
        # The id is the SHA-1 of the serialized object ...
        self.assertEqual(rem.id, hashlib.sha1(git_object).digest())
        # ... and is pinned to a known value.
        self.assertEqual(
            rem.id,
            _x("f96966e1093d15236a31fde07e47d5b1c9428049"),
        )
Beispiel #4
0
    def test_nonascii_path(self):
        """Pin the git object for a path holding unusual bytes.

        The path contains a newline, a four-byte non-ASCII sequence and a
        NUL byte. In the expected object the embedded newline is followed by
        a single space; all other bytes appear unchanged.
        """
        metadata = dict(self.minimal)
        metadata["path"] = b"/ab\nc/d\xf0\x9f\xa4\xb7e\x00f"

        git_object = b"".join((
            b"raw_extrinsic_metadata 231\0",
            b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n",
            b"discovery_date 1611574071\n",
            b"authority forge https://forge.softwareheritage.org/\n",
            b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n",
            b"format json\n",
            b"path /ab\n",
            b" c/d\xf0\x9f\xa4\xb7e\x00f\n",
            b"\n",
            b'{"foo": "bar"}',
        ))

        rem = RawExtrinsicMetadata.from_dict(metadata)

        self.assertEqual(
            git_objects.raw_extrinsic_metadata_git_object(rem),
            git_object,
        )
        self.assertEqual(rem.id, hashlib.sha1(git_object).digest())
        self.assertEqual(
            rem.id,
            _x("7cc83fd1912176510c083f5df43f01b09af4b333"),
        )
Beispiel #5
0
    def test_negative_epoch(self):
        """Pin the git object for a date just below the Unix epoch.

        The discovery date is 1969-12-31 23:59:59 UTC plus one microsecond;
        the expected object records it as ``discovery_date -1``.
        """
        just_before_epoch = datetime.datetime(
            1969, 12, 31, 23, 59, 59,
            microsecond=1,
            tzinfo=datetime.timezone.utc,
        )
        metadata = dict(self.minimal)
        metadata["discovery_date"] = just_before_epoch

        git_object = b"".join((
            b"raw_extrinsic_metadata 202\0",
            b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n",
            b"discovery_date -1\n",
            b"authority forge https://forge.softwareheritage.org/\n",
            b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n",
            b"format json\n",
            b"\n",
            b'{"foo": "bar"}',
        ))

        rem = RawExtrinsicMetadata.from_dict(metadata)

        self.assertEqual(
            git_objects.raw_extrinsic_metadata_git_object(rem),
            git_object,
        )
        self.assertEqual(rem.id, hashlib.sha1(git_object).digest())
        self.assertEqual(
            rem.id,
            _x("be7154a8fd49d87f81547ea634d1e2152907d089"),
        )
Beispiel #6
0
    def test_epoch(self):
        """Pin the git object for a discovery date at the Unix epoch.

        The discovery date is 1970-01-01 00:00:00 UTC; the expected object
        records it as ``discovery_date 0``.
        """
        epoch = datetime.datetime(
            1970, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc)
        metadata = dict(self.minimal)
        metadata["discovery_date"] = epoch

        git_object = b"".join((
            b"raw_extrinsic_metadata 201\0",
            b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n",
            b"discovery_date 0\n",
            b"authority forge https://forge.softwareheritage.org/\n",
            b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n",
            b"format json\n",
            b"\n",
            b'{"foo": "bar"}',
        ))

        rem = RawExtrinsicMetadata.from_dict(metadata)

        self.assertEqual(
            git_objects.raw_extrinsic_metadata_git_object(rem),
            git_object,
        )
        self.assertEqual(rem.id, hashlib.sha1(git_object).digest())
        self.assertEqual(
            rem.id,
            _x("27a53df54ace35ebd910493cdc70b334d6b7cb88"),
        )
Beispiel #7
0
    def test_negative_timestamp(self):
        """Pin the git object for a discovery date well before the epoch.

        The discovery date is 1960-01-25 11:27:51 UTC; the expected object
        records it as ``discovery_date -313504329``.
        """
        year_1960 = datetime.datetime(
            1960, 1, 25, 11, 27, 51, tzinfo=datetime.timezone.utc)
        metadata = dict(self.minimal)
        metadata["discovery_date"] = year_1960

        git_object = b"".join((
            b"raw_extrinsic_metadata 210\0",
            b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n",
            b"discovery_date -313504329\n",
            b"authority forge https://forge.softwareheritage.org/\n",
            b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n",
            b"format json\n",
            b"\n",
            b'{"foo": "bar"}',
        ))

        rem = RawExtrinsicMetadata.from_dict(metadata)

        self.assertEqual(
            git_objects.raw_extrinsic_metadata_git_object(rem),
            git_object,
        )
        self.assertEqual(rem.id, hashlib.sha1(git_object).digest())
        self.assertEqual(
            rem.id,
            _x("895d0821a2991dd376ddc303424aceb7c68280f9"),
        )
Beispiel #8
0
    def test_minimal(self):
        """Pin the git object and id produced from ``self.minimal``.

        The expected object holds the target, discovery date, authority,
        fetcher and format header lines, a blank line, then the metadata
        payload.
        """
        git_object = b"".join((
            b"raw_extrinsic_metadata 210\0",
            b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n",
            b"discovery_date 1611574071\n",
            b"authority forge https://forge.softwareheritage.org/\n",
            b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n",
            b"format json\n",
            b"\n",
            b'{"foo": "bar"}',
        ))

        rem = RawExtrinsicMetadata.from_dict(self.minimal)

        self.assertEqual(
            git_objects.raw_extrinsic_metadata_git_object(rem),
            git_object,
        )
        # The id is the SHA-1 of the serialized object ...
        self.assertEqual(rem.id, hashlib.sha1(git_object).digest())
        # ... and is pinned to a known value.
        self.assertEqual(
            rem.id,
            _x("5c13f20ba336e44549baf3d7b9305b027ec9f43d"),
        )
Beispiel #9
0
def test_deposit_loading_ok_2(swh_storage, deposit_client,
                              requests_mock_datadir):
    """Date fields should be set appropriately.

    Loads deposit 777 and then checks, in order: the load status, the last
    visit, the snapshot, the release date against the deposit's author date,
    the extrinsic metadata stored for the origin and for the release's
    target directory, and the single status-update request sent back to the
    deposit server.
    """
    deposit_id = 777
    external_id = "some-external-id"
    url = f"https://hal-test.archives-ouvertes.fr/{external_id}"

    loader = DepositLoader(
        swh_storage,
        url,
        deposit_id,
        deposit_client,
        default_filename="archive.zip",
    )

    # --- load status, visit and snapshot ---------------------------------
    actual_load_status = loader.load()
    expected_snapshot_id = "3449b8ff31abeacefd33cca60e3074c1649dc3a1"
    expected_status = {
        "status": "eventful",
        "snapshot_id": expected_snapshot_id,
    }
    assert actual_load_status == expected_status

    snapshot_id_bytes = hash_to_bytes(expected_snapshot_id)
    assert_last_visit_matches(
        loader.storage,
        url,
        status="full",
        type="deposit",
        snapshot=snapshot_id_bytes,
    )

    release_id = "ba6c9a59ae3256e765d32b211cc183dc2380aed7"
    release_id_bytes = hash_to_bytes(release_id)
    head_branch = SnapshotBranch(
        target=release_id_bytes,
        target_type=TargetType.RELEASE,
    )
    expected_snapshot = Snapshot(
        id=snapshot_id_bytes,
        branches={b"HEAD": head_branch},
    )
    check_snapshot(expected_snapshot, storage=loader.storage)

    # --- release dates ---------------------------------------------------
    raw_meta = loader.client.metadata_get(deposit_id)

    release = loader.storage.release_get([release_id_bytes])[0]
    assert release

    # The deposit side sends a numeric offset in minutes rather than the
    # bytes offset attribute: its dates are always well-formed and it can
    # only send JSON-serializable data.
    release_date_dict = {
        "timestamp": release.date.timestamp.to_dict(),
        "offset": release.date.offset_minutes(),
    }
    assert release_date_dict == raw_meta["deposit"]["author_date"]

    assert not release.metadata

    # --- origin extrinsic metadata ---------------------------------------
    fetcher = MetadataFetcher(name="swh-deposit", version="0.0.1")
    authority = MetadataAuthority(
        type=MetadataAuthorityType.DEPOSIT_CLIENT,
        url="https://hal-test.archives-ouvertes.fr/",
    )

    origin_swhid = Origin(url).swhid()
    origin_extrinsic_metadata = loader.storage.raw_extrinsic_metadata_get(
        origin_swhid, authority)
    assert origin_extrinsic_metadata.next_page_token is None

    raw_metadata: str = raw_meta["raw_metadata"]
    # One raw metadata XML entry plus one JSON dict entry.
    assert len(origin_extrinsic_metadata.results) == 2

    # Key order matters here: this dict is serialized with json.dumps and
    # the resulting bytes are compared for equality.
    origin_metadata = {
        "metadata": [raw_metadata],
        "provider": {
            "provider_name": "hal",
            "provider_type": "deposit_client",
            "provider_url": "https://hal-test.archives-ouvertes.fr/",
            "metadata": None,
        },
        "tool": {
            "name": "swh-deposit",
            "version": "0.0.1",
            "configuration": {
                "sword_version": "2"
            },
        },
    }

    # Discovery dates are copied from the stored entries, so they are not
    # themselves under test.
    first_stored = origin_extrinsic_metadata.results[0]
    last_stored = origin_extrinsic_metadata.results[-1]
    expected_metadata = [
        RawExtrinsicMetadata(
            target=origin_swhid,
            discovery_date=first_stored.discovery_date,
            metadata=raw_metadata.encode(),
            format="sword-v2-atom-codemeta-v2",
            authority=authority,
            fetcher=fetcher,
        ),
        RawExtrinsicMetadata(
            target=origin_swhid,
            discovery_date=last_stored.discovery_date,
            metadata=json.dumps(origin_metadata).encode(),
            format="original-artifacts-json",
            authority=authority,
            fetcher=fetcher,
        ),
    ]

    assert sorted(
        origin_extrinsic_metadata.results) == sorted(expected_metadata)

    # --- directory (release target) extrinsic metadata -------------------
    assert release.target_type == ModelObjectType.DIRECTORY
    directory_swhid = ExtendedSWHID(
        object_type=ExtendedObjectType.DIRECTORY,
        object_id=release.target,
    )
    actual_directory_metadata = loader.storage.raw_extrinsic_metadata_get(
        directory_swhid, authority)

    assert actual_directory_metadata.next_page_token is None
    assert len(actual_directory_metadata.results) == 1

    release_swhid = CoreSWHID(
        object_type=ObjectType.RELEASE,
        object_id=release_id_bytes,
    )
    dir_metadata_template = RawExtrinsicMetadata(
        target=directory_swhid,
        format="sword-v2-atom-codemeta-v2",
        authority=authority,
        fetcher=fetcher,
        origin=url,
        release=release_swhid,
        # Placeholders to satisfy the constructor; both are overridden below.
        discovery_date=now(),
        metadata=b"",
    )

    # Start from the template's fields minus "id", then fill in the stored
    # discovery date and the real payload.
    expected_fields = {
        key: value
        for key, value in dir_metadata_template.to_dict().items()
        if key != "id"
    }
    expected_fields["discovery_date"] = (
        actual_directory_metadata.results[0].discovery_date)
    expected_fields["metadata"] = raw_metadata.encode()
    expected_directory_metadata = [
        RawExtrinsicMetadata.from_dict(expected_fields)
    ]

    assert sorted(actual_directory_metadata.results) == sorted(
        expected_directory_metadata)

    # --- status update sent back to the deposit server -------------------
    update_url = f"{DEPOSIT_URL}/{deposit_id}/update/"
    update_requests = [
        request for request in requests_mock_datadir.request_history
        if request.url == update_url
    ]
    assert len(update_requests) == 1

    body = update_requests[0].json()
    expected_body = {
        "status": "done",
        "release_id": release_id,
        "directory_id": hash_to_hex(release.target),
        "snapshot_id": expected_snapshot_id,
        "origin_url": url,
    }
    assert body == expected_body
Beispiel #10
0
            "discovery_date":
            datetime.datetime(
                2021,
                1,
                25,
                11,
                27,
                50,
                1_000_000 - 42,
                tzinfo=tz,
            ),
        }

        self.assertEqual(
            git_objects.raw_extrinsic_metadata_git_object(
                RawExtrinsicMetadata.from_dict(self.minimal)),
            git_objects.raw_extrinsic_metadata_git_object(
                RawExtrinsicMetadata.from_dict(metadata)),
        )
        self.assertEqual(
            RawExtrinsicMetadata.from_dict(self.minimal).id,
            RawExtrinsicMetadata.from_dict(metadata).id,
        )
        self.assertEqual(
            RawExtrinsicMetadata.from_dict(metadata).id,
            _x("5c13f20ba336e44549baf3d7b9305b027ec9f43d"),
        )

    def test_negative_timestamp(self):
        metadata = {
            **self.minimal,