# Example #1
0
def test_nixguix():
    """Check the storage calls ``handle_row`` makes for a nixguix ``tar`` row.

    The row has no dates and its extrinsic metadata names the nixpkgs-swh
    ``sources-unstable.json`` URL as provider.  The test expects one
    ``origin_get`` lookup for that URL followed by two
    ``raw_extrinsic_metadata_add`` calls: the raw extrinsic metadata
    (``nixguix-sources-json``) and then the original artifacts
    (``original-artifacts-json``), both dated with the extrinsic ``when``
    timestamp.
    """
    origin_url = "https://nix-community.github.io/nixpkgs-swh/sources-unstable.json"

    extrinsic_metadata = {
        "url": "https://files.pythonhosted.org/packages/source/a/alerta/alerta-7.4.5.tar.gz",
        "integrity": "sha256-km8RAaG1ep+tYR8eHVr3UWk+/MNEqdsBr1Di/g02LYQ=",
    }
    original_artifacts = [
        {
            "length": 34903,
            "filename": "alerta-7.4.5.tar.gz",
            "checksums": {
                "sha1": "66db4398b664de272fd5aa6610caa776b5e64651",
                "sha256": "926f1101a1b57a9fad611f1e1d5af751693efcc344a9db01af50e2fe0d362d84",
            },
        }
    ]

    row = {
        "id": b"\x00\x01\xbaM\xd0S\x94\x85\x02\x11\xd7\xb3\x85M\x99\x13\xd2:\xe3y",
        "directory": DIRECTORY_ID,
        "date": None,
        "committer_date": None,
        "type": "tar",
        "message": b"",
        "metadata": {
            "extrinsic": {
                "raw": extrinsic_metadata,
                "when": "2020-06-03T11:25:05.259341+00:00",
                "provider": "https://nix-community.github.io/nixpkgs-swh/sources-unstable.json",
            },
            "original_artifact": original_artifacts,
        },
    }

    def fake_origin_get(urls):
        # Only the expected origin may be looked up.
        assert urls == [origin_url]
        return [Origin(url=origin_url)]

    storage = Mock()
    storage.origin_get.side_effect = fake_origin_get

    # Third positional argument is the deposit cursor; none is supplied here.
    handle_row(row, storage, None, dry_run=False)

    # The expectations are built only after the call because ``row`` (and the
    # dicts it references) is handed to ``handle_row`` without a copy.
    discovery_date = datetime.datetime(
        2020, 6, 3, 11, 25, 5, 259341, tzinfo=datetime.timezone.utc
    )
    revision_swhid = CoreSWHID.from_string(
        "swh:1:rev:0001ba4dd05394850211d7b3854d9913d23ae379"
    )

    def metadata_add(authority, metadata_format, payload):
        # One expected ``raw_extrinsic_metadata_add`` call with a single object.
        return call.raw_extrinsic_metadata_add(
            [
                RawExtrinsicMetadata(
                    target=DIRECTORY_SWHID,
                    discovery_date=discovery_date,
                    authority=authority,
                    fetcher=FETCHER,
                    format=metadata_format,
                    metadata=json.dumps(payload).encode(),
                    origin=origin_url,
                    revision=revision_swhid,
                ),
            ]
        )

    expected_calls = [
        call.origin_get([origin_url]),
        metadata_add(NIX_UNSTABLE_AUTHORITY, "nixguix-sources-json", extrinsic_metadata),
        metadata_add(SWH_AUTHORITY, "original-artifacts-json", original_artifacts),
    ]
    assert storage.method_calls == expected_calls
def test_debian_without_extrinsic():
    """Check ``handle_row`` on a Debian ``dsc`` row without extrinsic metadata.

    The row's ``original_artifact`` entries use a flat layout (``name`` plus
    checksum keys at the top level).  The test expects a single
    ``raw_extrinsic_metadata_add`` call whose payload is the same artifacts
    rewritten with a ``filename`` key and a nested ``checksums`` dict, dated
    with the row's ``date`` and attached to the origin returned by the
    patched ``debian_origins_from_row``.
    """
    source_original_artifacts = [
        {
            "name": "pymongo_1.10-1.dsc",
            "sha1": "81877c1ae4406c2519b9cc9c4557cf6b0775a241",
            "length": 99,
            "sha256": "40269a73f38ee4c2f9cc021f1d5d091cc59ca6e778c339684b7be030e29e282f",
            "sha1_git": "0ac7bdb8e4d10926c5d3e51baa2be7bb29a3966b",
        },
        {
            "name": "pymongo_1.10.orig.tar.gz",
            "sha1": "4f4c97641b86ac8f21396281bd1a7369236693c3",
            "length": 99,
            "sha256": "0b6bffb310782ffaeb3916c75790742ec5830c63a758fc711cd1f557eb5a4b5f",
            "sha1_git": "19ef0adda8868520d1ef9d4164b3ace4df1d62ad",
        },
        {
            "name": "pymongo_1.10-1.debian.tar.gz",
            "sha1": "fbf378296613c8d55e043aec98896b3e50a94971",
            "length": 99,
            "sha256": "3970cc70fe3ba6499a9c56ba4b4c6c3782f56433d0d17d72b7a0e2ceae31b513",
            "sha1_git": "2eea9904806050a8fda95edd5d4fa60d29c1fdec",
        },
    ]

    dest_original_artifacts = [
        {
            "length": 99,
            "filename": "pymongo_1.10-1.dsc",
            "checksums": {
                "sha1": "81877c1ae4406c2519b9cc9c4557cf6b0775a241",
                "sha256": "40269a73f38ee4c2f9cc021f1d5d091cc59ca6e778c339684b7be030e29e282f",
                "sha1_git": "0ac7bdb8e4d10926c5d3e51baa2be7bb29a3966b",
            },
        },
        {
            "length": 99,
            "filename": "pymongo_1.10.orig.tar.gz",
            "checksums": {
                "sha1": "4f4c97641b86ac8f21396281bd1a7369236693c3",
                "sha256": "0b6bffb310782ffaeb3916c75790742ec5830c63a758fc711cd1f557eb5a4b5f",
                "sha1_git": "19ef0adda8868520d1ef9d4164b3ace4df1d62ad",
            },
        },
        {
            "length": 99,
            "filename": "pymongo_1.10-1.debian.tar.gz",
            "checksums": {
                "sha1": "fbf378296613c8d55e043aec98896b3e50a94971",
                "sha256": "3970cc70fe3ba6499a9c56ba4b4c6c3782f56433d0d17d72b7a0e2ceae31b513",
                "sha1_git": "2eea9904806050a8fda95edd5d4fa60d29c1fdec",
            },
        },
    ]

    row = {
        "id": b"\x00\x00\x01\xc2\x8c\x8f\xca\x01\xb9\x04\xde\x92\xa2d\n\x86l\xe0<\xb7",
        "directory": DIRECTORY_ID,
        "date": datetime.datetime(
            2011, 3, 31, 20, 17, 41, tzinfo=datetime.timezone.utc
        ),
        "date_offset": 0,
        "type": "dsc",
        "message": b"Synthetic revision for Debian source package pymongo version 1.10-1",
        "metadata": {
            "package_info": {
                "name": "pymongo",
                "version": "1.10-1",
                "changelog": {
                    # ...
                },
                "maintainers": [
                    {"name": "Federico Ceratto", "email": "*****@*****.**"},
                    {"name": "Janos Guljas", "email": "*****@*****.**"},
                ],
                "pgp_signature": {
                    "date": "2011-03-31T21:02:44+00:00",
                    "keyid": "2BABC6254E66E7B8450AC3E1E6AA90171392B174",
                    "person": {"name": "David Paleino", "email": "*****@*****.**"},
                },
                "lister_metadata": {"id": 244296, "lister": "snapshot.debian.org"},
            },
            "original_artifact": source_original_artifacts,
        },
    }

    storage = Mock()

    origin_url = "http://snapshot.debian.org/package/pymongo"

    deposit_cur = None

    # ``patch`` requires a dotted "module.attribute" target; a bare name makes
    # it raise TypeError before the test body runs.  The name is patched in
    # the module that defines ``handle_row``.
    # NOTE(review): assumes ``debian_origins_from_row`` is a module-level name
    # of that same module -- confirm against the module under test.
    patch_target = handle_row.__module__ + ".debian_origins_from_row"
    with patch(patch_target, return_value=[origin_url]):
        handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False)

    assert storage.method_calls == [
        call.raw_extrinsic_metadata_add(
            [
                RawExtrinsicMetadata(
                    target=DIRECTORY_SWHID,
                    discovery_date=datetime.datetime(
                        2011, 3, 31, 20, 17, 41, tzinfo=datetime.timezone.utc
                    ),
                    authority=SWH_AUTHORITY,
                    fetcher=FETCHER,
                    format="original-artifacts-json",
                    metadata=json.dumps(dest_original_artifacts).encode(),
                    origin=origin_url,
                    revision=CoreSWHID.from_string(
                        "swh:1:rev:000001c28c8fca01b904de92a2640a866ce03cb7"
                    ),
                ),
            ]
        )
    ]
def test_gnu():
    """Check the storage calls ``handle_row`` makes for a GNU ``tar`` row.

    The row carries extrinsic metadata whose provider equals the expected
    origin URL.  The test expects one ``origin_get`` lookup for that URL and
    exactly one ``raw_extrinsic_metadata_add`` call holding the original
    artifacts, dated with the extrinsic ``when`` timestamp.
    """
    origin_url = "https://ftp.gnu.org/gnu/gperf/"

    original_artifacts = [
        {
            "length": 842501,
            "filename": "gperf-3.0.1.tar.gz",
            "checksums": {
                "sha1": "c4453ee492032b369006ee464f4dd4e2c0c0e650",
                "sha256": "5be283ef62e1bd26abdaaf88b416dbea4b14c360b09befcda2f055656dc43f87",
                "sha1_git": "bf1d5bb57d571101dd7b6acab2b78ae11bb861de",
                "blake2s256": "661f84afeb1e0b914defe2b249d424af1dfe380a96016b3282ae758c70e19a70",
            },
        }
    ]

    extrinsic = {
        "raw": {
            "url": "https://ftp.gnu.org/gnu/gperf/gperf-3.0.1.tar.gz",
            "time": "2003-06-13T00:11:00+00:00",
            "length": 842501,
            "version": "3.0.1",
            "filename": "gperf-3.0.1.tar.gz",
        },
        "when": "2019-11-27T11:17:38.318997+00:00",
        "provider": "https://ftp.gnu.org/gnu/gperf/",
    }

    row = {
        "id": b"\x00\x1cqE\x8e@[%\xba\xcc\xc8\x0b\x99\xf6cM\xff\x9d+\x18",
        "directory": DIRECTORY_ID,
        "date": datetime.datetime(2003, 6, 13, 0, 11, tzinfo=datetime.timezone.utc),
        "committer_date": datetime.datetime(
            2003, 6, 13, 0, 11, tzinfo=datetime.timezone.utc
        ),
        "type": "tar",
        "message": b"swh-loader-package: synthetic revision message",
        "metadata": {
            "extrinsic": extrinsic,
            "intrinsic": {},
            "original_artifact": original_artifacts,
        },
    }

    def fake_origin_get(urls):
        # Only the expected origin may be looked up.
        assert urls == [origin_url]
        return [Origin(url=origin_url)]

    storage = Mock()
    storage.origin_get.side_effect = fake_origin_get

    # Third positional argument is the deposit cursor; none is supplied here.
    handle_row(row, storage, None, dry_run=False)

    # Built after the call: ``row`` is passed to ``handle_row`` without a copy.
    expected_metadata = RawExtrinsicMetadata(
        target=DIRECTORY_SWHID,
        discovery_date=datetime.datetime(
            2019, 11, 27, 11, 17, 38, 318997, tzinfo=datetime.timezone.utc
        ),
        authority=SWH_AUTHORITY,
        fetcher=FETCHER,
        format="original-artifacts-json",
        metadata=json.dumps(original_artifacts).encode(),
        origin=origin_url,
        revision=CoreSWHID.from_string(
            "swh:1:rev:001c71458e405b25baccc80b99f6634dff9d2b18"
        ),
    )
    expected_calls = [
        call.origin_get([origin_url]),
        call.raw_extrinsic_metadata_add([expected_metadata]),
    ]
    assert storage.method_calls == expected_calls
def test_npm_1():
    """Check ``handle_row`` on a row from the newer NPM loader (with provider).

    The row's extrinsic metadata names a ``replicate.npmjs.com`` provider.
    The test expects one ``origin_get`` lookup for the ``www.npmjs.com``
    package URL followed by two ``raw_extrinsic_metadata_add`` calls: the raw
    package metadata (``replicate-npm-package-json``) and then the original
    artifacts (``original-artifacts-json``), both dated with the extrinsic
    ``when`` timestamp.
    """
    origin_url = "https://www.npmjs.com/package/@l3ilkojr/jdinsults"

    extrinsic_metadata = {
        "_id": "@l3ilkojr/[email protected]",
        "dist": {
            "shasum": "b7f0d66090e0285f4e95d082d39bcb0c1b8f4ec8",
            "tarball": "https://registry.npmjs.org/@l3ilkojr/jdinsults/-/jdinsults-3.0.0.tgz",
            "fileCount": 4,
            "integrity": "sha512-qpv8Zg51g0l51VjODEooMUGSGanGUuQpzX5msfR7ZzbgTsgPbpDNyTIsQ0wQzI9RzCCUjS84Ii2VhMISEQcEUA==",
            "unpackedSize": 1583,
            "npm-signature": "-----BEGIN PGP SIGNATURE-----\r\nVersion: OpenPGP.js v3.0.4\r\nComment: https://openpgpjs.org\r\n\r\nwsFcBAEBCAAQBQJeUMS5CRA9TVsSAnZWagAAXpgP/0YgNOWN0U/Fz2RGeQhR\nVIKPvfGqZ2UfFxxUXWIc4QHvwyLCNUedCctpVdqnqmGJ9m/hj3K2zbRPD7Tm\n3nPl0HfzE7v3T8TDZfGhzW3c9mWxig+syr+sjo0EKyAgZVJ0mxbjOl4KHt+U\nQEwl/4falBsyYtK/pkCXWmmuC606QmPn/c6ZRD1Fw4vJjT9i5qi1KaBkIf6M\nnFmpOFxTcwxGGltOk3s3TKDtr8CIeWmdm3VkgsP2ErkPKAOcu12AT4/5tkg0\nDU+m1XmJb67rskb4Ncjvic/VutnPkEfNrk1IRXrmjDZBQbHtCJ7hd5ETmb9S\nE5WmMV8cpaGiW7AZvGTmkn5WETwQQU7po914zYiMg9+ozdwc7yC8cpGj/UoF\niKxsc1uxdfwWk/p3dShegEYM7sveloIXYsPaxbd84WRIfnwkWFZV82op96E3\neX+FRkhMfsHlK8OjZsBPXkppaB48jnZdm3GOOzT9YgyphV33j3J9GnNcDMDe\nriyCLV1BNSKDHElCDrvl1cBGg+C5qn/cTYjQdfEPPY2Hl2MgW9s4UV2s+YSx\n0BBd2A3j80wncP+Y7HFeC4Pv0SM0Pdq6xJaf3ELhj6j0rVZeTW1O3E/PFLXK\nnn/DZcsFXgIzjY+eBIMQgAhqyeJve8LeQNnGt3iNW10E2nZMpfc+dn0ESiwV\n2Gw4\r\n=8uqZ\r\n-----END PGP SIGNATURE-----\r\n",
        },
        "name": "@l3ilkojr/jdinsults",
        "version": "3.0.0",
        "_npmUser": {"name": "l3ilkojr", "email": "*****@*****.**"},
        "_npmVersion": "6.13.6",
        "description": "Generates insults",
        "directories": {},
        "maintainers": [{"name": "l3ilkojr", "email": "*****@*****.**"}],
        "_nodeVersion": "10.14.0",
        "_hasShrinkwrap": False,
        "_npmOperationalInternal": {
            "tmp": "tmp/jdinsults_3.0.0_1582351545285_0.2614827716102821",
            "host": "s3://npm-registry-packages",
        },
    }

    original_artifacts = [
        {
            "length": 1033,
            "filename": "jdinsults-3.0.0.tgz",
            "checksums": {
                "sha1": "b7f0d66090e0285f4e95d082d39bcb0c1b8f4ec8",
                "sha256": "42f22795ac883b02fded0b2bf3d8a77f6507d40bc67f28eea6b1b73eb59c515f",
            },
        }
    ]

    metadata = {
        "extrinsic": {
            "raw": extrinsic_metadata,
            "when": "2020-02-27T01:35:47.965375+00:00",
            "provider": "https://replicate.npmjs.com/%40l3ilkojr%2Fjdinsults/",
        },
        "intrinsic": {
            "raw": {"name": "@l3ilkojr/jdinsults", "version": "3.0.0"},
            "tool": "package.json",
        },
        "original_artifact": original_artifacts,
    }

    row = {
        "id": b"\x00\x00\x02\xa4\x9b\xba\x17\xca\x8c\xf3\x7f_=\x16\xaa\xac\xf9S`\xfc",
        "directory": DIRECTORY_ID,
        "date": datetime.datetime(2020, 2, 22, 6, 5, 45, tzinfo=datetime.timezone.utc),
        "committer_date": datetime.datetime(
            2020, 2, 22, 6, 5, 45, tzinfo=datetime.timezone.utc
        ),
        "type": "tar",
        "message": b"3.0.0",
        "metadata": metadata,
    }

    def fake_origin_get(urls):
        # Only the expected origin may be looked up.
        assert urls == [origin_url]
        return [Origin(url=origin_url)]

    storage = Mock()
    storage.origin_get.side_effect = fake_origin_get

    # ``handle_row`` gets its own copy of the row; no deposit cursor is given.
    handle_row(copy.deepcopy(row), storage, None, dry_run=False)

    discovery_date = datetime.datetime(
        2020, 2, 27, 1, 35, 47, 965375, tzinfo=datetime.timezone.utc
    )
    revision_swhid = CoreSWHID.from_string(
        "swh:1:rev:000002a49bba17ca8cf37f5f3d16aaacf95360fc"
    )

    def metadata_add(authority, metadata_format, payload):
        # One expected ``raw_extrinsic_metadata_add`` call with a single object.
        return call.raw_extrinsic_metadata_add(
            [
                RawExtrinsicMetadata(
                    target=DIRECTORY_SWHID,
                    discovery_date=discovery_date,
                    authority=authority,
                    fetcher=FETCHER,
                    format=metadata_format,
                    metadata=json.dumps(payload).encode(),
                    origin=origin_url,
                    revision=revision_swhid,
                ),
            ]
        )

    expected_calls = [
        call.origin_get([origin_url]),
        metadata_add(NPM_AUTHORITY, "replicate-npm-package-json", extrinsic_metadata),
        metadata_add(SWH_AUTHORITY, "original-artifacts-json", original_artifacts),
    ]
    assert storage.method_calls == expected_calls
def test_debian_with_extrinsic():
    """Check ``handle_row`` on a Debian ``dsc`` row that has extrinsic metadata.

    The row's ``original_artifact`` entries are the expected artifacts with
    the ``url`` key removed; the per-file URLs appear only in the extrinsic
    ``files`` mapping (as ``uri``).  The test expects a single
    ``raw_extrinsic_metadata_add`` call whose payload is the artifacts with
    their ``url`` filled in, dated with the row's ``date`` and attached to the
    origin returned by the patched ``debian_origins_from_row``.
    """
    dest_original_artifacts = [
        {
            "length": 2936,
            "filename": "kalgebra_19.12.1-1.dsc",
            "checksums": {
                "sha1": "f869e9f1155b1ee6d28ae3b40060570152a358cd",
                "sha256": "75f77150aefdaa4bcf8bc5b1e9b8b90b5cb1651b76a068c5e58e5b83658d5d11",
            },
            "url": "http://deb.debian.org/debian//pool/main/k/kalgebra/kalgebra_19.12.1-1.dsc",
        },
        {
            "length": 1156408,
            "filename": "kalgebra_19.12.1.orig.tar.xz",
            "checksums": {
                "sha1": "e496032962212983a5359aebadfe13c4026fd45c",
                "sha256": "49d623186800eb8f6fbb91eb43fb14dff78e112624c9cda6b331d494d610b16a",
            },
            "url": "http://deb.debian.org/debian//pool/main/k/kalgebra/kalgebra_19.12.1.orig.tar.xz",
        },
        {
            "length": 10044,
            "filename": "kalgebra_19.12.1-1.debian.tar.xz",
            "checksums": {
                "sha1": "b518bfc2ac708b40577c595bd539faa8b84572db",
                "sha256": "1a30acd2699c3769da302f7a0c63a7d7b060f80925b38c8c43ce3bec92744d67",
            },
            "url": "http://deb.debian.org/debian//pool/main/k/kalgebra/kalgebra_19.12.1-1.debian.tar.xz",
        },
        {
            "length": 488,
            "filename": "kalgebra_19.12.1.orig.tar.xz.asc",
            "checksums": {
                "sha1": "ff53a5c21c1aef2b9caa38a02fa3488f43df4c20",
                "sha256": "a37e0b95bb1f16b19b0587bc5d3b99ba63a195d7f6335c4a359122ad96d682dd",
            },
            "url": "http://deb.debian.org/debian//pool/main/k/kalgebra/kalgebra_19.12.1.orig.tar.xz.asc",
        },
    ]

    # Input artifacts: same as the expected ones, minus the "url" key.
    source_original_artifacts = [
        {k: v for (k, v) in d.items() if k != "url"} for d in dest_original_artifacts
    ]

    row = {
        "id": b"\x00\x00\x03l1\x1e\xf3:(\x1b\x05h\x8fn\xad\xcf\xc0\x94:\xee",
        "directory": DIRECTORY_ID,
        "date": datetime.datetime(
            2020, 1, 26, 22, 3, 24, tzinfo=datetime.timezone.utc
        ),
        "date_offset": 60,
        "type": "dsc",
        "message": b"Synthetic revision for Debian source package kalgebra version 4:19.12.1-1",
        "metadata": {
            "extrinsic": {
                "raw": {
                    "id": 2718802,
                    "name": "kalgebra",
                    "files": {
                        "kalgebra_19.12.1-1.dsc": {
                            "uri": "http://deb.debian.org/debian//pool/main/k/kalgebra/kalgebra_19.12.1-1.dsc",
                            "name": "kalgebra_19.12.1-1.dsc",
                            "size": 2936,
                            "md5sum": "fd28f604d4cc31a0a305543230f1622a",
                            "sha256": "75f77150aefdaa4bcf8bc5b1e9b8b90b5cb1651b76a068c5e58e5b83658d5d11",
                        },
                        "kalgebra_19.12.1.orig.tar.xz": {
                            "uri": "http://deb.debian.org/debian//pool/main/k/kalgebra/kalgebra_19.12.1.orig.tar.xz",
                            "name": "kalgebra_19.12.1.orig.tar.xz",
                            "size": 1156408,
                            "md5sum": "34e09ed152da762d53101ea33634712b",
                            "sha256": "49d623186800eb8f6fbb91eb43fb14dff78e112624c9cda6b331d494d610b16a",
                        },
                        "kalgebra_19.12.1-1.debian.tar.xz": {
                            "uri": "http://deb.debian.org/debian//pool/main/k/kalgebra/kalgebra_19.12.1-1.debian.tar.xz",
                            "name": "kalgebra_19.12.1-1.debian.tar.xz",
                            "size": 10044,
                            "md5sum": "4f639f36143898d97d044f273f038e58",
                            "sha256": "1a30acd2699c3769da302f7a0c63a7d7b060f80925b38c8c43ce3bec92744d67",
                        },
                        "kalgebra_19.12.1.orig.tar.xz.asc": {
                            "uri": "http://deb.debian.org/debian//pool/main/k/kalgebra/kalgebra_19.12.1.orig.tar.xz.asc",
                            "name": "kalgebra_19.12.1.orig.tar.xz.asc",
                            "size": 488,
                            "md5sum": "3c29291e4e6f0c294de80feb8e9fce4c",
                            "sha256": "a37e0b95bb1f16b19b0587bc5d3b99ba63a195d7f6335c4a359122ad96d682dd",
                        },
                    },
                    "version": "4:19.12.1-1",
                    "revision_id": None,
                },
                "when": "2020-01-27T19:32:03.925498+00:00",
                "provider": "http://deb.debian.org/debian//pool/main/k/kalgebra/kalgebra_19.12.1-1.dsc",
            },
            "intrinsic": {
                "raw": {
                    "name": "kalgebra",
                    "version": "4:19.12.1-1",
                    # ...
                },
                "tool": "dsc",
            },
            "original_artifact": source_original_artifacts,
        },
    }

    origin_url = "deb://Debian/packages/kalgebra"

    storage = Mock()

    deposit_cur = None

    # ``patch`` requires a dotted "module.attribute" target; a bare name makes
    # it raise TypeError before the test body runs.  The name is patched in
    # the module that defines ``handle_row``.
    # NOTE(review): assumes ``debian_origins_from_row`` is a module-level name
    # of that same module -- confirm against the module under test.
    patch_target = handle_row.__module__ + ".debian_origins_from_row"
    with patch(patch_target, return_value=[origin_url]):
        handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False)

    assert storage.method_calls == [
        call.raw_extrinsic_metadata_add(
            [
                RawExtrinsicMetadata(
                    target=DIRECTORY_SWHID,
                    discovery_date=datetime.datetime(
                        2020, 1, 26, 22, 3, 24, tzinfo=datetime.timezone.utc
                    ),
                    authority=SWH_AUTHORITY,
                    fetcher=FETCHER,
                    format="original-artifacts-json",
                    metadata=json.dumps(dest_original_artifacts).encode(),
                    origin=origin_url,
                    revision=CoreSWHID.from_string(
                        "swh:1:rev:0000036c311ef33a281b05688f6eadcfc0943aee"
                    ),
                ),
            ]
        ),
    ]
def test_npm_2_scoped():
    """Check ``handle_row`` on an older-format NPM row with a scoped name.

    The row's metadata has only ``package`` and ``package_source`` entries
    (no ``extrinsic`` block, hence no provider) and the package name has the
    ``@org/name`` form.  The test expects one ``origin_get`` lookup for the
    ``www.npmjs.com`` package URL followed by two
    ``raw_extrinsic_metadata_add`` calls: the package metadata and then the
    original artifacts, both dated with the row's ``date``.
    """
    origin_url = "https://www.npmjs.com/package/@piximi/components"

    extrinsic_metadata = {
        "bugs": {"url": "https://github.com/piximi/components/issues"},
        "name": "@piximi/components",
        # ...
    }

    package_source = {
        "url": "https://registry.npmjs.org/@piximi/components/-/components-0.1.11.tgz",
        "date": "2019-06-07T19:56:04.753Z",
        "name": "@piximi/components",
        "sha1": "4ab74e563cb61bb5b2022601a5133a2dd19d19ec",
        "sha256": "69bb980bd6de3277b6bca86fd79c91f1c28db6910c8d03ecd05b32b78a35188f",
        "version": "0.1.11",
        "filename": "components-0.1.11.tgz",
        "blake2s256": "ce33181d5eff25b70ffdd6f1a18acd472a1707ede23cd2adc6af272dfc40dbfd",
    }

    original_artifacts = [
        {
            "filename": "components-0.1.11.tgz",
            "checksums": {
                "sha1": "4ab74e563cb61bb5b2022601a5133a2dd19d19ec",
                "sha256": "69bb980bd6de3277b6bca86fd79c91f1c28db6910c8d03ecd05b32b78a35188f",
                "blake2s256": "ce33181d5eff25b70ffdd6f1a18acd472a1707ede23cd2adc6af272dfc40dbfd",
            },
            "url": "https://registry.npmjs.org/@piximi/components/-/components-0.1.11.tgz",
        }
    ]

    row = {
        "id": b"\x00\x00 \x19\xc5wXt\xbc\xed\x00zR\x9b\xd3\xb7\x8b\xf6\x04W",
        "directory": DIRECTORY_ID,
        "date": datetime.datetime(2019, 6, 7, 19, 56, 4, tzinfo=datetime.timezone.utc),
        "committer_date": datetime.datetime(
            2019, 6, 7, 19, 56, 4, tzinfo=datetime.timezone.utc
        ),
        "type": "tar",
        "message": b"0.1.11",
        "metadata": {
            "package": extrinsic_metadata,
            "package_source": package_source,
        },
    }

    def fake_origin_get(urls):
        # Only the expected origin may be looked up.
        assert urls == [origin_url]
        return [Origin(url=origin_url)]

    storage = Mock()
    storage.origin_get.side_effect = fake_origin_get

    # ``handle_row`` gets its own copy of the row; no deposit cursor is given.
    handle_row(copy.deepcopy(row), storage, None, dry_run=False)

    discovery_date = datetime.datetime(
        2019, 6, 7, 19, 56, 4, tzinfo=datetime.timezone.utc
    )
    revision_swhid = CoreSWHID.from_string(
        "swh:1:rev:00002019c5775874bced007a529bd3b78bf60457"
    )

    def metadata_add(authority, metadata_format, payload):
        # One expected ``raw_extrinsic_metadata_add`` call with a single object.
        return call.raw_extrinsic_metadata_add(
            [
                RawExtrinsicMetadata(
                    target=DIRECTORY_SWHID,
                    discovery_date=discovery_date,
                    authority=authority,
                    fetcher=FETCHER,
                    format=metadata_format,
                    metadata=json.dumps(payload).encode(),
                    origin=origin_url,
                    revision=revision_swhid,
                ),
            ]
        )

    expected_calls = [
        call.origin_get([origin_url]),
        metadata_add(NPM_AUTHORITY, "replicate-npm-package-json", extrinsic_metadata),
        metadata_add(SWH_AUTHORITY, "original-artifacts-json", original_artifacts),
    ]
    assert storage.method_calls == expected_calls
def test_npm_2_unscoped():
    """Check ``handle_row`` on an older-format NPM row with an unscoped name.

    The row's metadata has only ``package`` and ``package_source`` entries
    (no ``extrinsic`` block, hence no provider) and the package name contains
    no slash.  The test expects one ``origin_get`` lookup for the
    ``www.npmjs.com`` package URL followed by two
    ``raw_extrinsic_metadata_add`` calls: the package metadata and then the
    original artifacts, both dated with the row's ``date``.
    """
    origin_url = "https://www.npmjs.com/package/simplemaps"

    extrinsic_metadata = {
        "bugs": {"url": "https://github.com/niwasawa/simplemaps/issues"},
        "name": "simplemaps",
        "author": "Naoki Iwasawa",
        "license": "MIT",
        # ...
    }

    package_source = {
        "url": "https://registry.npmjs.org/simplemaps/-/simplemaps-0.0.6.tgz",
        "date": "2016-12-23T07:21:29.733Z",
        "name": "simplemaps",
        "sha1": "e2b8222930196def764527f5c61048c5b28fe3c4",
        "sha256": "3ce94927bab5feafea5695d1fa4c2b8131413e53e249b32f9ac2ccff4d865a0b",
        "version": "0.0.6",
        "filename": "simplemaps-0.0.6.tgz",
        "blake2s256": "6769b4009f8162be2e745604b153443d4907a85781d31a724217a3e2d42a7462",
    }

    original_artifacts = [
        {
            "filename": "simplemaps-0.0.6.tgz",
            "checksums": {
                "sha1": "e2b8222930196def764527f5c61048c5b28fe3c4",
                "sha256": "3ce94927bab5feafea5695d1fa4c2b8131413e53e249b32f9ac2ccff4d865a0b",
                "blake2s256": "6769b4009f8162be2e745604b153443d4907a85781d31a724217a3e2d42a7462",
            },
            "url": "https://registry.npmjs.org/simplemaps/-/simplemaps-0.0.6.tgz",
        }
    ]

    row = {
        "id": b"\x00\x00\x04\xae\xed\t\xee\x08\x9cx\x12d\xc0M%d\xfdX\xfe\xb5",
        "directory": DIRECTORY_ID,
        "date": datetime.datetime(
            2016, 12, 23, 7, 21, 29, tzinfo=datetime.timezone.utc
        ),
        "committer_date": datetime.datetime(
            2016, 12, 23, 7, 21, 29, tzinfo=datetime.timezone.utc
        ),
        "type": "tar",
        "message": b"0.0.6",
        "metadata": {
            "package": extrinsic_metadata,
            "package_source": package_source,
        },
    }

    def fake_origin_get(urls):
        # Only the expected origin may be looked up.
        assert urls == [origin_url]
        return [Origin(url=origin_url)]

    storage = Mock()
    storage.origin_get.side_effect = fake_origin_get

    # ``handle_row`` gets its own copy of the row; no deposit cursor is given.
    handle_row(copy.deepcopy(row), storage, None, dry_run=False)

    discovery_date = datetime.datetime(
        2016, 12, 23, 7, 21, 29, tzinfo=datetime.timezone.utc
    )
    revision_swhid = CoreSWHID.from_string(
        "swh:1:rev:000004aeed09ee089c781264c04d2564fd58feb5"
    )

    def metadata_add(authority, metadata_format, payload):
        # One expected ``raw_extrinsic_metadata_add`` call with a single object.
        return call.raw_extrinsic_metadata_add(
            [
                RawExtrinsicMetadata(
                    target=DIRECTORY_SWHID,
                    discovery_date=discovery_date,
                    authority=authority,
                    fetcher=FETCHER,
                    format=metadata_format,
                    metadata=json.dumps(payload).encode(),
                    origin=origin_url,
                    revision=revision_swhid,
                ),
            ]
        )

    expected_calls = [
        call.origin_get([origin_url]),
        metadata_add(NPM_AUTHORITY, "replicate-npm-package-json", extrinsic_metadata),
        metadata_add(SWH_AUTHORITY, "original-artifacts-json", original_artifacts),
    ]
    assert storage.method_calls == expected_calls
def test_cran():
    """Check the handling of a CRAN revision row that has a revision date.

    The artifact stored in the row has no ``url`` key, while the expected
    ``original-artifacts-json`` metadata has one, equal to the ``url`` of the
    row's extrinsic metadata.  That metadata object is the only one expected
    to be written, and its discovery date matches the extrinsic ``when``
    timestamp of the row.
    """
    origin_url = "https://cran.r-project.org/package=ExtremeRisks"
    artifact_url = "https://cran.r-project.org/src/contrib/ExtremeRisks_0.0.3.tar.gz"

    source_original_artifacts = [
        {
            "length": 170623,
            "filename": "ExtremeRisks_0.0.3.tar.gz",
            "checksums": {
                "sha1": "f2f19fc0f24b66b5ea9413366c632f3c229f7f3f",
                "sha256": "6f232556313019809dde3554149a1399bb1901a366b4965af49dc007d01945c9",
            },
        }
    ]
    # Same artifact with "url" appended as the last key; the key order matters
    # because the expected metadata is compared as serialized JSON bytes.
    dest_original_artifacts = [{**source_original_artifacts[0], "url": artifact_url}]

    revision_date = datetime.datetime(2020, 5, 5, 0, 0, tzinfo=datetime.timezone.utc)

    row = {
        "id": b"\x00\x03a\xaa3\x84,\xbd\xea_\xa6\xe7}\xb6\x96\xb97\xeb\xd2i",
        "directory": DIRECTORY_ID,
        "date": revision_date,
        "committer_date": revision_date,
        "type": "tar",
        "message": b"0.0.3",
        "metadata": {
            "extrinsic": {
                "raw": {"url": artifact_url, "version": "0.0.3"},
                "when": "2020-05-07T15:27:38.652281+00:00",
                "provider": origin_url,
            },
            "intrinsic": {
                "raw": {
                    "URL": "mypage.unibocconi.it/simonepadoan/",
                    "Date": "2020-05-05",
                    "Title": "Extreme Risk Measures",
                    "Author": "Simone Padoan [cre, aut],\n  Gilles Stupfler [aut]",
                    # ...
                    "Date/Publication": "2020-05-07 10:20:02 UTC",
                },
                "tool": "DESCRIPTION",
            },
            "original_artifact": source_original_artifacts,
        },
    }

    storage = Mock()

    def origin_get(urls):
        assert urls == [origin_url]
        return [Origin(url=origin_url)]

    storage.origin_get.side_effect = origin_get
    deposit_cur = None

    # Hand over a deep copy, like the sibling tests do, so that an in-place
    # modification of the row cannot leak into the fixtures defined above.
    handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False)

    expected_metadata = RawExtrinsicMetadata(
        target=DIRECTORY_SWHID,
        discovery_date=datetime.datetime(
            2020, 5, 7, 15, 27, 38, 652281, tzinfo=datetime.timezone.utc
        ),
        authority=SWH_AUTHORITY,
        fetcher=FETCHER,
        format="original-artifacts-json",
        metadata=json.dumps(dest_original_artifacts).encode(),
        origin=origin_url,
        revision=CoreSWHID.from_string(
            "swh:1:rev:000361aa33842cbdea5fa6e77db696b937ebd269"
        ),
    )

    assert storage.method_calls == [
        call.origin_get([origin_url]),
        call.raw_extrinsic_metadata_add([expected_metadata]),
    ]
def test_cran_with_new_original_artifacts_format():
    """Check a CRAN revision row whose artifact already carries a ``url`` key.

    The expected ``original-artifacts-json`` metadata is the serialization of
    the artifact list exactly as defined here, and it is the only metadata
    object expected to be written.
    """
    origin_url = "https://cran.r-project.org/package=r2mlm"

    original_artifacts = [
        {
            "url": "https://cran.r-project.org/src/contrib/r2mlm_0.1.0.tar.gz",
            "length": 346563,
            "filename": "r2mlm_0.1.0.tar.gz",
            "checksums": {
                "sha1": "25c06b4af523c35a7813b58dd0db414e79848501",
                "sha256": "c887fe6c4f78c94b2279759052e12d639cf80225b444c1f67931c6aa6f0faf23",
            },
        }
    ]

    row = {
        "id": b'."7\x82\xeeK\xa1R\xe4\xc8\x86\xf7\x97\x97bA\xc3\x9a\x9a\xab',
        "directory": DIRECTORY_ID,
        "date": None,
        "committer_date": None,
        "type": "tar",
        "message": b"0.1.0",
        "metadata": {
            "extrinsic": {
                "raw": {
                    "url": "https://cran.r-project.org/src/contrib/r2mlm_0.1.0.tar.gz"
                },
                "when": "2020-09-25T14:04:20.926667+00:00",
                "provider": "https://cran.r-project.org/package=r2mlm",
            },
            "intrinsic": {
                "raw": {
                    "URL": "https://github.com/mkshaw/r2mlm",
                    "Type": "Package",
                    "Title": "R-Squared Measures for Multilevel Models",
                    "Author": "Mairead Shaw [aut, cre],\n  Jason Rights [aut],\n  Sonya Sterba [aut],\n  Jessica Flake [aut]",
                    # ...
                },
                "tool": "DESCRIPTION",
            },
            "original_artifact": original_artifacts,
        },
    }

    storage = Mock()

    def origin_get(urls):
        assert urls == [origin_url]
        return [Origin(url=origin_url)]

    storage.origin_get.side_effect = origin_get
    deposit_cur = None

    # The expectation below is computed from ``original_artifacts`` after the
    # call.  Passing a deep copy keeps that list pristine, so an in-place
    # modification of the row would show up as a failure instead of being
    # mirrored in the expected value.
    handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False)

    expected_metadata = RawExtrinsicMetadata(
        target=DIRECTORY_SWHID,
        discovery_date=datetime.datetime(
            2020, 9, 25, 14, 4, 20, 926667, tzinfo=datetime.timezone.utc
        ),
        authority=SWH_AUTHORITY,
        fetcher=FETCHER,
        format="original-artifacts-json",
        metadata=json.dumps(original_artifacts).encode(),
        origin=origin_url,
        revision=CoreSWHID.from_string(
            "swh:1:rev:2e223782ee4ba152e4c886f797976241c39a9aab"
        ),
    )

    assert storage.method_calls == [
        call.origin_get([origin_url]),
        call.raw_extrinsic_metadata_add([expected_metadata]),
    ]
def test_cran_without_revision_date():
    """Check a CRAN revision row whose ``date`` and ``committer_date`` are None.

    The expected ``original-artifacts-json`` metadata uses the extrinsic
    ``when`` timestamp of the row as discovery date, and its artifact entry
    has the ``url`` key that the artifact stored in the row lacks.
    """
    origin_url = "https://cran.r-project.org/package=gofgamma"
    tarball_url = "https://cran.r-project.org/src/contrib/gofgamma_1.0.tar.gz"

    source_original_artifacts = [
        {
            "length": 8018,
            "filename": "gofgamma_1.0.tar.gz",
            "checksums": {
                "sha1": "58f2993140f9e9e1a136554f0af0174a252f2c7b",
                "sha256": "55408f004642b5043bb01de831a7e7a0b9f24a30cb0151e70c2d37abdc508d03",
            },
        }
    ]
    # Same artifact with "url" appended as the last key; the key order matters
    # because the expected metadata is compared as serialized JSON bytes.
    dest_original_artifacts = [{**source_original_artifacts[0], "url": tarball_url}]

    row = {
        "id": b'\x00\x00\xd4\xef^\x16a"\xae\xe6\x86*\xd3\x8a\x18\xceS\x86\xcc>',
        "directory": DIRECTORY_ID,
        "date": None,
        "committer_date": None,
        "type": "tar",
        "message": b"1.0",
        "metadata": {
            "extrinsic": {
                "raw": {"url": tarball_url, "version": "1.0"},
                "when": "2020-04-30T11:01:57.832481+00:00",
                "provider": origin_url,
            },
            "intrinsic": {
                "raw": {
                    "Type": "Package",
                    "Title": "Goodness-of-Fit Tests for the Gamma Distribution",
                    "Author": "Lucas Butsch [aut],\n  Bruno Ebner [aut, cre],\n  Steffen Betsch [aut]",
                    # ...
                },
                "tool": "DESCRIPTION",
            },
            "original_artifact": source_original_artifacts,
        },
    }

    def fake_origin_get(urls):
        # Only the origin derived from this row is expected to be looked up.
        assert urls == [origin_url]
        return [Origin(url=origin_url)]

    storage = Mock()
    storage.origin_get.side_effect = fake_origin_get

    handle_row(copy.deepcopy(row), storage, None, dry_run=False)

    expected_metadata = RawExtrinsicMetadata(
        target=DIRECTORY_SWHID,
        discovery_date=datetime.datetime(
            2020, 4, 30, 11, 1, 57, 832481, tzinfo=datetime.timezone.utc
        ),
        authority=SWH_AUTHORITY,
        fetcher=FETCHER,
        format="original-artifacts-json",
        metadata=json.dumps(dest_original_artifacts).encode(),
        origin=origin_url,
        revision=CoreSWHID.from_string(
            "swh:1:rev:0000d4ef5e166122aee6862ad38a18ce5386cc3e"
        ),
    )

    assert storage.method_calls == [
        call.origin_get([origin_url]),
        call.raw_extrinsic_metadata_add([expected_metadata]),
    ]