예제 #1
0
def test_pypi_visit_1_release_with_2_artifacts(swh_storage, requests_mock_datadir):
    """First visit of a project whose release 1.1.0 ships two artifacts.

    The load must be eventful, the visit full, and the stored snapshot must
    carry one release branch per artifact (zip and tar.gz).
    """
    origin_url = "https://pypi.org/project/nexter"
    snapshot_id = hash_to_bytes("1394b2e59351a944cc763bd9d26d90ce8e8121a8")

    load_status = PyPILoader(swh_storage, origin_url).load()
    assert load_status == {
        "status": "eventful",
        "snapshot_id": snapshot_id.hex(),
    }

    assert_last_visit_matches(
        swh_storage, origin_url, status="full", type="pypi", snapshot=snapshot_id
    )

    # branch name -> hex id of the release it points to
    release_ids = {
        b"releases/1.1.0/nexter-1.1.0.zip": (
            "f7d43faeb65b64d3faa67e4f46559db57d26b9a4"
        ),
        b"releases/1.1.0/nexter-1.1.0.tar.gz": (
            "732bb9dc087e6015884daaebb8b82559be729b5a"
        ),
    }
    branches = {
        branch_name: SnapshotBranch(
            target=hash_to_bytes(release_hex),
            target_type=TargetType.RELEASE,
        )
        for branch_name, release_hex in release_ids.items()
    }
    check_snapshot(Snapshot(id=snapshot_id, branches=branches), swh_storage)
예제 #2
0
def test_pypi_release_with_traceback(swh_storage, requests_mock_datadir):
    """A ``ValueError`` raised by ``last_snapshot`` makes the load fail.

    The visit is recorded as failed and, apart from the origin and its visit,
    nothing is written to the storage.
    """
    origin_url = "https://pypi.org/project/0805nexter"
    failing_last_snapshot = patch(
        "swh.loader.package.pypi.loader.PyPILoader.last_snapshot",
        side_effect=ValueError("Fake problem to fail the visit"),
    )

    with failing_last_snapshot:
        load_status = PyPILoader(swh_storage, origin_url).load()
        assert load_status == {"status": "failed"}

        assert_last_visit_matches(
            swh_storage, origin_url, status="failed", type="pypi"
        )

        # only the origin and its single visit are expected in the storage
        expected_stats = {
            "content": 0,
            "directory": 0,
            "origin": 1,
            "origin_visit": 1,
            "release": 0,
            "revision": 0,
            "skipped_content": 0,
            "snapshot": 0,
        }
        assert expected_stats == get_stats(swh_storage)
예제 #3
0
def test_pypi_no_release_artifact(swh_storage, requests_mock_datadir_missing_all):
    """Loading a project whose artifacts are all missing yields an empty snapshot.

    The load is reported as uneventful, the visit as partial, and the only
    snapshot stored is the empty one.
    """
    origin_url = "https://pypi.org/project/0805nexter"

    load_status = PyPILoader(swh_storage, origin_url).load()
    assert load_status["status"] == "uneventful"
    assert load_status["snapshot_id"] is not None

    # the visit must reference the snapshot that has no branch at all
    snapshot_without_branches = Snapshot(branches={})
    assert_last_visit_matches(
        swh_storage,
        origin_url,
        status="partial",
        type="pypi",
        snapshot=snapshot_without_branches.id,
    )

    expected_stats = {
        "content": 0,
        "directory": 0,
        "origin": 1,
        "origin_visit": 1,
        "release": 0,
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,
    }
    assert expected_stats == get_stats(swh_storage)
예제 #4
0
def test_pypi_origin_not_found(swh_storage, requests_mock_datadir):
    """Loading an unknown project fails and records a ``not_found`` visit."""
    origin_url = "https://pypi.org/project/unknown"

    load_status = PyPILoader(swh_storage, origin_url).load()
    assert load_status == {"status": "failed"}

    # no snapshot is attached to a not_found visit
    assert_last_visit_matches(
        swh_storage,
        origin_url,
        status="not_found",
        type="pypi",
        snapshot=None,
    )
예제 #5
0
def test_pypi_visit_with_missing_artifact(
    swh_storage, requests_mock_datadir_missing_one
):
    """Load a pypi project with some missing artifacts ends up with 1 snapshot

    The load is eventful but the visit is only partial; the snapshot holds the
    single release that could be fetched plus a HEAD alias pointing to it.
    """
    url = "https://pypi.org/project/0805nexter"
    loader = PyPILoader(swh_storage, url)

    actual_load_status = loader.load()
    expected_snapshot_id = hash_to_bytes("00785a38479abe5fbfa402df96be26d2ddf89c97")
    assert actual_load_status == {
        "status": "eventful",
        "snapshot_id": expected_snapshot_id.hex(),
    }

    assert_last_visit_matches(
        swh_storage,
        url,
        status="partial",
        type="pypi",
        snapshot=expected_snapshot_id,
    )

    expected_snapshot = Snapshot(
        # expected_snapshot_id is already raw bytes: no second conversion needed
        id=expected_snapshot_id,
        branches={
            b"releases/1.2.0": SnapshotBranch(
                target=hash_to_bytes("fbbcb817f01111b06442cdcc93140ab3cc777d68"),
                target_type=TargetType.RELEASE,
            ),
            b"HEAD": SnapshotBranch(
                target=b"releases/1.2.0",
                target_type=TargetType.ALIAS,
            ),
        },
    )
    check_snapshot(expected_snapshot, swh_storage)

    stats = get_stats(swh_storage)

    # only one release (and its contents/directories) made it into the storage
    assert {
        "content": 3,
        "directory": 2,
        "origin": 1,
        "origin_visit": 1,
        "release": 1,
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,
    } == stats
예제 #6
0
def test_pypi_visit_with_1_release_artifact(swh_storage, requests_mock_datadir):
    """First visit of ``0805nexter`` produces one snapshot with two releases.

    The snapshot is expected to hold the 1.1.0 and 1.2.0 release branches and
    a HEAD alias targeting ``releases/1.2.0``.
    """
    origin_url = "https://pypi.org/project/0805nexter"
    snapshot_id = hash_to_bytes("3dd50c1a0e48a7625cf1427e3190a65b787c774e")

    load_status = PyPILoader(swh_storage, origin_url).load()
    assert load_status == {
        "status": "eventful",
        "snapshot_id": snapshot_id.hex(),
    }

    assert_last_visit_matches(
        swh_storage, origin_url, status="full", type="pypi", snapshot=snapshot_id
    )

    # branch name -> hex id of the release it points to
    release_ids = {
        b"releases/1.1.0": "f8789ff3ed70a5f570c35d885c7bcfda7b23b091",
        b"releases/1.2.0": "fbbcb817f01111b06442cdcc93140ab3cc777d68",
    }
    branches = {
        branch_name: SnapshotBranch(
            target=hash_to_bytes(release_hex),
            target_type=TargetType.RELEASE,
        )
        for branch_name, release_hex in release_ids.items()
    }
    branches[b"HEAD"] = SnapshotBranch(
        target=b"releases/1.2.0",
        target_type=TargetType.ALIAS,
    )
    check_snapshot(Snapshot(id=snapshot_id, branches=branches), swh_storage)

    expected_stats = {
        "content": 6,
        "directory": 4,
        "origin": 1,
        "origin_visit": 1,
        "release": 2,
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,
    }
    assert expected_stats == get_stats(swh_storage)
예제 #7
0
def test_pypi_artifact_with_no_intrinsic_metadata(swh_storage, requests_mock_datadir):
    """An artifact lacking intrinsic metadata is skipped during ingestion.

    The load is still eventful and the visit full, but the resulting snapshot
    has no branch.
    """
    origin_url = "https://pypi.org/project/upymenu"
    snapshot_id = hash_to_bytes("1a8893e6a86f444e8be8e7bda6cb34fb1735a00e")

    load_status = PyPILoader(swh_storage, origin_url).load()
    assert load_status == {
        "status": "eventful",
        "snapshot_id": snapshot_id.hex(),
    }

    # the only artifact has no intrinsic metadata, hence no branch
    branchless_snapshot = Snapshot(id=snapshot_id, branches={})

    assert_last_visit_matches(
        swh_storage,
        origin_url,
        status="full",
        type="pypi",
        snapshot=branchless_snapshot.id,
    )

    check_snapshot(branchless_snapshot, swh_storage)
예제 #8
0
def test_pypi_build_release_missing_version_in_pkg_info(swh_storage, tmp_path):
    """Build a release from a PKG-INFO file that has no Version field."""
    origin_url = "https://pypi.org/project/GermlineFilter"

    # PKG-INFO content deliberately written without any Version field
    pkg_info_text = """Metadata-Version: 1.2
Name: germline_filter
Home-page:
Author: Cristian Caloian (OICR)
Author-email: [email protected]
License: UNKNOWN
Description: UNKNOWN
Platform: UNKNOWN"""

    # lay out the uncompressed package on disk
    package_dir = tmp_path / "GermlineFilter-1.2"
    package_dir.mkdir()
    (package_dir / "PKG-INFO").write_text(pkg_info_text)

    # extrinsic description of the artifact, with an empty comment_text
    package_info = PyPIPackageInfo(
        url=origin_url,
        filename="GermlineFilter-1.2.tar.gz",
        version="1.2",
        name="GermlineFilter",
        directory_extrinsic_metadata=[],
        raw_info={},
        comment_text="",
        sha256="e4982353c544d94b34f02c5690ab3d3ebc93480d5b62fe6f3317f23c515acc05",
        upload_time="2015-02-18T20:39:13",
    )
    directory_id = hash_to_bytes("8b864d66f356afe35033d58f8e03b7c23a66751f")

    built_release = PyPILoader(swh_storage, origin_url).build_release(
        package_info, str(tmp_path), directory_id
    )

    # without comment_text and without a Version in PKG-INFO, the message is
    # expected to be the synthetic one built from the package name and version
    expected_message = (
        b"Synthetic release for PyPI source package GermlineFilter version 1.2\n"
    )
    assert built_release.message == expected_message
예제 #9
0
def test_filter_out_invalid_sdists(swh_storage, requests_mock):
    """Only the ``.tar.gz`` file is kept among entries all declared as sdist.

    The mocked PyPI JSON API advertises five files for the same version, all
    with ``packagetype`` set to ``sdist`` but with different extensions.
    """
    project_name = "swh-test-sdist-filtering"
    version = "1.0.0"
    url = f"https://pypi.org/project/{project_name}"
    json_url = f"https://pypi.org/pypi/{project_name}/json"

    # fields shared by every advertised file
    common_sdist_entries = {
        "url": "",
        "comment_text": "",
        "digests": {"sha256": ""},
        "upload_time": "",
        "packagetype": "sdist",
    }

    extensions = ("tar.gz", "deb", "egg", "rpm", "whl")
    advertised_files = [
        {
            **common_sdist_entries,
            "filename": f"{project_name}-{version}.{ext}",
        }
        for ext in extensions
    ]
    api_payload = {
        "info": {
            "name": project_name,
        },
        "releases": {version: advertised_files},
    }
    requests_mock.get(json_url, json=api_payload)

    loader = PyPILoader(swh_storage, url)
    packages = list(loader.get_package_info(version=version))

    assert len(packages) == 1
    # each item is indexable; position 1 holds the package info object
    assert packages[0][1].filename.endswith(".tar.gz")
예제 #10
0
def load_pypi(**kwargs):
    """Build a ``PyPILoader`` through ``from_configfile`` and run its ``load()``.

    All keyword arguments are forwarded untouched to
    ``PyPILoader.from_configfile``; the value returned by ``load()`` is
    returned as is.
    """
    loader = PyPILoader.from_configfile(**kwargs)
    return loader.load()
예제 #11
0
def test_pypi_incremental_visit(swh_storage, requests_mock_datadir_visits):
    """A second visit after a first one produces a different snapshot.

    The second visit adds release 1.3.0 on top of 1.1.0 and 1.2.0, moves HEAD
    to it, and no artifact URL is requested more than once over both visits.
    """
    origin_url = "https://pypi.org/project/0805nexter"
    loader = PyPILoader(swh_storage, origin_url)

    # ---- first visit -------------------------------------------------------
    first_load_status = loader.load()
    first_stats = get_stats(swh_storage)
    first_snapshot_id = hash_to_bytes("3dd50c1a0e48a7625cf1427e3190a65b787c774e")
    assert first_load_status == {
        "status": "eventful",
        "snapshot_id": first_snapshot_id.hex(),
    }

    assert_last_visit_matches(
        swh_storage,
        origin_url,
        status="full",
        type="pypi",
        snapshot=first_snapshot_id,
    )

    expected_first_stats = {
        "content": 6,
        "directory": 4,
        "origin": 1,
        "origin_visit": 1,
        "release": 2,
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,
    }
    assert expected_first_stats == first_stats

    # Reset internal state: remove the two cached attributes from the loader
    # before it is reused for the second visit
    del loader._cached__raw_info
    del loader._cached_info

    # ---- second visit ------------------------------------------------------
    second_load_status = loader.load()
    second_stats = get_stats(swh_storage)

    assert second_load_status["status"] == "eventful", second_load_status
    second_snapshot_id = hash_to_bytes("77febe6ff0faf6cc00dd015a6c9763579a9fb6c7")
    assert second_load_status == {
        "status": "eventful",
        "snapshot_id": second_snapshot_id.hex(),
    }

    assert_last_visit_matches(
        swh_storage,
        origin_url,
        status="full",
        type="pypi",
        snapshot=second_snapshot_id,
    )

    # branch name -> hex id of the release it points to
    release_ids = {
        b"releases/1.1.0": "f8789ff3ed70a5f570c35d885c7bcfda7b23b091",
        b"releases/1.2.0": "fbbcb817f01111b06442cdcc93140ab3cc777d68",
        b"releases/1.3.0": "a21b09cbec8e31f47307f196bb1f939effc26e11",
    }
    branches = {
        branch_name: SnapshotBranch(
            target=hash_to_bytes(release_hex),
            target_type=TargetType.RELEASE,
        )
        for branch_name, release_hex in release_ids.items()
    }
    branches[b"HEAD"] = SnapshotBranch(
        target=b"releases/1.3.0",
        target_type=TargetType.ALIAS,
    )
    second_snapshot = Snapshot(id=second_snapshot_id, branches=branches)

    assert_last_visit_matches(
        swh_storage,
        origin_url,
        status="full",
        type="pypi",
        snapshot=second_snapshot.id,
    )

    check_snapshot(second_snapshot, swh_storage)

    expected_second_stats = {
        "content": 6 + 1,  # 1 more content
        "directory": 4 + 2,  # 2 more directories
        "origin": 1,
        "origin_visit": 1 + 1,
        "release": 2 + 1,  # 1 more release
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1 + 1,  # 1 more snapshot
    }
    assert expected_second_stats == second_stats

    artifact_urls = [
        request.url
        for request in requests_mock_datadir_visits.request_history
        if request.url.startswith("https://files.pythonhosted.org")
    ]
    # visited each artifact once across 2 visits
    assert len(artifact_urls) == len(set(artifact_urls))
예제 #12
0
def test_pypi_multiple_visits_with_no_change(swh_storage, requests_mock_datadir):
    """Multiple visits with no changes results in 1 same snapshot

    The first load is eventful; the second one, run against the same mocked
    data, must be uneventful, reference the very same snapshot and only add
    one more origin visit to the storage.
    """
    url = "https://pypi.org/project/0805nexter"
    loader = PyPILoader(swh_storage, url)

    actual_load_status = loader.load()
    snapshot_id = hash_to_bytes("3dd50c1a0e48a7625cf1427e3190a65b787c774e")
    assert actual_load_status == {
        "status": "eventful",
        "snapshot_id": snapshot_id.hex(),
    }
    assert_last_visit_matches(
        swh_storage, url, status="full", type="pypi", snapshot=snapshot_id
    )

    expected_snapshot = Snapshot(
        id=snapshot_id,
        branches={
            b"releases/1.1.0": SnapshotBranch(
                target=hash_to_bytes("f8789ff3ed70a5f570c35d885c7bcfda7b23b091"),
                target_type=TargetType.RELEASE,
            ),
            b"releases/1.2.0": SnapshotBranch(
                target=hash_to_bytes("fbbcb817f01111b06442cdcc93140ab3cc777d68"),
                target_type=TargetType.RELEASE,
            ),
            b"HEAD": SnapshotBranch(
                target=b"releases/1.2.0",
                target_type=TargetType.ALIAS,
            ),
        },
    )
    check_snapshot(expected_snapshot, swh_storage)

    stats = get_stats(swh_storage)

    assert {
        "content": 6,
        "directory": 4,
        "origin": 1,
        "origin_visit": 1,
        "release": 2,
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,
    } == stats

    actual_load_status2 = loader.load()
    # Compare against the known snapshot id rather than against the returned
    # value itself, otherwise the snapshot_id check can never fail.
    assert actual_load_status2 == {
        "status": "uneventful",
        "snapshot_id": snapshot_id.hex(),
    }

    visit_status2 = assert_last_visit_matches(
        swh_storage, url, status="full", type="pypi"
    )

    # nothing new in the storage besides the second visit
    stats2 = get_stats(swh_storage)
    expected_stats2 = stats.copy()
    expected_stats2["origin_visit"] = 1 + 1
    assert expected_stats2 == stats2

    # same snapshot
    assert visit_status2.snapshot == snapshot_id
예제 #13
0
def test_pypi_release_metadata_structure(
    swh_storage, requests_mock_datadir, _0805nexter_api_info
):
    """Check the raw extrinsic metadata stored for release 1.2.0.

    After an eventful load, the storage must return exactly one
    ``RawExtrinsicMetadata`` object for the release's target directory, whose
    payload is the first ``releases["1.2.0"]`` entry of the API info fixture.
    """
    origin_url = "https://pypi.org/project/0805nexter"
    loader = PyPILoader(swh_storage, origin_url)

    load_status = loader.load()
    assert load_status["status"] == "eventful"
    assert load_status["snapshot_id"] is not None

    release_1_2_0_id = hash_to_bytes("fbbcb817f01111b06442cdcc93140ab3cc777d68")
    release_1_1_0_id = hash_to_bytes("f8789ff3ed70a5f570c35d885c7bcfda7b23b091")

    branches = {
        b"HEAD": SnapshotBranch(
            target=b"releases/1.2.0",
            target_type=TargetType.ALIAS,
        ),
        b"releases/1.1.0": SnapshotBranch(
            target=release_1_1_0_id,
            target_type=TargetType.RELEASE,
        ),
        b"releases/1.2.0": SnapshotBranch(
            target=release_1_2_0_id,
            target_type=TargetType.RELEASE,
        ),
    }
    # the snapshot id is taken from the load status rather than hard-coded
    loaded_snapshot = Snapshot(
        id=hash_to_bytes(load_status["snapshot_id"]),
        branches=branches,
    )

    assert_last_visit_matches(
        swh_storage,
        origin_url,
        status="full",
        type="pypi",
        snapshot=loaded_snapshot.id,
    )

    check_snapshot(loaded_snapshot, swh_storage)

    stored_release = swh_storage.release_get([release_1_2_0_id])[0]
    assert stored_release is not None

    release_swhid = CoreSWHID(
        object_type=ObjectType.RELEASE, object_id=release_1_2_0_id
    )
    # the metadata is attached to the directory the release points to
    directory_swhid = ExtendedSWHID(
        object_type=ExtendedObjectType.DIRECTORY, object_id=stored_release.target
    )
    pypi_authority = MetadataAuthority(
        type=MetadataAuthorityType.FORGE,
        url="https://pypi.org/",
    )
    loader_fetcher = MetadataFetcher(
        name="swh.loader.package.pypi.loader.PyPILoader",
        version=__version__,
    )

    # re-serialize the first 1.2.0 entry of the API info fixture
    api_info = json.loads(_0805nexter_api_info)
    artifact_entry = api_info["releases"]["1.2.0"][0]
    metadata_payload = json.dumps(artifact_entry).encode()

    expected_metadata = [
        RawExtrinsicMetadata(
            target=directory_swhid,
            authority=pypi_authority,
            fetcher=loader_fetcher,
            discovery_date=loader.visit_date,
            format="pypi-project-json",
            metadata=metadata_payload,
            origin=origin_url,
            release=release_swhid,
        )
    ]
    expected_page = PagedResult(
        next_page_token=None,
        results=expected_metadata,
    )
    assert (
        swh_storage.raw_extrinsic_metadata_get(directory_swhid, pypi_authority)
        == expected_page
    )