Esempio n. 1
0
def test_deposit_loading_ok_3(swh_storage, deposit_client,
                              requests_mock_datadir):
    """Deposit loading can happen on tarball artifacts as well

    The latest deposit changes introduce the internal change.

    """
    # Origin URL is derived from the HAL external identifier.
    origin_url = "https://hal-test.archives-ouvertes.fr/hal-123456"
    loader = DepositLoader(swh_storage, origin_url, 888, deposit_client)

    load_result = loader.load()
    snapshot_hex = "4677843de89e398f1d6bfedc9ca9b89c451c55c8"

    # The load must be eventful and report the expected snapshot id ...
    assert load_result == {
        "status": "eventful",
        "snapshot_id": snapshot_hex,
    }
    # ... and the latest visit must be a full "deposit" visit on that snapshot.
    assert_last_visit_matches(
        loader.storage,
        origin_url,
        status="full",
        type="deposit",
        snapshot=hash_to_bytes(snapshot_hex),
    )
Esempio n. 2
0
    def test_load_filter_branches(self):
        """Branches the loader filters out (pull-request merge refs) must not
        appear in the stored snapshot, while other pull-request refs
        (e.g. ``refs/pull/42/head``) are kept."""
        filtered_branches = {b"refs/pull/42/merge"}
        unfiltered_branches = {b"refs/pull/42/head"}

        # Add branches to the repository on disk; some should be filtered by
        # the loader, some should not.
        for branch_name in filtered_branches | unfiltered_branches:
            self.repo[branch_name] = self.repo[b"refs/heads/master"]

        # Generate the expected snapshot from SNAPSHOT1 (which is the original
        # state of the git repo)...
        branches = dict(SNAPSHOT1.branches)

        # ... and the unfiltered_branches, which are all pointing to the same
        # commit as "refs/heads/master".
        for branch_name in unfiltered_branches:
            branches[branch_name] = branches[b"refs/heads/master"]

        expected_snapshot = Snapshot(branches=branches)

        # Load the modified repository
        res = self.loader.load()
        assert res == {"status": "eventful"}

        # The stored snapshot must match the expectation exactly, i.e. the
        # filtered branches must be absent from it.
        check_snapshot(expected_snapshot, self.loader.storage)
        assert_last_visit_matches(
            self.loader.storage,
            self.repo_url,
            status="full",
            type="git",
            snapshot=expected_snapshot.id,
        )
Esempio n. 3
0
    def test_load(self):
        """Loads a simple repository (made available by `setUp()`),
        and checks everything was added in the storage."""
        assert self.loader.load() == {"status": "eventful"}

        # Latest visit is a full git visit pointing at the known snapshot.
        assert_last_visit_matches(
            self.loader.storage,
            self.repo_url,
            status="full",
            type="git",
            snapshot=SNAPSHOT1.id,
        )

        # Object counts expected after ingesting the test repository.
        expected_stats = {
            "content": 4,
            "directory": 7,
            "origin": 1,
            "origin_visit": 1,
            "release": 0,
            "revision": 7,
            "skipped_content": 0,
            "snapshot": 1,
        }
        assert get_stats(self.loader.storage) == expected_stats

        check_snapshot(SNAPSHOT1, self.loader.storage)
def test_loader_incremental(swh_storage, requests_mock_datadir):
    """Ensure a second visit do not download artifact already
    downloaded by the previous visit.

    """
    loader = NixGuixLoader(swh_storage, sources_url)
    first_load_status = loader.load()

    # Visit the same origin a second time.
    loader.load()

    # The first visit was eventful and produced SNAPSHOT1.
    assert first_load_status == {
        "status": "eventful",
        "snapshot_id": SNAPSHOT1.id.hex()
    }

    assert_last_visit_matches(
        swh_storage,
        sources_url,
        status="partial",
        type="nixguix",
        snapshot=SNAPSHOT1.id,
    )

    check_snapshot(SNAPSHOT1, storage=swh_storage)

    # The artifact
    # 'https://github.com/owner-1/repository-1/revision-1.tgz' is only
    # visited one time
    artifact_url = "https://github.com/owner-1/repository-1/revision-1.tgz"
    fetches = [m.url for m in requests_mock_datadir.request_history
               if m.url == artifact_url]
    assert len(fetches) == 1
Esempio n. 5
0
def test_pypi_visit_1_release_with_2_artifacts(swh_storage, requests_mock_datadir):
    """With no prior visit, load a pypi project ends up with 1 snapshot"""
    url = "https://pypi.org/project/nexter"
    loader = PyPILoader(swh_storage, url)

    actual_load_status = loader.load()
    expected_snapshot_id = hash_to_bytes("1394b2e59351a944cc763bd9d26d90ce8e8121a8")
    # The load status reports the snapshot id as a hex string.
    assert actual_load_status == {
        "status": "eventful",
        "snapshot_id": expected_snapshot_id.hex(),
    }

    assert_last_visit_matches(
        swh_storage, url, status="full", type="pypi", snapshot=expected_snapshot_id
    )

    # A single release (1.1.0) shipped as two artifacts (.zip and .tar.gz)
    # yields one release branch per artifact in the snapshot.
    expected_snapshot = Snapshot(
        id=expected_snapshot_id,
        branches={
            b"releases/1.1.0/nexter-1.1.0.zip": SnapshotBranch(
                target=hash_to_bytes("f7d43faeb65b64d3faa67e4f46559db57d26b9a4"),
                target_type=TargetType.RELEASE,
            ),
            b"releases/1.1.0/nexter-1.1.0.tar.gz": SnapshotBranch(
                target=hash_to_bytes("732bb9dc087e6015884daaebb8b82559be729b5a"),
                target_type=TargetType.RELEASE,
            ),
        },
    )
    check_snapshot(expected_snapshot, swh_storage)
def test_archive_visit_with_no_artifact_found(swh_storage,
                                              requests_mock_datadir):
    """An origin whose only artifact URL cannot be fetched ends up with an
    uneventful load, a still-recorded snapshot and a "partial" visit."""
    url = URL
    unknown_artifact_url = "https://ftp.g.o/unknown/8sync-0.1.0.tar.gz"
    loader = ArchiveLoader(
        swh_storage,
        url,
        artifacts=[{
            "time": 944729610,
            "url": unknown_artifact_url,  # unknown artifact
            "length": 221837,
            "filename": "8sync-0.1.0.tar.gz",
            "version": "0.1.0",
        }],
    )

    actual_load_status = loader.load()
    assert actual_load_status["status"] == "uneventful"
    # A snapshot id is reported even though no artifact could be loaded.
    assert actual_load_status["snapshot_id"] is not None
    stats = get_stats(swh_storage)

    # Nothing but the origin, its visit and the snapshot was written.
    assert {
        "content": 0,
        "directory": 0,
        "origin": 1,
        "origin_visit": 1,
        "release": 0,
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,
    } == stats

    assert_last_visit_matches(swh_storage, url, status="partial", type="tar")
def test_nixguix_url_not_found(swh_storage, requests_mock_datadir):
    """When failing to read from the url, the visit is marked as not_found.

    Here the sources url does not exist, so requests_mock_datadir returns a 404.
    Resulting in a NotFound raised within the package loader's main loop.

    This results in the task with status failed and a visit_status with status
    "not_found".

    """
    unknown_url = "https://non-existing-url/"
    loader = NixGuixLoader(swh_storage, unknown_url)
    # during the retrieval step
    assert loader.load() == {"status": "failed"}

    assert_last_visit_matches(
        swh_storage,
        unknown_url,
        status="not_found",
        type="nixguix",
        snapshot=None,
    )

    # A single HTTP request was made: the failed fetch of the sources url.
    history = requests_mock_datadir.request_history
    assert len(history) == 1
    assert history[0].url == unknown_url
def test_archive_snapshot_append(swh_storage, requests_mock_datadir):
    """With snapshot_append=True, a later visit loading a different artifact
    must keep the branches of the previous snapshot and repoint HEAD at the
    newest release branch."""
    # first loading with a first artifact
    artifact1 = GNU_ARTIFACTS[0]
    loader = ArchiveLoader(swh_storage, URL, [artifact1], snapshot_append=True)
    actual_load_status = loader.load()
    assert actual_load_status["status"] == "eventful"
    assert actual_load_status["snapshot_id"] is not None
    assert_last_visit_matches(swh_storage, URL, status="full", type="tar")

    # check expected snapshot: HEAD alias + one release branch
    snapshot = loader.last_snapshot()
    assert len(snapshot.branches) == 2
    branch_artifact1_name = f"releases/{artifact1['version']}".encode()
    assert b"HEAD" in snapshot.branches
    assert branch_artifact1_name in snapshot.branches
    assert snapshot.branches[b"HEAD"].target == branch_artifact1_name

    # second loading with a second artifact
    artifact2 = GNU_ARTIFACTS[1]
    loader = ArchiveLoader(swh_storage, URL, [artifact2], snapshot_append=True)
    actual_load_status = loader.load()
    assert actual_load_status["status"] == "eventful"
    assert actual_load_status["snapshot_id"] is not None
    assert_last_visit_matches(swh_storage, URL, status="full", type="tar")

    # check expected snapshot, should contain a new branch and the
    # branch for the first artifact
    snapshot = loader.last_snapshot()
    assert len(snapshot.branches) == 3
    branch_artifact2_name = f"releases/{artifact2['version']}".encode()
    assert b"HEAD" in snapshot.branches
    assert branch_artifact2_name in snapshot.branches
    assert branch_artifact1_name in snapshot.branches
    # HEAD now aliases the latest artifact's branch
    assert snapshot.branches[b"HEAD"].target == branch_artifact2_name
def test_arch_invalid_origin_archive_not_found(swh_storage,
                                               requests_mock_datadir):
    """An origin whose only artifact URL is unreachable raises during load
    and leaves a "not_found" visit with no snapshot.

    Note: the visit check runs *after* the ``pytest.raises`` block — any
    statement placed after the raising call inside the context manager
    would never execute, so asserting there silently checks nothing.
    """
    url = "https://nowhere/packages/42"
    loader = ArchLoader(
        swh_storage,
        url,
        artifacts=[
            {
                "filename": "42-0.0.1.pkg.xz",
                "url": "https://mirror2.nowhere/pkg/42-0.0.1.pkg.xz",
                "version": "0.0.1",
                "arch": "aarch64",
                "name": "42",
                "repo": "community",
                "length": 42,
                "last_modified": "2022-04-07T21:08:14",
            },
        ],
    )
    # Only the raising call belongs inside the context manager.
    with pytest.raises(Exception):
        loader.load()
    assert_last_visit_matches(swh_storage,
                              url,
                              status="not_found",
                              type="arch",
                              snapshot=None)
def test_multiple_open_heads(swh_storage, datadir, tmp_path):
    """A mercurial repository with several open heads must produce one
    branch-heads/<branch>/<index> snapshot branch per head (plus the branch
    tip and HEAD); re-visiting the unchanged repository is uneventful."""
    archive_name = "multiple-heads"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name,
                                               tmp_path)

    loader = HgLoader(
        storage=swh_storage,
        url=repo_url,
    )

    actual_load_status = loader.load()
    assert actual_load_status == {"status": "eventful"}

    assert_last_visit_matches(swh_storage, repo_url, status="full", type="hg")

    snapshot = snapshot_get_latest(swh_storage, repo_url)
    # Two open heads on the "default" branch -> two branch-heads entries.
    expected_branches = [
        b"HEAD",
        b"branch-heads/default/0",
        b"branch-heads/default/1",
        b"branch-tip/default",
    ]
    assert sorted(snapshot.branches.keys()) == expected_branches

    # Check that we don't load anything the second time
    loader = HgLoader(
        storage=swh_storage,
        url=repo_url,
    )

    actual_load_status = loader.load()

    assert actual_load_status == {"status": "uneventful"}
Esempio n. 11
0
def test_loader_external_in_versioned_path(swh_storage, repo_url,
                                           external_repo_url, tmp_path):
    """Loading must succeed when an svn:externals entry is set on a path
    ("trunk/") whose target directory ("src") already exists in the
    versioned tree."""
    # first commit on external
    add_commit(
        external_repo_url,
        "Create a file in an external repository",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="src/foo.sh",
                data=b"#!/bin/bash\necho foo",
            ),
        ],
    )

    # first commit
    add_commit(
        repo_url,
        "Add trunk/src dir",
        [
            CommitChange(change_type=CommitChangeType.AddOrUpdate,
                         path="trunk/src/")
        ],
    )

    # second commit: the external on trunk/ targets the already versioned
    # trunk/src path
    add_commit(
        repo_url,
        "Add a file in trunk/src directory and set external on trunk targeting src",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="trunk/src/bar.sh",
                data=b"#!/bin/bash\necho bar",
            ),
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="trunk/",
                properties={
                    "svn:externals":
                    (f"{svn_urljoin(external_repo_url, 'src')} src")
                },
            ),
        ],
    )

    loader = SvnLoader(
        swh_storage,
        repo_url,
        temp_directory=tmp_path,
        check_revision=1,
    )
    assert loader.load() == {"status": "eventful"}
    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
    )
    check_snapshot(loader.snapshot, loader.storage)
Esempio n. 12
0
def test_npm_artifact_use_mtime_if_no_time(swh_storage, requests_mock_datadir):
    """With no upload time in the metadata the artifact is still loaded —
    the mtime is used as a fallback (per the test name; the snapshot below
    shows the artifact's release branch is present)."""
    package = "jammit-express"
    url = package_url(package)
    loader = NpmLoader(swh_storage, url)

    actual_load_status = loader.load()
    expected_snapshot_id = hash_to_bytes("33b8f105d48ce16b6c59158af660e0cc78bcbef4")

    assert actual_load_status == {
        "status": "eventful",
        "snapshot_id": expected_snapshot_id.hex(),
    }

    # artifact is used: its release branch appears in the snapshot
    expected_snapshot = Snapshot(
        id=expected_snapshot_id,
        branches={
            b"HEAD": SnapshotBranch(
                target_type=TargetType.ALIAS, target=b"releases/0.0.1"
            ),
            b"releases/0.0.1": SnapshotBranch(
                target_type=TargetType.RELEASE,
                target=hash_to_bytes("3e3b800570869fa9b3dbc302500553e62400cc06"),
            ),
        },
    )

    assert_last_visit_matches(
        swh_storage, url, status="full", type="npm", snapshot=expected_snapshot.id
    )

    check_snapshot(expected_snapshot, swh_storage)
Esempio n. 13
0
def test_pypi_release_with_traceback(swh_storage, requests_mock_datadir):
    """A crash inside the loader marks both the load result and the visit
    as failed, and nothing but the origin and its visit is stored."""
    url = "https://pypi.org/project/0805nexter"
    # Make the loader blow up when it queries its last snapshot.
    fail_last_snapshot = patch(
        "swh.loader.package.pypi.loader.PyPILoader.last_snapshot",
        side_effect=ValueError("Fake problem to fail the visit"),
    )
    with fail_last_snapshot:
        loader = PyPILoader(swh_storage, url)

        assert loader.load() == {"status": "failed"}

        assert_last_visit_matches(swh_storage, url, status="failed", type="pypi")

        # No archive objects were written — not even a snapshot.
        expected_stats = {
            "content": 0,
            "directory": 0,
            "origin": 1,
            "origin_visit": 1,
            "release": 0,
            "revision": 0,
            "skipped_content": 0,
            "snapshot": 0,
        }
        assert get_stats(swh_storage) == expected_stats
Esempio n. 14
0
def test_pypi_no_release_artifact(swh_storage, requests_mock_datadir_missing_all):
    """Load a pypi project with all artifacts missing ends up with an empty
    snapshot and a "partial" visit."""
    url = "https://pypi.org/project/0805nexter"
    loader = PyPILoader(swh_storage, url)

    actual_load_status = loader.load()
    assert actual_load_status["status"] == "uneventful"
    # A snapshot id is still reported: it points at the empty snapshot.
    assert actual_load_status["snapshot_id"] is not None

    empty_snapshot = Snapshot(branches={})

    assert_last_visit_matches(
        swh_storage, url, status="partial", type="pypi", snapshot=empty_snapshot.id
    )

    stats = get_stats(swh_storage)
    # Only the origin, its visit and the empty snapshot were written.
    assert {
        "content": 0,
        "directory": 0,
        "origin": 1,
        "origin_visit": 1,
        "release": 0,
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,
    } == stats
Esempio n. 15
0
def test_npm_loader_incremental_visit(swh_storage, requests_mock_datadir_visits):
    """A second visit that finds new releases must be eventful, produce a new
    snapshot distinct from the first one, and fetch each registry artifact
    only once across both visits."""
    package = "org"
    url = package_url(package)
    loader = NpmLoader(swh_storage, url)

    expected_snapshot_id = hash_to_bytes("0996ca28d6280499abcf485b51c4e3941b057249")
    actual_load_status = loader.load()
    assert actual_load_status == {
        "status": "eventful",
        "snapshot_id": expected_snapshot_id.hex(),
    }
    assert_last_visit_matches(
        swh_storage, url, status="full", type="npm", snapshot=expected_snapshot_id
    )

    stats = get_stats(swh_storage)

    assert {
        "content": len(_expected_new_contents_first_visit),
        "directory": len(_expected_new_directories_first_visit),
        "origin": 1,
        "origin_visit": 1,
        "release": len(_expected_new_releases_first_visit),
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,
    } == stats

    # reset loader internal state (cached package metadata) so the second
    # load() call re-fetches it
    del loader._cached_info
    del loader._cached__raw_info

    actual_load_status2 = loader.load()
    assert actual_load_status2["status"] == "eventful"
    snap_id2 = actual_load_status2["snapshot_id"]
    assert snap_id2 is not None
    # The new releases must yield a snapshot distinct from the first visit's.
    assert snap_id2 != actual_load_status["snapshot_id"]

    assert_last_visit_matches(swh_storage, url, status="full", type="npm")

    stats = get_stats(swh_storage)

    assert {  # 3 new releases artifacts
        "content": len(_expected_new_contents_first_visit) + 14,
        "directory": len(_expected_new_directories_first_visit) + 15,
        "origin": 1,
        "origin_visit": 2,
        "release": len(_expected_new_releases_first_visit) + 3,
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 2,
    } == stats

    urls = [
        m.url
        for m in requests_mock_datadir_visits.request_history
        if m.url.startswith("https://registry.npmjs.org")
    ]
    assert len(urls) == len(set(urls))  # we visited each artifact once across
Esempio n. 16
0
def test_assert_last_visit_matches_wrong_status(mock_storage, mocker):
    """Wrong visit detected should raise AssertionError"""
    mismatched_status = "partial"
    # Sanity check: the fixture's visit status differs from the one requested.
    assert ORIGIN_VISIT_STATUS.status != mismatched_status
    with pytest.raises(AssertionError, match="Visit_status has status"):
        assert_last_visit_matches(mock_storage, "url", status=mismatched_status)

    # The helper must still have queried the storage before raising.
    assert mock_storage.called is True
Esempio n. 17
0
def test_npm_origin_not_found(swh_storage, requests_mock_datadir):
    """Loading a non-existent npm package fails and records a "not_found"
    visit with no snapshot."""
    url = package_url("non-existent-url")
    load_result = NpmLoader(swh_storage, url).load()

    assert load_result == {"status": "failed"}
    assert_last_visit_matches(
        swh_storage, url, status="not_found", type="npm", snapshot=None
    )
Esempio n. 18
0
def test_pypi_origin_not_found(swh_storage, requests_mock_datadir):
    """Loading an unknown pypi project fails and records a "not_found"
    visit with no snapshot."""
    url = "https://pypi.org/project/unknown"
    load_result = PyPILoader(swh_storage, url).load()

    assert load_result == {"status": "failed"}
    assert_last_visit_matches(
        swh_storage, url, status="not_found", type="pypi", snapshot=None
    )
Esempio n. 19
0
def test_assert_last_visit_matches_raise(mock_storage, mocker):
    """An origin without any visit must make the helper raise."""
    # Make the storage report no visit at all for the origin.
    mock_storage.return_value = None

    with pytest.raises(AssertionError, match="Origin url has no visits"):
        assert_last_visit_matches(mock_storage, "url", status="full")

    # The helper must still have queried the storage before raising.
    assert mock_storage.called is True
def test_load_unchanged_repo_should_be_uneventful(
    swh_storage,
    datadir,
    tmp_path,
):
    """Checks the loader can find which revisions it already loaded, using ExtIDs."""
    archive_name = "hello"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name,
                                               tmp_path)
    # The origin used here is the bare filesystem path (file:// scheme
    # stripped from the prepared repository URL).
    repo_path = repo_url.replace("file://", "")

    loader = HgLoader(swh_storage, repo_path)

    assert loader.load() == {"status": "eventful"}
    assert get_stats(loader.storage) == {
        "content": 3,
        "directory": 3,
        "origin": 1,
        "origin_visit": 1,
        "release": 1,
        "revision": 3,
        "skipped_content": 0,
        "snapshot": 1,
    }
    visit_status = assert_last_visit_matches(
        loader.storage,
        repo_path,
        type=RevisionType.MERCURIAL.value,
        status="full",
    )
    assert visit_status.snapshot is not None

    # Create a new loader (to start with a clean slate, eg. remove the caches),
    # with the new, partial, storage
    loader2 = HgLoader(swh_storage, repo_path)
    assert loader2.load() == {"status": "uneventful"}

    # Should have all the objects; only a second visit was added, no new
    # content/directory/revision objects.
    assert get_stats(loader.storage) == {
        "content": 3,
        "directory": 3,
        "origin": 1,
        "origin_visit": 2,
        "release": 1,
        "revision": 3,
        "skipped_content": 0,
        "snapshot": 1,
    }
    visit_status2 = assert_last_visit_matches(
        loader2.storage,
        repo_path,
        type=RevisionType.MERCURIAL.value,
        status="full",
    )
    # The second visit must point at the exact same snapshot.
    assert visit_status2.snapshot == visit_status.snapshot
def test_loader_hg_extid_filtering(swh_storage, datadir, tmp_path):
    """The first visit of a fork should filter already seen revisions (through extids)"""
    archive_name = "the-sandbox"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name,
                                               tmp_path)

    loader = HgLoader(swh_storage, url=repo_url)

    assert loader.load() == {"status": "eventful"}
    stats = get_stats(loader.storage)
    expected_stats = {
        "content": 2,
        "directory": 3,
        "origin": 1,
        "origin_visit": 1,
        "release": 0,
        "revision": 58,
        "skipped_content": 0,
        "snapshot": 1,
    }
    assert stats == expected_stats

    visit_status = assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="hg",
    )

    # Make a fork of the first repository we ingested
    fork_url = prepare_repository_from_archive(archive_path,
                                               "the-sandbox-reloaded",
                                               tmp_path)
    loader2 = HgLoader(swh_storage,
                       url=fork_url,
                       directory=str(tmp_path / archive_name))

    # Every revision of the fork is already known, so the visit is uneventful.
    assert loader2.load() == {"status": "uneventful"}

    stats = get_stats(loader.storage)
    # Only a new origin and a new visit were added; everything else was
    # deduplicated thanks to the extid filtering.
    expected_stats2 = expected_stats.copy()
    expected_stats2.update({
        "origin": 1 + 1,
        "origin_visit": 1 + 1,
    })
    assert stats == expected_stats2

    visit_status2 = assert_last_visit_matches(
        loader.storage,
        fork_url,
        status="full",
        type="hg",
    )
    assert visit_status.snapshot is not None
    # The fork's visit points at the very same snapshot as the original.
    assert visit_status2.snapshot == visit_status.snapshot
Esempio n. 22
0
def test_deposit_loading_failure_to_retrieve_1_artifact(
        swh_storage, deposit_client, requests_mock_datadir_missing_one):
    """Deposit with missing artifact ends up with an uneventful/partial visit"""
    # private api url form: 'https://deposit.s.o/1/private/hal/666/raw/'
    url = "some-url-2"
    deposit_id = 666
    # Intercept the status-update PUT the loader sends back to the deposit
    # server so we can inspect its body below.
    requests_mock_datadir_missing_one.put(re.compile("https"))
    loader = DepositLoader(swh_storage,
                           url,
                           deposit_id,
                           deposit_client,
                           default_filename="archive.zip")

    actual_load_status = loader.load()
    assert actual_load_status["status"] == "uneventful"
    assert actual_load_status["snapshot_id"] is not None

    assert_last_visit_matches(loader.storage,
                              url,
                              status="partial",
                              type="deposit")

    stats = get_stats(loader.storage)
    # Only the origin, its visit and a snapshot were stored.
    assert {
        "content": 0,
        "directory": 0,
        "origin": 1,
        "origin_visit": 1,
        "release": 0,
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,
    } == stats

    # Retrieve the information for deposit status update query to the deposit
    urls = [
        m for m in requests_mock_datadir_missing_one.request_history
        if m.url == f"{DEPOSIT_URL}/{deposit_id}/update/"
    ]

    assert len(urls) == 1
    update_query = urls[0]

    body = update_query.json()
    # The loader must have reported the failure (and its cause) back to the
    # deposit server.
    expected_body = {
        "status": "failed",
        "status_detail": {
            "loading": [
                "Failed to load branch HEAD for some-url-2: Fail to query "
                "'https://deposit.softwareheritage.org/1/private/666/raw/'. Reason: 404"
            ]
        },
    }

    assert body == expected_body
Esempio n. 23
0
def test_npm_no_artifact(swh_storage, requests_mock_datadir):
    """If no artifacts at all is found for origin, the visit fails completely"""
    url = package_url("catify")
    loader = NpmLoader(swh_storage, url)

    # The whole load fails — no snapshot id is even reported.
    assert loader.load() == {
        "status": "failed",
    }
    assert_last_visit_matches(swh_storage, url, status="failed", type="npm")
def test_archive_visit_no_time_for_tarball(swh_storage, requests_mock_datadir):
    """Artifacts without a 'time' entry can still be loaded successfully."""
    # Strip the timestamp from every known artifact.
    timeless_artifacts = copy.deepcopy(GNU_ARTIFACTS)
    for entry in timeless_artifacts:
        entry["time"] = None

    loader = ArchiveLoader(swh_storage, URL, artifacts=timeless_artifacts)

    assert loader.load()["status"] == "eventful"
    assert_last_visit_matches(swh_storage, URL, status="full", type="tar")
Esempio n. 25
0
def test_loader_with_invalid_svn_externals(swh_storage, repo_url, tmp_path,
                                           mocker):
    """Loading must still complete (eventful, "full" visit) when
    svn:externals properties point at URLs that do not exist."""

    # first commit
    add_commit(
        repo_url,
        "Create repository structure.",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="branches/",
            ),
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="tags/",
            ),
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="trunk/",
            ),
        ],
    )

    # second commit
    add_commit(
        repo_url,
        ("Set svn:externals property on trunk/externals path of repository to load."
         "The externals URLs are not valid."),
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="trunk/externals/",
                properties={
                    "svn:externals":
                    ("file:///tmp/invalid/svn/repo/hello hello\n"
                     "file:///tmp/invalid/svn/repo/foo.sh foo.sh")
                },
            ),
        ],
    )

    loader = SvnLoader(swh_storage,
                       repo_url,
                       temp_directory=tmp_path,
                       check_revision=1)
    assert loader.load() == {"status": "eventful"}
    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
    )
    check_snapshot(loader.snapshot, loader.storage)
def test_visit_repository_with_transplant_operations(swh_storage, datadir,
                                                     tmp_path):
    """Visit a mercurial repository visit transplant operations within should yield a
    snapshot as well.

    """

    archive_name = "transplant"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name,
                                               tmp_path)

    loader = HgLoader(
        swh_storage,
        url=repo_url,
        visit_date=VISIT_DATE,
    )

    # load hg repository
    actual_load_status = loader.load()
    assert actual_load_status == {"status": "eventful"}

    # collect swh revisions
    assert_last_visit_matches(loader.storage,
                              repo_url,
                              type=RevisionType.MERCURIAL.value,
                              status="full")

    # Gather all revision targets recorded in the latest snapshot.
    revisions = []
    snapshot = snapshot_get_latest(loader.storage, repo_url)
    for branch in snapshot.branches.values():
        if branch.target_type.value != "revision":
            continue
        revisions.append(branch.target)

    # extract original changesets info and the transplant sources
    hg_changesets = set()
    transplant_sources = set()
    for rev in loader.storage.revision_log(revisions):
        # Each loaded revision carries exactly one hg-changeset extid.
        extids = list(
            loader.storage.extid_get_from_target(ObjectType.REVISION,
                                                 [rev["id"]]))
        assert len(extids) == 1
        hg_changesets.add(hash_to_hex(extids[0].extid))
        for k, v in rev["extra_headers"]:
            if k == b"transplant_source":
                transplant_sources.add(v.decode("ascii"))

    # check extracted data are valid
    assert len(hg_changesets) > 0
    assert len(transplant_sources) > 0
    # every transplant source must reference a changeset known in the repo
    assert transplant_sources <= hg_changesets
Esempio n. 27
0
def test_assert_last_visit_matches_wrong_type(mock_storage, mocker):
    """Wrong visit detected should raise AssertionError"""
    mismatched_type = "git"
    # Sanity check: the fixture's visit type differs from the requested one.
    assert ORIGIN_VISIT.type != mismatched_type

    with pytest.raises(AssertionError, match="Visit has type"):
        assert_last_visit_matches(
            mock_storage,
            "url",
            status=ORIGIN_VISIT_STATUS.status,
            type=mismatched_type,  # mismatched type will raise
        )

    # The helper must still have queried the storage before raising.
    assert mock_storage.called is True
def test_cran_fail_to_build_or_load_extrinsic_metadata(
    method_name, swh_storage, requests_mock_datadir
):
    """problem during loading: {visit: failed, status: failed, no snapshot}"""
    version = "2.22-6"
    base_url = "https://cran.r-project.org"
    origin_url = f"{base_url}/Packages/Recommended_KernSmooth/index.html"
    artifact_url = (
        f"{base_url}/src_contrib_1.4.0_Recommended_KernSmooth_{version}.tar.gz"  # noqa
    )

    # Patch the (parametrized) metadata method so it raises mid-load.
    full_method_name = f"swh.loader.package.cran.loader.CRANLoader.{method_name}"
    with patch(
        full_method_name,
        side_effect=ValueError("Fake to fail to build or load extrinsic metadata"),
    ):
        loader = CRANLoader(
            swh_storage,
            origin_url,
            artifacts=[
                {
                    "url": artifact_url,
                    "version": version,
                    "package": "Recommended_KernSmooth",
                }
            ],
        )

        actual_load_status = loader.load()

        # The load fails but still reports the snapshot id it reached.
        assert actual_load_status == {
            "status": "failed",
            "snapshot_id": SNAPSHOT.id.hex(),
        }

        visit_stats = get_stats(swh_storage)
        # The artifact itself was ingested before the metadata step failed.
        assert {
            "content": 33,
            "directory": 7,
            "origin": 1,
            "origin_visit": 1,
            "release": 1,
            "revision": 0,
            "skipped_content": 0,
            "snapshot": 1,
        } == visit_stats

        assert_last_visit_matches(
            swh_storage, origin_url, status="partial", type="cran", snapshot=SNAPSHOT.id
        )
def test_evaluation_branch(swh_storage, requests_mock_datadir):
    """Loading the nixguix sources must be eventful and record SNAPSHOT1
    with a "partial" visit."""
    loader = NixGuixLoader(swh_storage, sources_url)

    assert loader.load()["status"] == "eventful"

    assert_last_visit_matches(
        swh_storage,
        sources_url,
        status="partial",
        type="nixguix",
        snapshot=SNAPSHOT1.id,
    )

    check_snapshot(SNAPSHOT1, storage=swh_storage)
Esempio n. 30
0
def test_pypi_visit_with_missing_artifact(
    swh_storage, requests_mock_datadir_missing_one
):
    """Load a pypi project with some missing artifacts ends up with 1 snapshot"""
    url = "https://pypi.org/project/0805nexter"
    loader = PyPILoader(swh_storage, url)

    actual_load_status = loader.load()
    expected_snapshot_id = hash_to_bytes("00785a38479abe5fbfa402df96be26d2ddf89c97")
    assert actual_load_status == {
        "status": "eventful",
        "snapshot_id": expected_snapshot_id.hex(),
    }

    # The visit is "partial" because one artifact could not be retrieved.
    assert_last_visit_matches(
        swh_storage,
        url,
        status="partial",
        type="pypi",
        snapshot=expected_snapshot_id,
    )

    # Only the reachable release (1.2.0) makes it into the snapshot.
    # Note: expected_snapshot_id is already bytes (converted above), so it is
    # passed directly instead of through a redundant hash_to_bytes() call.
    expected_snapshot = Snapshot(
        id=expected_snapshot_id,
        branches={
            b"releases/1.2.0": SnapshotBranch(
                target=hash_to_bytes("fbbcb817f01111b06442cdcc93140ab3cc777d68"),
                target_type=TargetType.RELEASE,
            ),
            b"HEAD": SnapshotBranch(
                target=b"releases/1.2.0",
                target_type=TargetType.ALIAS,
            ),
        },
    )
    check_snapshot(expected_snapshot, storage=swh_storage)

    stats = get_stats(swh_storage)

    assert {
        "content": 3,
        "directory": 2,
        "origin": 1,
        "origin_visit": 1,
        "release": 1,
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,
    } == stats