Example #1
def test_multiple_open_heads(swh_storage, datadir, tmp_path):
    archive_name = "multiple-heads"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name,
                                               tmp_path)

    loader = HgLoader(
        storage=swh_storage,
        url=repo_url,
    )

    actual_load_status = loader.load()
    assert actual_load_status == {"status": "eventful"}

    assert_last_visit_matches(swh_storage, repo_url, status="full", type="hg")

    snapshot = snapshot_get_latest(swh_storage, repo_url)
    expected_branches = [
        b"HEAD",
        b"branch-heads/default/0",
        b"branch-heads/default/1",
        b"branch-tip/default",
    ]
    assert sorted(snapshot.branches.keys()) == expected_branches

    # Check that we don't load anything the second time
    loader = HgLoader(
        storage=swh_storage,
        url=repo_url,
    )

    actual_load_status = loader.load()

    assert actual_load_status == {"status": "uneventful"}
Example #2
    def _latest_snapshot_revision(
        self,
        origin_url: str,
    ) -> Optional[Tuple[Snapshot, Revision]]:
        """Look for latest snapshot revision and returns it if any.

        Args:
            origin_url: Origin identifier
            previous_swh_revision: possible previous swh revision (either a dict or
                revision identifier)

        Returns:
            Tuple of the latest Snapshot from the previous visit and its targeted
            revision if any or None otherwise.

        """
        storage = self.storage
        latest_snapshot = snapshot_get_latest(storage, origin_url)
        if not latest_snapshot:
            return None
        branches = latest_snapshot.branches
        if not branches:
            return None
        branch = branches.get(DEFAULT_BRANCH)
        if not branch:
            return None
        if branch.target_type != TargetType.REVISION:
            return None
        swh_id = branch.target

        revision = storage.revision_get([swh_id])[0]
        if not revision:
            return None
        return latest_snapshot, revision
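A minimal usage sketch for the helper above (hedged: `loader` stands for any
object exposing this method, and the URL is illustrative):

latest = loader._latest_snapshot_revision("https://example.org/repo")
if latest is None:
    # first visit: no previous snapshot, load everything from scratch
    pass
else:
    snapshot, revision = latest
    # incremental visit: resume from the previously archived revision
    print(snapshot.id.hex(), revision.id.hex())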
Example #3
def test_load_repo_check_extids_write_version(swh_storage, datadir, tmp_path):
    """ExtIDs should be stored with a given version when loading is done"""
    archive_name = "hello"
    archive_path = Path(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name,
                                               tmp_path)

    hg_strip(repo_url.replace("file://", ""), "tip")
    loader = HgLoader(swh_storage, repo_url)
    assert loader.load() == {"status": "eventful"}

    # Ensure we write ExtIDs to a specific version.
    snapshot = snapshot_get_latest(swh_storage, repo_url)

    # First, collect the revision targets from that snapshot's branches
    revision_ids = [
        branch.target for branch in snapshot.branches.values()
        if branch.target_type == TargetType.REVISION
    ]

    assert len(revision_ids) > 0

    # Those revisions should have their associated ExtID version set to EXTID_VERSION
    extids = swh_storage.extid_get_from_target(ObjectType.REVISION,
                                               revision_ids)

    assert len(extids) == len(revision_ids)
    for extid in extids:
        assert extid.extid_version == EXTID_VERSION
Example #4
def _partial_copy_storage(old_storage, origin_url: str, mechanism: str,
                          copy_revisions: bool):
    """Create a new storage, and only copy ExtIDs or head revisions to it."""
    new_storage = get_storage(cls="memory")
    snapshot = snapshot_get_latest(old_storage, origin_url)
    assert snapshot
    heads = [branch.target for branch in snapshot.branches.values()]

    if mechanism == "extid":
        extids = old_storage.extid_get_from_target(ObjectType.REVISION, heads)
        new_storage.extid_add(extids)
        if copy_revisions:
            # copy revisions, but erase their metadata to make sure the loader
            # doesn't fall back to revision.metadata["nodeid"]
            revisions = [
                attr.evolve(rev, metadata={})
                for rev in old_storage.revision_get(heads) if rev
            ]
            new_storage.revision_add(revisions)

    else:
        assert mechanism == "same storage"
        return old_storage

    # copy origin, visit, status
    new_storage.origin_add(old_storage.origin_get([origin_url]))
    visit = old_storage.origin_visit_get_latest(origin_url)
    new_storage.origin_visit_add([visit])
    statuses = old_storage.origin_visit_status_get(origin_url,
                                                   visit.visit).results
    new_storage.origin_visit_status_add(statuses)
    new_storage.snapshot_add([snapshot])

    return new_storage
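A hedged sketch of how this helper is typically exercised: load once, copy only
the ExtIDs plus metadata-stripped head revisions into a fresh storage, and
check that a second load resumes from them. `HgLoader`,
`prepare_repository_from_archive` and the fixtures are reused from the examples
above; the final status is the intended outcome, not a guarantee.

def test_load_resumes_from_extids(swh_storage, datadir, tmp_path):
    archive_name = "hello"
    archive_path = Path(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name,
                                               tmp_path)
    assert HgLoader(swh_storage, repo_url).load() == {"status": "eventful"}

    # keep only ExtIDs and metadata-less head revisions from the first load
    new_storage = _partial_copy_storage(swh_storage,
                                        repo_url,
                                        mechanism="extid",
                                        copy_revisions=True)

    # the second load should find nothing new to archive
    assert HgLoader(new_storage, repo_url).load() == {"status": "uneventful"}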
Example #5
def _get_origin_dfs_revisions_walker():
    tests_data = get_tests_data()
    storage = tests_data["storage"]
    origin = random.choice(tests_data["origins"][:-1])
    snapshot = snapshot_get_latest(storage, origin["url"])
    if snapshot.branches[b"HEAD"].target_type.value == "alias":
        target = snapshot.branches[b"HEAD"].target
        head = snapshot.branches[target].target
    else:
        head = snapshot.branches[b"HEAD"].target
    return get_revisions_walker("dfs", storage, head)
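The HEAD handling above resolves a single alias hop. A slightly more general,
hypothetical helper (hedged sketch, with a hop limit to guard against alias
cycles):

def resolve_branch(snapshot, name: bytes, max_hops: int = 10) -> bytes:
    """Follow alias branches until a concrete target is reached."""
    branch = snapshot.branches[name]
    for _ in range(max_hops):
        if branch.target_type.value != "alias":
            return branch.target
        # an alias branch targets another branch name, not an object
        branch = snapshot.branches[branch.target]
    raise ValueError(f"too many alias hops from {name!r}")

Usage would then be `head = resolve_branch(snapshot, b"HEAD")`.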
Example #6
def test_visit_repository_with_transplant_operations(swh_storage, datadir,
                                                     tmp_path):
    """Visit a mercurial repository visit transplant operations within should yield a
    snapshot as well.

    """

    archive_name = "transplant"
    archive_path = os.path.join(datadir, f"{archive_name}.tgz")
    repo_url = prepare_repository_from_archive(archive_path, archive_name,
                                               tmp_path)

    loader = HgLoader(
        swh_storage,
        url=repo_url,
        visit_date=VISIT_DATE,
    )

    # load hg repository
    actual_load_status = loader.load()
    assert actual_load_status == {"status": "eventful"}

    # collect swh revisions
    assert_last_visit_matches(loader.storage,
                              repo_url,
                              type=RevisionType.MERCURIAL.value,
                              status="full")

    revisions = []
    snapshot = snapshot_get_latest(loader.storage, repo_url)
    for branch in snapshot.branches.values():
        if branch.target_type.value != "revision":
            continue
        revisions.append(branch.target)

    # extract original changesets info and the transplant sources
    hg_changesets = set()
    transplant_sources = set()
    for rev in loader.storage.revision_log(revisions):
        extids = list(
            loader.storage.extid_get_from_target(ObjectType.REVISION,
                                                 [rev["id"]]))
        assert len(extids) == 1
        hg_changesets.add(hash_to_hex(extids[0].extid))
        for k, v in rev["extra_headers"]:
            if k == b"transplant_source":
                transplant_sources.add(v.decode("ascii"))

    # check extracted data are valid
    assert len(hg_changesets) > 0
    assert len(transplant_sources) > 0
    assert transplant_sources <= hg_changesets
Example #7
def origin_with_pull_request_branches():
    """
    Hypothesis strategy returning a random origin with pull request branches
    ingested into the test archive.
    """
    ret = []
    tests_data = get_tests_data()
    storage = tests_data["storage"]
    origins = storage.origin_list(limit=1000)
    for origin in origins.results:
        snapshot = snapshot_get_latest(storage, origin.url)
        if any([b"refs/pull/" in b for b in snapshot.branches]):
            ret.append(origin)
    return sampled_from(ret)
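A hedged sketch of how such a strategy is consumed; the test body merely
restates the strategy's filter (`get_tests_data` and `snapshot_get_latest`
are the same helpers used above):

from hypothesis import given

@given(origin_with_pull_request_branches())
def test_origin_has_pull_request_branches(origin):
    storage = get_tests_data()["storage"]
    snapshot = snapshot_get_latest(storage, origin.url)
    assert any(b"refs/pull/" in name for name in snapshot.branches)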
Example #8
def test_snapshot_get_latest_none(swh_storage, sample_data):
    """Retrieve latest snapshot on unknown origin or origin without snapshot should
    yield no result

    """
    # unknown origin so None
    assert snapshot_get_latest(swh_storage, "unknown-origin") is None

    # no snapshot on origin visit so None
    origin = sample_data.origin
    swh_storage.origin_add([origin])
    origin_visit, origin_visit2 = sample_data.origin_visits[:2]
    assert origin_visit.origin == origin.url

    swh_storage.origin_visit_add([origin_visit])
    assert snapshot_get_latest(swh_storage, origin.url) is None

    ov1 = swh_storage.origin_visit_get_latest(origin.url)
    assert ov1 is not None

    # visit references a snapshot but the snapshot does not exist in backend for some
    # reason
    complete_snapshot = sample_data.snapshots[2]
    swh_storage.origin_visit_status_add([
        OriginVisitStatus(
            origin=origin.url,
            visit=ov1.visit,
            date=origin_visit2.date,
            status="partial",
            snapshot=complete_snapshot.id,
        )
    ])
    # so we do not find it
    assert snapshot_get_latest(swh_storage, origin.url) is None
    assert snapshot_get_latest(swh_storage, origin.url,
                               branches_count=1) is None
Example #9
def origin_with_releases():
    """
    Hypothesis strategy returning a random origin with release branches
    ingested into the test archive.
    """
    ret = []
    tests_data = get_tests_data()
    for origin in tests_data["origins"]:
        snapshot = snapshot_get_latest(tests_data["storage"], origin["url"])
        if any(b.target_type.value == "release"
               for b in snapshot.branches.values()):
            ret.append(origin)
    return sampled_from(ret)
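Likewise, a hedged usage sketch for this strategy: it checks that every
release branch of the drawn origin targets a release known to the storage
(test name and assertion are illustrative):

from hypothesis import given

@given(origin_with_releases())
def test_origin_release_branches_resolve(origin):
    storage = get_tests_data()["storage"]
    snapshot = snapshot_get_latest(storage, origin["url"])
    targets = [b.target for b in snapshot.branches.values()
               if b.target_type.value == "release"]
    assert all(rel is not None for rel in storage.release_get(targets))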
Example #10
def lookup_latest_origin_snapshot(
        origin: str,
        allowed_statuses: Optional[List[str]] = None) -> Optional[Dict[str, Any]]:
    """Return information about the latest snapshot of an origin.

    .. warning:: At most 1000 branches contained in the snapshot
        will be returned for performance reasons.

    Args:
        origin: URL or integer identifier of the origin
        allowed_statuses: list of visit statuses considered
            to find the latest snapshot for the visit. For instance,
            ``allowed_statuses=['full']`` will only consider visits that
            have successfully run to completion.

    Returns:
        A dict filled with the snapshot content.
    """
    snp = snapshot_get_latest(storage,
                              origin,
                              allowed_statuses=allowed_statuses,
                              branches_count=1000)
    return converters.from_snapshot(snp.to_dict()) if snp is not None else None
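A hedged usage sketch (the URL is illustrative, and this assumes the dict
produced by `converters.from_snapshot` keeps `id` and `branches` keys):

snapshot_data = lookup_latest_origin_snapshot(
    "https://git.example.org/project",
    allowed_statuses=["full"],  # only consider fully successful visits
)
if snapshot_data is not None:
    print(snapshot_data["id"], len(snapshot_data["branches"]))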
Example #11
def _init_tests_data():
    # To hold reference to the memory storage
    storage = get_storage("memory")

    # Create search instance
    search = get_search("memory")
    search.initialize()
    search.origin_update({"url": origin["url"]} for origin in _TEST_ORIGINS)

    # Create indexer storage instance that will be shared by indexers
    idx_storage = get_indexer_storage("memory")

    # Declare a test tool for origin intrinsic metadata tests
    idx_tool = idx_storage.indexer_configuration_add([INDEXER_TOOL])[0]
    INDEXER_TOOL["id"] = idx_tool["id"]

    # Load git repositories from archives
    for origin in _TEST_ORIGINS:
        for i, archive_ in enumerate(origin["archives"]):
            if i > 0:
                # ensure visit dates will be different when simulating
                # multiple visits of an origin
                time.sleep(1)
            origin_repo_archive = os.path.join(os.path.dirname(__file__),
                                               "resources/repos/%s" % archive_)
            loader = GitLoaderFromArchive(
                storage,
                origin["url"],
                archive_path=origin_repo_archive,
            )

            result = loader.load()
            assert result["status"] == "eventful"

        ori = storage.origin_get([origin["url"]])[0]
        origin.update(ori.to_dict())  # add an 'id' key if enabled
        search.origin_update([{
            "url": origin["url"],
            "has_visits": True,
            "visit_types": ["git"]
        }])

    for i in range(250):
        _add_origin(storage,
                    search,
                    origin_url=f"https://many.origins/{i+1}",
                    visit_type="tar")

    sha1s: Set[Sha1] = set()
    directories = set()
    revisions = set()
    releases = set()
    snapshots = set()

    content_path = {}

    # Get all objects loaded into the test archive
    common_metadata = {ORIGIN_METADATA_KEY: ORIGIN_METADATA_VALUE}
    for origin in _TEST_ORIGINS:
        snp = snapshot_get_latest(storage, origin["url"])
        snapshots.add(hash_to_hex(snp.id))
        for branch_name, branch_data in snp.branches.items():
            target_type = branch_data.target_type.value
            if target_type == "revision":
                revisions.add(branch_data.target)
                if b"master" in branch_name:
                    # Add some origin intrinsic metadata for tests
                    # copy to avoid mutating the shared dict across origins
                    metadata = dict(common_metadata)
                    metadata.update(origin.get("metadata", {}))
                    origin_metadata = OriginIntrinsicMetadataRow(
                        id=origin["url"],
                        from_revision=branch_data.target,
                        indexer_configuration_id=idx_tool["id"],
                        metadata=metadata,
                        mappings=[],
                    )
                    idx_storage.origin_intrinsic_metadata_add(
                        [origin_metadata])
                    search.origin_update([{
                        "url": origin["url"],
                        "intrinsic_metadata": metadata
                    }])

                    ORIGIN_MASTER_REVISION[origin["url"]] = hash_to_hex(
                        branch_data.target)
            elif target_type == "release":
                release = storage.release_get([branch_data.target])[0]
                revisions.add(release.target)
                releases.add(hash_to_hex(branch_data.target))

        for rev_log in storage.revision_shortlog(set(revisions)):
            rev_id = rev_log[0]
            revisions.add(rev_id)

        for rev in storage.revision_get(revisions):
            if rev is None:
                continue
            dir_id = rev.directory
            directories.add(hash_to_hex(dir_id))
            for entry in dir_iterator(storage, dir_id):
                if entry["type"] == "file":
                    sha1s.add(entry["sha1"])
                    content_path[entry["sha1"]] = "/".join(
                        [hash_to_hex(dir_id), entry["path"].decode("utf-8")])
                elif entry["type"] == "dir":
                    directories.add(hash_to_hex(entry["target"]))

    _add_extra_contents(storage, sha1s)

    # Get all checksums for each content
    result: List[Optional[Content]] = storage.content_get(list(sha1s))

    contents: List[Dict] = []
    for content in result:
        assert content is not None
        sha1 = hash_to_hex(content.sha1)
        content_metadata = {
            algo: hash_to_hex(getattr(content, algo))
            for algo in DEFAULT_ALGORITHMS
        }

        path = ""
        if content.sha1 in content_path:
            path = content_path[content.sha1]

        cnt_data = storage.content_get_data(content.sha1)
        assert cnt_data is not None
        mimetype, encoding = get_mimetype_and_encoding_for_content(cnt_data)
        _, _, cnt_data = _re_encode_content(mimetype, encoding, cnt_data)
        content_display_data = prepare_content_for_display(
            cnt_data, mimetype, path)

        content_metadata.update({
            "path": path,
            "mimetype": mimetype,
            "encoding": encoding,
            "hljs_language": content_display_data["language"],
            "data": content_display_data["content_data"],
        })
        _contents[sha1] = content_metadata
        contents.append(content_metadata)

    # Add the empty directory to the test archive
    storage.directory_add([Directory(entries=())])

    # Add empty content to the test archive
    storage.content_add([Content.from_data(data=b"")])

    # Add fake git origin with pull request branches
    _add_origin(
        storage,
        search,
        origin_url="https://git.example.org/project",
        snapshot_branches={
            b"refs/heads/master": {
                "target_type": "revision",
                "target": next(iter(revisions)),
            },
            **{
                f"refs/pull/{i}".encode(): {
                    "target_type": "revision",
                    "target": next(iter(revisions)),
                }
                for i in range(300)
            },
        },
    )

    # Return tests data
    return {
        "search": search,
        "storage": storage,
        "idx_storage": idx_storage,
        "origins": _TEST_ORIGINS,
        "contents": contents,
        "directories": list(directories),
        "releases": list(releases),
        "revisions": list(map(hash_to_hex, revisions)),
        "snapshots": list(snapshots),
        "generated_checksums": set(),
    }
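A hedged sketch of how the returned mapping is consumed, mirroring the
`get_tests_data()` calls in the earlier examples:

tests_data = _init_tests_data()
storage = tests_data["storage"]
first_origin = tests_data["origins"][0]
snp = snapshot_get_latest(storage, first_origin["url"])
assert hash_to_hex(snp.id) in tests_data["snapshots"]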
Example #12
def snapshot_get_latest(self, origin_url):
    snp = snapshot_get_latest(self.storage, origin_url)
    return converters.from_snapshot(snp.to_dict())
Example #13
def get_full_snapshot(self, origin_url) -> Optional[Snapshot]:
    return snapshot_get_latest(self.storage, origin_url)
Example #14
def test_load_upgrade_from_revision_extids(caplog):
    """Tests that, when loading incrementally based on a snapshot made by an old
    version of the loader, the loader will convert revisions to releases
    and add them to the storage.

    Also checks that, if an extid exists pointing to a non-existent revision
    (which should never happen, but you never know...), the release is loaded from
    scratch."""

    storage = get_storage("memory")

    origin = "http://example.org"
    dir1_swhid = CoreSWHID(object_type=ObjectType.DIRECTORY,
                           object_id=b"d" * 20)
    dir2_swhid = CoreSWHID(object_type=ObjectType.DIRECTORY,
                           object_id=b"e" * 20)

    date = TimestampWithTimezone.from_datetime(
        datetime.datetime.now(tz=datetime.timezone.utc))
    person = Person.from_fullname(b"Jane Doe <*****@*****.**>")

    rev1 = Revision(
        message=b"blah",
        author=person,
        date=date,
        committer=person,
        committer_date=date,
        directory=dir1_swhid.object_id,
        type=RevisionType.TAR,
        synthetic=True,
    )

    rel1 = Release(
        name=b"v1.0",
        message=b"blah\n",
        author=person,
        date=date,
        target=dir1_swhid.object_id,
        target_type=ModelObjectType.DIRECTORY,
        synthetic=True,
    )

    rev1_swhid = rev1.swhid()
    rel1_swhid = rel1.swhid()
    rev2_swhid = CoreSWHID(object_type=ObjectType.REVISION,
                           object_id=b"b" * 20)
    rel2_swhid = CoreSWHID(object_type=ObjectType.RELEASE, object_id=b"c" * 20)

    # Results of a previous load
    storage.extid_add([
        ExtID("extid-type1", b"extid-of-v1.0", rev1_swhid, 0),
        ExtID("extid-type1", b"extid-of-v2.0", rev2_swhid, 0),
    ])
    storage.revision_add([rev1])
    last_snapshot = Snapshot(
        branches={
            b"v1.0":
            SnapshotBranch(target_type=TargetType.REVISION,
                           target=rev1_swhid.object_id),
            b"v2.0":
            SnapshotBranch(target_type=TargetType.REVISION,
                           target=rev2_swhid.object_id),
        })
    storage.snapshot_add([last_snapshot])
    date = datetime.datetime.now(tz=datetime.timezone.utc)
    storage.origin_add([Origin(url=origin)])
    storage.origin_visit_add([
        OriginVisit(origin="http://example.org",
                    visit=1,
                    date=date,
                    type="tar")
    ])
    storage.origin_visit_status_add([
        OriginVisitStatus(
            origin=origin,
            visit=1,
            status="full",
            date=date,
            snapshot=last_snapshot.id,
        )
    ])

    loader = StubPackageLoader(storage, "http://example.org")
    patch.object(
        loader,
        "_load_release",
        return_value=(rel2_swhid.object_id, dir2_swhid.object_id),
        autospec=True,
    ).start()
    patch.object(
        loader,
        "get_versions",
        return_value=["v1.0", "v2.0", "v3.0"],
        autospec=True,
    ).start()

    caplog.set_level(logging.ERROR)

    loader.load()

    assert len(caplog.records) == 1
    (record, ) = caplog.records
    assert record.levelname == "ERROR"
    assert "Failed to upgrade branch branch-v2.0" in record.message

    assert loader._load_release.mock_calls == [
        # v1.0: not loaded because there is already a revision matching it
        # v2.0: loaded, as the revision is missing from the storage even though there
        #       is an extid
        call(StubPackageInfo(origin, "example-v2.0.tar", "v2.0"),
             Origin(url=origin)),
        # v3.0: loaded (did not exist yet)
        call(StubPackageInfo(origin, "example-v3.0.tar", "v3.0"),
             Origin(url=origin)),
    ]

    snapshot = Snapshot(
        branches={
            b"branch-v1.0":
            SnapshotBranch(target_type=TargetType.RELEASE,
                           target=rel1_swhid.object_id),
            b"branch-v2.0":
            SnapshotBranch(target_type=TargetType.RELEASE,
                           target=rel2_swhid.object_id),
            b"branch-v3.0":
            SnapshotBranch(target_type=TargetType.RELEASE,
                           target=rel2_swhid.object_id),
        })
    assert snapshot_get_latest(storage, origin) == snapshot

    extids = storage.extid_get_from_target(
        ObjectType.RELEASE,
        [
            rel1_swhid.object_id,
            rel2_swhid.object_id,
        ],
    )

    assert set(extids) == {
        ExtID("extid-type1", b"extid-of-v1.0", rel1_swhid),
        ExtID("extid-type1", b"extid-of-v2.0", rel2_swhid),
        ExtID("extid-type2", b"extid-of-v3.0", rel2_swhid),
    }
Example #15
def test_load_extids() -> None:
    """Checks PackageLoader.load() skips iff it should, and writes (only)
    the new ExtIDs"""
    storage = get_storage("memory")

    dir_swhid = CoreSWHID(object_type=ObjectType.DIRECTORY,
                          object_id=b"e" * 20)

    rels = [
        Release(
            name=f"v{i}.0".encode(),
            message=b"blah\n",
            target=dir_swhid.object_id,
            target_type=ModelObjectType.DIRECTORY,
            synthetic=True,
        ) for i in (1, 2, 3, 4)
    ]
    storage.release_add(rels[0:3])

    origin = "http://example.org"
    rel1_swhid = rels[0].swhid()
    rel2_swhid = rels[1].swhid()
    rel3_swhid = rels[2].swhid()
    rel4_swhid = rels[3].swhid()

    # Results of a previous load
    storage.extid_add([
        ExtID("extid-type1", b"extid-of-v1.0", rel1_swhid),
        ExtID("extid-type2", b"extid-of-v2.0", rel2_swhid),
    ])
    last_snapshot = Snapshot(
        branches={
            b"v1.0":
            SnapshotBranch(target_type=TargetType.RELEASE,
                           target=rel1_swhid.object_id),
            b"v2.0":
            SnapshotBranch(target_type=TargetType.RELEASE,
                           target=rel2_swhid.object_id),
            b"v3.0":
            SnapshotBranch(target_type=TargetType.RELEASE,
                           target=rel3_swhid.object_id),
        })
    storage.snapshot_add([last_snapshot])
    date = datetime.datetime.now(tz=datetime.timezone.utc)
    storage.origin_add([Origin(url=origin)])
    storage.origin_visit_add([
        OriginVisit(origin="http://example.org",
                    visit=1,
                    date=date,
                    type="tar")
    ])
    storage.origin_visit_status_add([
        OriginVisitStatus(
            origin=origin,
            visit=1,
            status="full",
            date=date,
            snapshot=last_snapshot.id,
        )
    ])

    loader = StubPackageLoader(storage, "http://example.org")
    patch.object(
        loader,
        "_load_release",
        return_value=(rel4_swhid.object_id, dir_swhid.object_id),
        autospec=True,
    ).start()

    loader.load()

    assert loader._load_release.mock_calls == [  # type: ignore
        # v1.0: not loaded because there is already its (extid_type, extid, rel)
        #       in the storage.
        # v2.0: loaded, because there is already a similar extid, but different type
        call(
            StubPackageInfo(origin, "example-v2.0.tar", "v2.0"),
            Origin(url=origin),
        ),
        # v3.0: loaded despite having an (extid_type, extid) in storage, because
        #       the target of the extid is not in the previous snapshot
        call(
            StubPackageInfo(origin, "example-v3.0.tar", "v3.0"),
            Origin(url=origin),
        ),
        # v4.0: loaded, because there isn't its extid
        call(
            StubPackageInfo(origin, "example-v4.0.tar", "v4.0"),
            Origin(url=origin),
        ),
    ]

    # then check the snapshot has all the branches.
    # versions 2.0 to 4.0 all point to rel4_swhid (instead of the value of the last
    # snapshot), because they had to be loaded (mismatched extid), and the mocked
    # _load_release always returns rel4_swhid.
    snapshot = Snapshot(
        branches={
            b"branch-v1.0":
            SnapshotBranch(target_type=TargetType.RELEASE,
                           target=rel1_swhid.object_id),
            b"branch-v2.0":
            SnapshotBranch(target_type=TargetType.RELEASE,
                           target=rel4_swhid.object_id),
            b"branch-v3.0":
            SnapshotBranch(target_type=TargetType.RELEASE,
                           target=rel4_swhid.object_id),
            b"branch-v4.0":
            SnapshotBranch(target_type=TargetType.RELEASE,
                           target=rel4_swhid.object_id),
        })
    assert snapshot_get_latest(storage, origin) == snapshot

    extids = storage.extid_get_from_target(
        ObjectType.RELEASE,
        [
            rel1_swhid.object_id,
            rel2_swhid.object_id,
            rel3_swhid.object_id,
            rel4_swhid.object_id,
        ],
    )

    assert set(extids) == {
        # What we inserted at the beginning of the test:
        ExtID("extid-type1", b"extid-of-v1.0", rel1_swhid),
        ExtID("extid-type2", b"extid-of-v2.0", rel2_swhid),
        # Added by the loader:
        ExtID("extid-type1", b"extid-of-v2.0", rel4_swhid),
        ExtID("extid-type2", b"extid-of-v3.0", rel4_swhid),
        ExtID("extid-type2", b"extid-of-v4.0", rel4_swhid),
    }
Example #16
def test_snapshot_get_latest(swh_storage, sample_data):
    origin = sample_data.origin
    swh_storage.origin_add([origin])

    visit1, visit2 = sample_data.origin_visits[:2]
    assert visit1.origin == origin.url

    swh_storage.origin_visit_add([visit1])
    ov1 = swh_storage.origin_visit_get_latest(origin.url)

    # Add the snapshot to the storage; it becomes visit1's snapshot once a
    # visit status references it below
    complete_snapshot = sample_data.snapshots[2]
    swh_storage.snapshot_add([complete_snapshot])

    swh_storage.origin_visit_status_add([
        OriginVisitStatus(
            origin=origin.url,
            visit=ov1.visit,
            date=visit2.date,
            status="partial",
            snapshot=None,
        )
    ])
    assert visit1.date < visit2.date

    # no snapshot associated to the visit, so None
    actual_snapshot = snapshot_get_latest(swh_storage,
                                          origin.url,
                                          allowed_statuses=["partial"])
    assert actual_snapshot is None

    date_now = now()
    assert visit2.date < date_now
    swh_storage.origin_visit_status_add([
        OriginVisitStatus(
            origin=ov1.origin,
            visit=ov1.visit,
            date=date_now,
            type=ov1.type,
            status="full",
            snapshot=complete_snapshot.id,
        )
    ])

    swh_storage.origin_visit_add(
        [OriginVisit(
            origin=origin.url,
            date=now(),
            type=visit1.type,
        )])

    actual_snapshot = snapshot_get_latest(swh_storage, origin.url)
    assert actual_snapshot is not None
    assert actual_snapshot == complete_snapshot

    actual_snapshot = snapshot_get_latest(swh_storage,
                                          origin.url,
                                          branches_count=1)
    assert actual_snapshot is not None
    assert actual_snapshot.id == complete_snapshot.id
    assert len(actual_snapshot.branches.values()) == 1

    with pytest.raises(ValueError,
                       match="branches_count must be a positive integer"):
        snapshot_get_latest(swh_storage,
                            origin.url,
                            branches_count="something-wrong")