Beispiel #1
0
def dulwich_tsinfo_to_timestamp(
    timestamp,
    timezone: int,
    timezone_neg_utc: bool,
    timezone_bytes: Optional[bytes],
) -> TimestampWithTimezone:
    """Convert the dulwich timestamp information to a structure compatible with
    Software Heritage."""
    ts = Timestamp(
        seconds=int(timestamp),
        microseconds=0,
    )
    if timezone_bytes is None:
        # Failed to parse from the raw manifest, fallback to what Dulwich managed to
        # parse.
        return TimestampWithTimezone.from_numeric_offset(
            timestamp=ts,
            offset=timezone // 60,
            negative_utc=timezone_neg_utc,
        )
    else:
        return TimestampWithTimezone(timestamp=ts, offset_bytes=timezone_bytes)
def test_ignore_displayname(swh_storage, use_graph):
    """Tests the original authorship information is used instead of
    configured display names; otherwise objects would not match their hash,
    and git-fsck/git-clone would fail.

    This tests both with and without swh-graph, as both configurations use different
    code paths to fetch revisions.
    """

    date = TimestampWithTimezone.from_numeric_offset(Timestamp(1643882820, 0),
                                                     0, False)
    legacy_person = Person.from_fullname(b"old me <*****@*****.**>")
    current_person = Person.from_fullname(b"me <*****@*****.**>")

    content = Content.from_data(b"foo")
    swh_storage.content_add([content])

    directory = Directory(
        entries=(DirectoryEntry(name=b"file1",
                                type="file",
                                perms=0o100644,
                                target=content.sha1_git), ), )
    swh_storage.directory_add([directory])

    revision = Revision(
        message=b"rev",
        author=legacy_person,
        date=date,
        committer=legacy_person,
        committer_date=date,
        parents=(),
        type=RevisionType.GIT,
        directory=directory.id,
        synthetic=True,
    )
    swh_storage.revision_add([revision])

    release = Release(
        name=b"v1.1.0",
        message=None,
        author=legacy_person,
        date=date,
        target=revision.id,
        target_type=ObjectType.REVISION,
        synthetic=True,
    )
    swh_storage.release_add([release])

    snapshot = Snapshot(
        branches={
            b"refs/tags/v1.1.0":
            SnapshotBranch(target=release.id, target_type=TargetType.RELEASE),
            b"HEAD":
            SnapshotBranch(target=revision.id,
                           target_type=TargetType.REVISION),
        })
    swh_storage.snapshot_add([snapshot])

    # Add all objects to graph
    if use_graph:
        from swh.graph.naive_client import NaiveClient as GraphClient

        nodes = [
            str(x.swhid())
            for x in [content, directory, revision, release, snapshot]
        ]
        edges = [(str(x.swhid()), str(y.swhid())) for (x, y) in [
            (directory, content),
            (revision, directory),
            (release, revision),
            (snapshot, release),
            (snapshot, revision),
        ]]
        swh_graph = unittest.mock.Mock(
            wraps=GraphClient(nodes=nodes, edges=edges))
    else:
        swh_graph = None

    # Set a display name
    with swh_storage.db() as db:
        with db.transaction() as cur:
            cur.execute(
                "UPDATE person set displayname = %s where fullname = %s",
                (current_person.fullname, legacy_person.fullname),
            )

    # Check the display name did apply in the storage
    assert swh_storage.revision_get([revision.id])[0] == attr.evolve(
        revision,
        author=current_person,
        committer=current_person,
    )

    # Cook
    cooked_swhid = snapshot.swhid()
    backend = InMemoryVaultBackend()
    cooker = GitBareCooker(
        cooked_swhid,
        backend=backend,
        storage=swh_storage,
        graph=swh_graph,
    )

    cooker.cook()

    # Get bundle
    bundle = backend.fetch("git_bare", cooked_swhid)

    # Extract bundle and make sure both revisions are in it
    with tempfile.TemporaryDirectory("swh-vault-test-bare") as tempdir:
        with tarfile.open(fileobj=io.BytesIO(bundle)) as tf:
            tf.extractall(tempdir)

        # If we are here, it means git-fsck succeeded when called by cooker.cook(),
        # so we already know the original person was used. Let's double-check.

        repo = dulwich.repo.Repo(f"{tempdir}/{cooked_swhid}.git")

        tag = repo[b"refs/tags/v1.1.0"]
        assert tag.tagger == legacy_person.fullname

        commit = repo[tag.object[1]]
        assert commit.author == legacy_person.fullname
Beispiel #3
0
    def test_original_malformed_objects(self, swh_storage,
                                        cook_extract_snapshot):
        """Tests that objects that were originally malformed:

        * are still interpreted somewhat correctly (if the loader could make sense of
          them), especially that they still have links to children
        * have their original manifest in the bundle
        """
        date = TimestampWithTimezone.from_numeric_offset(
            Timestamp(1643819927, 0), 0, False)

        content = Content.from_data(b"foo")
        swh_storage.content_add([content])

        # disordered
        # fmt: off
        malformed_dir_manifest = (b"" + b"100644 file2\x00" +
                                  content.sha1_git + b"100644 file1\x00" +
                                  content.sha1_git)
        # fmt: on
        directory = Directory(
            entries=(
                DirectoryEntry(name=b"file1",
                               type="file",
                               perms=0o100644,
                               target=content.sha1_git),
                DirectoryEntry(name=b"file2",
                               type="file",
                               perms=0o100644,
                               target=content.sha1_git),
            ),
            raw_manifest=f"tree {len(malformed_dir_manifest)}\x00".encode() +
            malformed_dir_manifest,
        )
        swh_storage.directory_add([directory])

        # 'committer' and 'author' swapped
        # fmt: off
        malformed_rev_manifest = (
            b"tree " + hashutil.hash_to_bytehex(directory.id) + b"\n" +
            b"committer me <*****@*****.**> 1643819927 +0000\n" +
            b"author me <*****@*****.**> 1643819927 +0000\n" + b"\n" +
            b"rev")
        # fmt: on
        revision = Revision(
            message=b"rev",
            author=Person.from_fullname(b"me <*****@*****.**>"),
            date=date,
            committer=Person.from_fullname(b"me <*****@*****.**>"),
            committer_date=date,
            parents=(),
            type=RevisionType.GIT,
            directory=directory.id,
            synthetic=True,
            raw_manifest=f"commit {len(malformed_rev_manifest)}\x00".encode() +
            malformed_rev_manifest,
        )
        swh_storage.revision_add([revision])

        # 'tag' and 'tagger' swapped
        # fmt: off
        malformed_rel_manifest = (
            b"object " + hashutil.hash_to_bytehex(revision.id) + b"\n" +
            b"type commit\n" +
            b"tagger me <*****@*****.**> 1643819927 +0000\n" +
            b"tag v1.1.0\n")
        # fmt: on

        release = Release(
            name=b"v1.1.0",
            message=None,
            author=Person.from_fullname(b"me <*****@*****.**>"),
            date=date,
            target=revision.id,
            target_type=ModelObjectType.REVISION,
            synthetic=True,
            raw_manifest=f"tag {len(malformed_rel_manifest)}\x00".encode() +
            malformed_rel_manifest,
        )
        swh_storage.release_add([release])

        snapshot = Snapshot(
            branches={
                b"refs/tags/v1.1.0":
                SnapshotBranch(target=release.id,
                               target_type=TargetType.RELEASE),
                b"HEAD":
                SnapshotBranch(target=revision.id,
                               target_type=TargetType.REVISION),
            })
        swh_storage.snapshot_add([snapshot])

        with cook_extract_snapshot(swh_storage, snapshot.swhid()) as (ert, p):
            tag = ert.repo[b"refs/tags/v1.1.0"]
            assert tag.as_raw_string() == malformed_rel_manifest

            commit = ert.repo[tag.object[1]]
            assert commit.as_raw_string() == malformed_rev_manifest

            tree = ert.repo[commit.tree]
            assert tree.as_raw_string() == malformed_dir_manifest