コード例 #1
0
    def load_repo_two_double_fork_merge(self, git_loader):
        """Build and load a repository that forks and merges twice.

        Returns a ``(loader, swhid)`` tuple: the loader after ``load()`` has
        run, and the revision SWHID built from the repository's HEAD.
        """
        #
        #     2---4---6
        #    /   /   /
        #   1---3---5
        #
        repo = TestRepo()
        with repo as workdir:
            # Commit 1; branch "c1" is created pointing at it.
            workdir.joinpath("file1").write_text(TEST_CONTENT)
            first = repo.commit("Add file1")
            repo.repo.refs[b"refs/heads/c1"] = first

            # Commit 2, made without an explicit ref.
            workdir.joinpath("file2").write_text(TEST_CONTENT)
            repo.commit("Add file2")

            # Commit 3 goes on branch "c1"; branch "c3" then points at it.
            workdir.joinpath("file3").write_text(TEST_CONTENT)
            third = repo.commit("Add file3", ref=b"refs/heads/c1")
            repo.repo.refs[b"refs/heads/c3"] = third

            # Commit 4: merge commit 3.
            repo.merge([third])

            # Commit 5 goes on branch "c3".
            # NOTE(review): the message says "file3" although file5 is the
            # file written here; kept as-is so the history is unchanged.
            workdir.joinpath("file5").write_text(TEST_CONTENT)
            fifth = repo.commit("Add file3", ref=b"refs/heads/c3")

            # Commit 6: merge commit 5.
            repo.merge([fifth])

            head_hex = repo.repo.refs[b"HEAD"].decode()
            swhid = CoreSWHID(
                object_type=ObjectType.REVISION,
                object_id=hashutil.hash_to_bytes(head_hex),
            )
            loader = git_loader(str(workdir))
            loader.load()
        return loader, swhid
コード例 #2
0
    def load_repo_triple_merge(self, git_loader):
        """Build and load a repository whose last commit merges side branches.

        Returns a ``(loader, swhid)`` tuple: the loader after ``load()`` has
        run, and the revision SWHID built from the repository's HEAD.
        """
        #
        #       .---.---5
        #      /   /   /
        #     2   3   4
        #    /   /   /
        #   1---.---.
        #
        repo = TestRepo()
        with repo as workdir:
            workdir.joinpath("file1").write_text(TEST_CONTENT)
            root = repo.commit("Commit 1")
            # Branches b1 and b2 both start at commit 1.
            for branch in (b"refs/heads/b1", b"refs/heads/b2"):
                repo.repo.refs[branch] = root

            repo.commit("Commit 2")
            # One extra commit on each side branch, merged together below.
            side_commits = [
                repo.commit("Commit 3", ref=b"refs/heads/b1"),
                repo.commit("Commit 4", ref=b"refs/heads/b2"),
            ]
            repo.merge(side_commits)

            head_hex = repo.repo.refs[b"HEAD"].decode()
            swhid = CoreSWHID(
                object_type=ObjectType.REVISION,
                object_id=hashutil.hash_to_bytes(head_hex),
            )
            loader = git_loader(str(workdir))
            loader.load()
        return loader, swhid
コード例 #3
0
    def load_repo_null_fields(self, git_loader):
        """Load a one-commit repository, then store a revision with null fields.

        Our schema doesn't enforce a lot of non-null revision fields. We need
        to check these cases don't break the cooker, so a synthetic revision
        with an empty message, ``None`` dates and nameless author/committer is
        added to the loader's storage. It points at the tree of the commit
        that was just loaded.

        Returns a ``(loader, swhid)`` tuple, where ``swhid`` is the SWHID of
        the synthetic revision.
        """
        repo = TestRepo()
        with repo as rp:
            (rp / "file").write_text(TEST_CONTENT)
            c = repo.commit("initial commit")
            loader = git_loader(str(rp))
            loader.load()
            # Only the tree of the commit is needed; the synthetic revision
            # below reuses it as its directory.
            dir_id_hex = repo.repo[c].tree.decode()
            dir_id = hashutil.hash_to_bytes(dir_id_hex)

        test_revision = Revision(
            message=b"",
            author=Person(name=None, email=None, fullname=b""),
            date=None,
            committer=Person(name=None, email=None, fullname=b""),
            committer_date=None,
            parents=(),
            type=RevisionType.GIT,
            directory=dir_id,
            metadata={},
            synthetic=True,
        )

        storage = loader.storage
        storage.revision_add([test_revision])
        return (loader, test_revision.swhid())
コード例 #4
0
    def test_directory_simple(self, git_loader, cook_extract_directory):
        """Cook a small directory and compare the extracted tree to the source.

        The source tree holds a regular file, an executable, a symlink and a
        nested file; the extracted copy must have the same modes, contents
        and directory hash.
        """
        repo = TestRepo()
        with repo as workdir:
            executable = workdir / "executable"
            nested_dir = workdir / "dir1/dir2"

            (workdir / "file").write_text(TEST_CONTENT)
            executable.write_bytes(TEST_EXECUTABLE)
            executable.chmod(0o755)
            (workdir / "link").symlink_to("file")
            nested_dir.mkdir(parents=True)
            (nested_dir / "file").write_text(TEST_CONTENT)
            commit_id = repo.commit()
            loader = git_loader(str(workdir))
            loader.load()

            tree_hex = repo.repo[commit_id].tree.decode()
            swhid = CoreSWHID(
                object_type=ObjectType.DIRECTORY,
                object_id=hashutil.hash_to_bytes(tree_hex),
            )

        with cook_extract_directory(loader.storage, swhid) as extracted:
            plain = extracted / "file"
            assert plain.stat().st_mode == 0o100644
            assert plain.read_text() == TEST_CONTENT

            cooked_exe = extracted / "executable"
            assert cooked_exe.stat().st_mode == 0o100755
            assert cooked_exe.read_bytes() == TEST_EXECUTABLE

            link = extracted / "link"
            assert link.is_symlink()
            assert os.readlink(str(link)) == "file"

            nested = extracted / "dir1/dir2/file"
            assert nested.stat().st_mode == 0o100644
            assert nested.read_text() == TEST_CONTENT

            # The hash recomputed from disk must match the original tree id.
            on_disk = from_disk.Directory.from_disk(path=bytes(extracted))
            assert tree_hex == hashutil.hash_to_hex(on_disk.hash)
コード例 #5
0
    def load_repo_filtered_objects(self, git_loader):
        """Load a three-file repository, then alter content rows in the db.

        After loading, the first content is marked 'visible', the second
        'hidden', and the third is copied into ``skipped_content`` and deleted
        from ``content``.

        Returns a ``(loader, swhid)`` tuple, where ``swhid`` is the revision
        SWHID built from the repository's HEAD.
        """
        repo = TestRepo()
        with repo as workdir:
            visible_data, visible_sha1 = hash_content(b"test1")
            hidden_data, hidden_sha1 = hash_content(b"test2")
            absent_data, absent_sha1 = hash_content(b"test3")

            workdir.joinpath("file").write_bytes(visible_data)
            workdir.joinpath("hidden_file").write_bytes(hidden_data)
            workdir.joinpath("absent_file").write_bytes(absent_data)

            repo.commit()
            head_hex = repo.repo.refs[b"HEAD"].decode()
            swhid = CoreSWHID(
                object_type=ObjectType.REVISION,
                object_id=hashutil.hash_to_bytes(head_hex),
            )
            loader = git_loader(str(workdir))
            loader.load()

        # FIXME: storage.content_update() should be changed to allow things
        # like that
        db = loader.storage.get_db()
        with db.transaction() as cursor:
            cursor.execute(
                """update content set status = 'visible'
                           where sha1 = %s""",
                (visible_sha1,),
            )
            cursor.execute(
                """update content set status = 'hidden'
                           where sha1 = %s""",
                (hidden_sha1,),
            )

            # Copy the third content into skipped_content before deleting it
            # from the content table.
            cursor.execute(
                """
                insert into skipped_content
                    (sha1, sha1_git, sha256, blake2s256, length, reason)
                select sha1, sha1_git, sha256, blake2s256, length, 'no reason'
                from content
                where sha1 = %s
                """,
                (absent_sha1,),
            )

            cursor.execute("delete from content where sha1 = %s",
                           (absent_sha1,))
        return loader, swhid
コード例 #6
0
    def load_repo_tags(self, git_loader):
        """Build and load a repository with tags on two diverging lines.

        Tags are created both with and without a message. Returns a
        ``(loader, swhid)`` tuple, where ``swhid`` is the revision SWHID built
        from the repository's HEAD.
        """
        #        v-- t2
        #
        #    1---2----5      <-- master, t5, and t5a (annotated)
        #         \
        #          ----3----4     <-- t4a (annotated)
        #
        repo = TestRepo()
        with repo as workdir:
            workdir.joinpath("file1").write_text(TEST_CONTENT)
            repo.commit("Add file1")

            # c2, tagged t2
            workdir.joinpath("file2").write_text(TEST_CONTENT)
            repo.commit("Add file2")
            repo.tag(b"t2")

            workdir.joinpath("file3").write_text(TEST_CONTENT)
            repo.commit("add file3")

            # c4, tagged t4a with a message
            workdir.joinpath("file4").write_text(TEST_CONTENT)
            repo.commit("add file4")
            repo.tag(b"t4a", message=b"tag 4")

            # Rewind two commits, i.e. back to c2.
            repo.git_shell("reset", "--hard", "HEAD^^")

            # c5, tagged twice: once without and once with a message
            workdir.joinpath("file5").write_text(TEST_CONTENT)
            repo.commit("add file5")
            repo.tag(b"t5")
            repo.tag(b"t5a", message=b"tag 5")

            head_hex = repo.repo.refs[b"HEAD"].decode()
            swhid = CoreSWHID(
                object_type=ObjectType.REVISION,
                object_id=hashutil.hash_to_bytes(head_hex),
            )
            loader = git_loader(str(workdir))
            loader.load()
        return loader, swhid
コード例 #7
0
    def test_directory_bogus_perms(self, git_loader, cook_extract_directory):
        """Check that the directory cooker normalizes unusual file modes.

        Some early git repositories have 664/775 permissions, so the test
        commits and loads files with such modes untouched, then verifies the
        cooked directory only contains 644/755 entries.
        """
        weird_modes = {0o100775, 0o100664, 0o100604}

        repo = TestRepo()
        with repo as workdir:
            regular = workdir / "file"
            regular.write_text(TEST_CONTENT)
            regular.chmod(0o664)

            executable = workdir / "executable"
            executable.write_bytes(TEST_EXECUTABLE)
            executable.chmod(0o775)

            odd = workdir / "wat"
            odd.write_text(TEST_CONTENT)
            odd.chmod(0o604)

            # Commit with Dulwich's mode cleanup turned into a no-op.
            with unittest.mock.patch("dulwich.index.cleanup_mode",
                                     lambda mode: mode):
                commit_id = repo.commit()

            # Dulwich must not have normalized the permissions itself,
            # otherwise the test can't show that the cooker normalized them.
            tree_id = repo.repo[commit_id].tree
            committed_modes = {
                entry.mode for entry in repo.repo[tree_id].items()
            }
            assert committed_modes == weird_modes

            # Load with Tree.check replaced by a no-op (disables mode checks).
            with unittest.mock.patch("dulwich.objects.Tree.check",
                                     lambda self: None):
                loader = git_loader(str(workdir))
                loader.load()

            # swh-loader must not have normalized them either.
            stored_entries = loader.storage.directory_ls(
                hashutil.bytehex_to_hash(tree_id))
            stored_modes = {entry["perms"] for entry in stored_entries}
            assert stored_modes == weird_modes

            tree_hex = tree_id.decode()
            swhid = CoreSWHID(
                object_type=ObjectType.DIRECTORY,
                object_id=hashutil.hash_to_bytes(tree_hex),
            )

        with cook_extract_directory(loader.storage, swhid) as extracted:
            assert (extracted / "file").stat().st_mode == 0o100644
            assert (extracted / "executable").stat().st_mode == 0o100755
            assert (extracted / "wat").stat().st_mode == 0o100644
コード例 #8
0
    def load_repo_two_heads(self, git_loader):
        """Build and load a repository with two diverging branch heads.

        Returns a ``(loader, swhid)`` tuple, where ``swhid`` is the revision
        SWHID built from the repository's HEAD.
        """
        #
        #    1---2----4      <-- master and b1
        #         \
        #          ----3     <-- b2
        #
        repo = TestRepo()
        with repo as workdir:
            workdir.joinpath("file1").write_text(TEST_CONTENT)
            repo.commit("Add file1")

            # Branch b2 starts at the second commit.
            workdir.joinpath("file2").write_text(TEST_CONTENT)
            second = repo.commit("Add file2")
            repo.repo.refs[b"refs/heads/b2"] = second

            workdir.joinpath("file3").write_text(TEST_CONTENT)
            repo.commit("add file3", ref=b"refs/heads/b2")

            # Branch b1 points at the fourth commit, made on master.
            workdir.joinpath("file4").write_text(TEST_CONTENT)
            fourth = repo.commit("add file4", ref=b"refs/heads/master")
            repo.repo.refs[b"refs/heads/b1"] = fourth

            head_hex = repo.repo.refs[b"HEAD"].decode()
            swhid = CoreSWHID(
                object_type=ObjectType.REVISION,
                object_id=hashutil.hash_to_bytes(head_hex),
            )
            loader = git_loader(str(workdir))
            loader.load()
        return loader, swhid
コード例 #9
0
    def test_directory_objstorage(self, swh_storage, git_loader, mocker,
                                  direct_objstorage):
        """Same scenario as test_directory_simple, cooked as a git-bare bundle.

        Spies on ``swh_storage.content_get_data`` and on
        ``swh_storage.objstorage.get_batch`` check which of the two was used
        to fetch contents, depending on ``direct_objstorage``.
        """
        repo = TestRepo()
        with repo as workdir:
            executable = workdir / "executable"
            nested_dir = workdir / "dir1/dir2"

            (workdir / "file").write_text(TEST_CONTENT)
            executable.write_bytes(TEST_EXECUTABLE)
            executable.chmod(0o755)
            (workdir / "link").symlink_to("file")
            nested_dir.mkdir(parents=True)
            (nested_dir / "file").write_text(TEST_CONTENT)
            commit_id = repo.commit()
            loader = git_loader(str(workdir))
            loader.load()

            tree_hex = repo.repo[commit_id].tree.decode()
            swhid = CoreSWHID(
                object_type=ObjectType.DIRECTORY,
                object_id=hashutil.hash_to_bytes(tree_hex),
            )

        # Wrap both access paths so calls still go through but are recorded.
        storage_spy = mocker.patch.object(
            swh_storage,
            "content_get_data",
            wraps=swh_storage.content_get_data,
        )
        objstorage_spy = mocker.patch.object(
            swh_storage.objstorage,
            "get_batch",
            wraps=swh_storage.objstorage.get_batch,
        )

        with cook_extract_directory_git_bare(
                loader.storage, swhid,
                direct_objstorage=direct_objstorage) as extracted:
            plain = extracted / "file"
            assert plain.stat().st_mode == 0o100644
            assert plain.read_text() == TEST_CONTENT

            cooked_exe = extracted / "executable"
            assert cooked_exe.stat().st_mode == 0o100755
            assert cooked_exe.read_bytes() == TEST_EXECUTABLE

            link = extracted / "link"
            assert link.is_symlink()
            assert os.readlink(str(link)) == "file"

            nested = extracted / "dir1/dir2/file"
            assert nested.stat().st_mode == 0o100644
            assert nested.read_text() == TEST_CONTENT

            # The hash recomputed from disk must match the original tree id.
            on_disk = from_disk.Directory.from_disk(path=bytes(extracted))
            assert tree_hex == hashutil.hash_to_hex(on_disk.hash)

        if not direct_objstorage:
            # Contents must have been fetched through the storage only.
            storage_spy.assert_called()
            objstorage_spy.assert_not_called()
        else:
            # Contents must have been fetched from the objstorage only.
            storage_spy.assert_not_called()
            objstorage_spy.assert_called()
コード例 #10
0
    def test_directory_filtered_objects(self, git_loader,
                                        cook_extract_directory):
        """Check what the directory cooker writes for hidden/skipped contents.

        After loading, one content is marked 'hidden' and another is moved
        to ``skipped_content``; the cooked files must then contain
        HIDDEN_MESSAGE and SKIPPED_MESSAGE respectively, while the visible
        file keeps its data.
        """
        repo = TestRepo()
        with repo as workdir:
            visible_data, visible_sha1 = hash_content(b"test1")
            hidden_data, hidden_sha1 = hash_content(b"test2")
            absent_data, absent_sha1 = hash_content(b"test3")

            workdir.joinpath("file").write_bytes(visible_data)
            workdir.joinpath("hidden_file").write_bytes(hidden_data)
            workdir.joinpath("absent_file").write_bytes(absent_data)

            commit_id = repo.commit()
            loader = git_loader(str(workdir))
            loader.load()

            tree_hex = repo.repo[commit_id].tree.decode()
            swhid = CoreSWHID(
                object_type=ObjectType.DIRECTORY,
                object_id=hashutil.hash_to_bytes(tree_hex),
            )

        # FIXME: storage.content_update() should be changed to allow things
        # like that
        db = loader.storage.get_db()
        with db.transaction() as cursor:
            cursor.execute(
                """update content set status = 'visible'
                           where sha1 = %s""",
                (visible_sha1,),
            )
            cursor.execute(
                """update content set status = 'hidden'
                           where sha1 = %s""",
                (hidden_sha1,),
            )

            # Copy the third content into skipped_content before deleting it
            # from the content table.
            cursor.execute(
                """
                insert into skipped_content
                    (sha1, sha1_git, sha256, blake2s256, length, reason)
                select sha1, sha1_git, sha256, blake2s256, length, 'no reason'
                from content
                where sha1 = %s
                """,
                (absent_sha1,),
            )

            cursor.execute("delete from content where sha1 = %s",
                           (absent_sha1,))

        with cook_extract_directory(loader.storage, swhid) as extracted:
            assert (extracted / "file").read_bytes() == b"test1"
            assert (extracted / "hidden_file").read_bytes() == HIDDEN_MESSAGE
            assert (extracted / "absent_file").read_bytes() == SKIPPED_MESSAGE
コード例 #11
0
 def load_repo_two_roots(self, git_loader):
     """Build and load a repository whose history has two root commits.

     Returns a ``(loader, swhid)`` tuple, where ``swhid`` is the revision
     SWHID built from the repository's HEAD.
     """
     #
     #    1----3---4
     #        /
     #   2----
     #
     repo = TestRepo()
     with repo as workdir:
         workdir.joinpath("file1").write_text(TEST_CONTENT)
         first_root = repo.commit("Add file1")

         # Drop the master ref (git update-ref -d HEAD) before committing
         # again.
         del repo.repo.refs[b"refs/heads/master"]
         workdir.joinpath("file2").write_text(TEST_CONTENT)
         repo.commit("Add file2")

         # Join the two lines of history, then add one commit on top.
         repo.merge([first_root])
         workdir.joinpath("file3").write_text(TEST_CONTENT)
         repo.commit("add file3")

         head_hex = repo.repo.refs[b"HEAD"].decode()
         swhid = CoreSWHID(
             object_type=ObjectType.REVISION,
             object_id=hashutil.hash_to_bytes(head_hex),
         )
         loader = git_loader(str(workdir))
         loader.load()
     return loader, swhid
コード例 #12
0
    def load_repo_simple(self, git_loader):
        """Build and load a repository with a linear history.

        The commits add files, a nested directory, an executable and a
        symlink, then remove a file and rename the executable.

        Returns a ``(loader, swhid)`` tuple, where ``swhid`` is the revision
        SWHID built from the repository's HEAD.
        """
        #
        #     1--2--3--4--5--6
        #
        # NOTE(review): the diagram previously showed seven commits, but
        # repo.commit() is called six times below.
        repo = TestRepo()
        with repo as workdir:
            second_file = workdir / "file2"
            binary = workdir / "bin1"
            nested_dir = workdir / "dir1/dir2"

            (workdir / "file1").write_text(TEST_CONTENT)
            repo.commit("add file1")

            second_file.write_text(TEST_CONTENT)
            repo.commit("add file2")

            # The nested directory and the executable share one commit.
            nested_dir.mkdir(parents=True)
            (nested_dir / "file").write_text(TEST_CONTENT)
            binary.write_bytes(TEST_EXECUTABLE)
            binary.chmod(0o755)
            repo.commit("add bin1")

            (workdir / "link1").symlink_to("file1")
            repo.commit("link link1 to file1")

            second_file.unlink()
            repo.commit("remove file2")

            binary.rename(workdir / "bin")
            repo.commit("rename bin1 to bin")

            loader = git_loader(str(workdir))
            loader.load()

            head_hex = repo.repo.refs[b"HEAD"].decode()
            swhid = CoreSWHID(
                object_type=ObjectType.REVISION,
                object_id=hashutil.hash_to_bytes(head_hex),
            )
        return loader, swhid