Example #1
    def append_entry(cls, config_reader, filepath, oldbinsha, newbinsha,
                     message):
        """Append a new log entry to the revlog at filepath.
        
        :param config_reader: configuration reader of the repository - used to obtain
            user information. May be None
        :param filepath: full path to the log file
        :param oldbinsha: binary sha of the previous commit
        :param newbinsha: binary sha of the current commit
        :param message: message describing the change to the reference
        :return: RefLogEntry object that was appended to the log
        :note: As we are append-only, concurrent access is not a problem as we 
            do not interfere with readers."""
        if len(oldbinsha) != 20 or len(newbinsha) != 20:
            raise ValueError("Shas need to be given in binary format")
        #END handle sha type
        assure_directory_exists(filepath, is_file=True)
        entry = RefLogEntry((bin_to_hex(oldbinsha), bin_to_hex(newbinsha),
                             Actor.committer(config_reader),
                             (int(time.time()), time.altzone), message))

        lf = LockFile(filepath)
        lf._obtain_lock_or_raise()

        fd = open(filepath, 'a')
        try:
            fd.write(repr(entry))
        finally:
            fd.close()
            lf._release_lock()
        #END handle write operation

        return entry
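For reference, every example on this page converts between the 20-byte binary SHA-1 form and its 40-character hexadecimal form. A minimal round-trip sketch, assuming gitdb's bin_to_hex/hex_to_bin behave like binascii's b2a_hex/a2b_hex (which return bytes on Python 3):

    # Round-trip sketch using only the standard library (binascii).
    import binascii

    binsha = binascii.a2b_hex("5aebcd5cb3340fb31776941d7e4d518a712a8655")
    assert len(binsha) == 20                      # binary sha is 20 bytes
    hexsha = binascii.b2a_hex(binsha)             # b'5aebcd5c...'
    assert len(hexsha) == 40                      # hex sha is 40 characters
    assert binascii.a2b_hex(hexsha) == binsha     # lossless round trip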
Example #2
    def append_entry(cls, config_reader, filepath, oldbinsha, newbinsha, message):
        """Append a new log entry to the revlog at filepath.

        :param config_reader: configuration reader of the repository - used to obtain
            user information. May be None
        :param filepath: full path to the log file
        :param oldbinsha: binary sha of the previous commit
        :param newbinsha: binary sha of the current commit
        :param message: message describing the change to the reference
        :return: RefLogEntry object that was appended to the log
        :note: As we are append-only, concurrent access is not a problem as we 
            do not interfere with readers."""
        if len(oldbinsha) != 20 or len(newbinsha) != 20:
            raise ValueError("Shas need to be given in binary format")
        # END handle sha type
        assure_directory_exists(filepath, is_file=True)
        entry = RefLogEntry((bin_to_hex(oldbinsha), bin_to_hex(newbinsha), Actor.committer(
            config_reader), (int(time.time()), time.altzone), message))

        lf = LockFile(filepath)
        lf._obtain_lock_or_raise()

        fd = open(filepath, 'a')
        try:
            fd.write(repr(entry))
        finally:
            fd.close()
            lf._release_lock()
        # END handle write operation

        return entry
Example #3
 def test_basics(self):
     gdb = self.rorepo
     
     # partial to complete - works with everything
     hexsha = bin_to_hex(gdb.partial_to_complete_sha_hex("0.1.6"))
     assert len(hexsha) == 40
     
     assert bin_to_hex(gdb.partial_to_complete_sha_hex(hexsha[:20])) == hexsha
     
     # fails with BadObject
     for invalid_rev in ("0000", "bad/ref", "super bad"):
         self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex, invalid_rev)
Example #4
    def test_base(self):
        gdb = GitCmdObjectDB(osp.join(self.rorepo.git_dir, 'objects'), self.rorepo.git)

        # partial to complete - works with everything
        hexsha = bin_to_hex(gdb.partial_to_complete_sha_hex("0.1.6"))
        assert len(hexsha) == 40

        assert bin_to_hex(gdb.partial_to_complete_sha_hex(hexsha[:20])) == hexsha

        # fails with BadObject
        for invalid_rev in ("0000", "bad/ref", "super bad"):
            self.assertRaises(BadObject, gdb.partial_to_complete_sha_hex, invalid_rev)
Example #5
    def append_entry(cls,
                     config_reader: Union[Actor, 'GitConfigParser',
                                          'SectionConstraint', None],
                     filepath: PathLike,
                     oldbinsha: bytes,
                     newbinsha: bytes,
                     message: str,
                     write: bool = True) -> 'RefLogEntry':
        """Append a new log entry to the revlog at filepath.

        :param config_reader: configuration reader of the repository - used to obtain
            user information. May also be an Actor instance identifying the committer directly or None.
        :param filepath: full path to the log file
        :param oldbinsha: binary sha of the previous commit
        :param newbinsha: binary sha of the current commit
        :param message: message describing the change to the reference
        :param write: If True, the changes will be written right away. Otherwise
            the change will not be written

        :return: RefLogEntry object that was appended to the log

        :note: As we are append-only, concurrent access is not a problem as we
            do not interfere with readers."""

        if len(oldbinsha) != 20 or len(newbinsha) != 20:
            raise ValueError("Shas need to be given in binary format")
        # END handle sha type
        assure_directory_exists(filepath, is_file=True)
        first_line = message.split('\n')[0]
        if isinstance(config_reader, Actor):
            committer = config_reader  # mypy thinks this is Actor | GitConfigParser, but why?
        else:
            committer = Actor.committer(config_reader)
        entry = RefLogEntry((bin_to_hex(oldbinsha).decode('ascii'),
                             bin_to_hex(newbinsha).decode('ascii'), committer,
                             (int(_time.time()), _time.altzone), first_line))

        if write:
            lf = LockFile(filepath)
            lf._obtain_lock_or_raise()
            fd = open(filepath, 'ab')
            try:
                fd.write(entry.format().encode(defenc))
            finally:
                fd.close()
                lf._release_lock()
            # END handle write operation
        return entry
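A hypothetical call sketch for the typed variant above. It assumes RefLog.append_entry is the classmethod shown and that Actor and RefLog are importable from GitPython as indicated; the file path and identity are placeholders:

    from git import Actor
    from git.refs.log import RefLog

    committer = Actor("A. Committer", "committer@example.com")
    old = b"\x00" * 20                                       # 20-byte binary shas are required
    new = bytes.fromhex("5aebcd5cb3340fb31776941d7e4d518a712a8655")
    # write=False builds and returns the RefLogEntry without touching the log file
    entry = RefLog.append_entry(committer, "/tmp/some-reflog", old, new,
                                "update ref", write=False)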
Example #6
    def test_reading(self):
        gdb = PureGitODB(os.path.join(rorepo_dir(), 'objects'))

        # we have packs and loose objects, alternates doesn't necessarily exist
        assert 1 < len(gdb.databases()) < 4

        # access should be possible
        git_sha = hex_to_bin("5aebcd5cb3340fb31776941d7e4d518a712a8655")
        assert isinstance(gdb.info(git_sha), OInfo)
        assert isinstance(gdb.stream(git_sha), OStream)
        assert gdb.size() > 200
        sha_list = list(gdb.sha_iter())
        assert len(sha_list) == gdb.size()

        # This is actually a test for compound functionality, but it doesn't
        # have a separate test module
        # test partial shas
        # this one is uneven and quite short
        assert gdb.partial_to_complete_sha_hex('5aebcd') == hex_to_bin(
            "5aebcd5cb3340fb31776941d7e4d518a712a8655")

        # mix even/uneven hexshas
        for i, binsha in enumerate(sha_list[:50]):
            assert gdb.partial_to_complete_sha_hex(
                bin_to_hex(binsha)[:8 - (i % 2)]) == binsha
        # END for each sha

        self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex,
                              "0000")
Example #7
 def test_reading(self):
     gdb = PureGitODB(os.path.join(rorepo_dir(), 'objects'))
     
     # we have packs and loose objects, alternates doesn't necessarily exist
     assert 1 < len(gdb.databases()) < 4
     
     # access should be possible
     git_sha = hex_to_bin("5aebcd5cb3340fb31776941d7e4d518a712a8655")
     assert isinstance(gdb.info(git_sha), OInfo)
     assert isinstance(gdb.stream(git_sha), OStream)
     assert gdb.size() > 200
     sha_list = list(gdb.sha_iter())
     assert len(sha_list) == gdb.size()
     
     
     # This is actually a test for compound functionality, but it doesn't 
     # have a separate test module
     # test partial shas
     # this one is uneven and quite short
     assert gdb.partial_to_complete_sha_hex('5aebcd') == hex_to_bin("5aebcd5cb3340fb31776941d7e4d518a712a8655")
     
     # mix even/uneven hexshas
     for i, binsha in enumerate(sha_list[:50]):
         assert gdb.partial_to_complete_sha_hex(bin_to_hex(binsha)[:8-(i%2)]) == binsha
     # END for each sha
     
     self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex, "0000")
Example #8
    def test_large_data_streaming(self, rwrepo):
        # TODO: This part overlaps with the same file in git.test.performance.test_stream
        # It should be shared if possible
        objects_path = rwrepo.db_path('')
        ldb = self.LooseODBCls(objects_path)

        for randomize in range(2):
            desc = (randomize and 'random ') or ''
            print >> sys.stderr, "Creating %s data ..." % desc
            st = time()
            size, stream = make_memory_file(self.large_data_size_bytes, randomize)
            elapsed = time() - st
            print >> sys.stderr, "Done (in %f s)" % elapsed

            # writing - due to the compression it will seem faster than it is
            st = time()
            binsha = ldb.store(IStream('blob', size, stream)).binsha
            elapsed_add = time() - st
            assert ldb.has_object(binsha)
            hexsha = bin_to_hex(binsha)
            db_file = os.path.join(objects_path, hexsha[:2], hexsha[2:])
            fsize_kib = os.path.getsize(db_file) / 1000

            size_kib = size / 1000
            print >> sys.stderr, "%s: Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (
                self.LooseODBCls.__name__, size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add)

            # reading all at once
            st = time()
            ostream = ldb.stream(binsha)
            shadata = ostream.read()
            elapsed_readall = time() - st

            stream.seek(0)
            assert shadata == stream.getvalue()
            print >> sys.stderr, "%s: Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (
                self.LooseODBCls.__name__, size_kib, desc, elapsed_readall, size_kib / elapsed_readall)

            # reading in chunks of 1 MiB
            cs = 512 * 1000
            chunks = list()
            st = time()
            ostream = ldb.stream(binsha)
            while True:
                data = ostream.read(cs)
                chunks.append(data)
                if len(data) < cs:
                    break
            # END read in chunks
            elapsed_readchunks = time() - st

            stream.seek(0)
            assert ''.join(chunks) == stream.getvalue()

            cs_kib = cs / 1000
            print >> sys.stderr, "%s: Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (
                self.LooseODBCls.__name__, size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks)

            # del db file so git has something to do
            os.remove(db_file)
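The db_file computed above reflects how git stores loose objects on disk: the first two hex characters of the sha name a fan-out directory, the remaining 38 name the file. A tiny path-only sketch (the objects directory is hypothetical):

    import os

    hexsha = "5aebcd5cb3340fb31776941d7e4d518a712a8655"
    db_file = os.path.join("/path/to/.git/objects", hexsha[:2], hexsha[2:])
    # -> /path/to/.git/objects/5a/ebcd5cb3340fb31776941d7e4d518a712a8655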
Example #9
 def _assert_index_entries(self, entries, trees):
     index = IndexFile.from_tree(
         self.rorepo, *[self.rorepo.tree(bin_to_hex(t)) for t in trees])
     assert entries
     assert len(index.entries) == len(entries)
     for entry in entries:
         assert (entry.path, entry.stage) in index.entries
Example #10
def short_to_long(odb, hexsha):
    """:return: long hexadecimal sha1 from the given less-than-40 byte hexsha
        or None if no candidate could be found.
    :param hexsha: hexsha with less than 40 bytes"""
    try:
        return bin_to_hex(odb.partial_to_complete_sha_hex(hexsha))
    except BadObject:
        return None
Example #11
def short_to_long(odb, hexsha):
    """:return: long hexadecimal sha1 from the given less-than-40 byte hexsha
        or None if no candidate could be found.
    :param hexsha: hexsha with less than 40 bytes"""
    try:
        return bin_to_hex(odb.partial_to_complete_sha_hex(hexsha))
    except BadObject:
        return None
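A hedged usage sketch for short_to_long, assuming the function above is in scope; any object database exposing partial_to_complete_sha_hex works, for instance GitPython's repo.odb (the repository path is hypothetical):

    from git import Repo

    repo = Repo("/path/to/repo")
    hexsha = short_to_long(repo.odb, "5aebcd")
    if hexsha is None:
        print("no object starts with that prefix")
    else:
        print(hexsha)        # full 40-character hexsha (bytes on Python 3)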
Example #12
    def stream(self, sha):
        """For now, all lookup is done by git itself
        :note: As we don't know when the stream is actually read (and if it is 
            stored for later use) we read the data right away and cache it.
            This has HUGE performance implications, both for memory and for
            reading/deserializing objects, but we have no other choice in order
            to make the database behaviour consistent with other implementations!"""

        hexsha, typename, size, data = self._git.get_object_data(bin_to_hex(sha))
        return OStream(hex_to_bin(hexsha), typename, size, StringIO(data))
Example #13
 def stream(self, sha):
     """For now, all lookup is done by git itself
     :note: As we don't know when the stream is actually read (and if it is 
         stored for later use) we read the data right away and cache it.
         This has HUGE performance implications, both for memory and for
         reading/deserializing objects, but we have no other choice in order
         to make the database behaviour consistent with other implementations!"""
     
     hexsha, typename, size, data = self._git.get_object_data(bin_to_hex(sha))
     return OStream(hex_to_bin(hexsha), typename, size, StringIO(data))
Example #14
    def test_basics(self, path):
        ldb = PureLooseObjectODB(path)

        # write data
        self._assert_object_writing(ldb)
        self._assert_object_writing_async(ldb)

        # verify sha iteration and size
        shas = list(ldb.sha_iter())
        assert shas and len(shas[0]) == 20

        assert len(shas) == ldb.size()

        # verify find short object
        long_sha = bin_to_hex(shas[-1])
        for short_sha in (long_sha[:20], long_sha[:5]):
            assert bin_to_hex(ldb.partial_to_complete_sha_hex(short_sha)) == long_sha
        # END for each sha

        self.failUnlessRaises(BadObject, ldb.partial_to_complete_sha_hex, '0000')
Example #15
    def test_basics(self, path):
        ldb = PureLooseObjectODB(path)

        # write data
        self._assert_object_writing(ldb)
        self._assert_object_writing_async(ldb)

        # verify sha iteration and size
        shas = list(ldb.sha_iter())
        assert shas and len(shas[0]) == 20

        assert len(shas) == ldb.size()

        # verify find short object
        long_sha = bin_to_hex(shas[-1])
        for short_sha in (long_sha[:20], long_sha[:5]):
            assert bin_to_hex(
                ldb.partial_to_complete_sha_hex(short_sha)) == long_sha
        # END for each sha

        self.failUnlessRaises(BadObject, ldb.partial_to_complete_sha_hex,
                              '0000')
Example #16
 def partial_to_complete_sha_hex(self, partial_hexsha):
     """:return: 20 byte binary sha1 string which matches the given name uniquely
     :param partial_hexsha: hexadecimal partial name
     :raise AmbiguousObjectName: 
     :raise BadObject: """
     candidate = None
     for binsha in self.sha_iter():
         if bin_to_hex(binsha).startswith(partial_hexsha):
             # it can't ever find the same object twice
             if candidate is not None:
                 raise AmbiguousObjectName(partial_hexsha)
             candidate = binsha
     # END for each object
     if candidate is None:
         raise BadObject(partial_hexsha)
     return candidate
Example #17
 def partial_to_complete_sha_hex(self, partial_hexsha):
     """:return: 20 byte binary sha1 string which matches the given name uniquely
     :param partial_hexsha: hexadecimal partial name
     :raise AmbiguousObjectName: 
     :raise BadObject: """
     candidate = None
     for binsha in self.sha_iter():
         if bin_to_hex(binsha).startswith(partial_hexsha):
             # it can't ever find the same object twice
             if candidate is not None:
                 raise AmbiguousObjectName(partial_hexsha)
             candidate = binsha
     # END for each object
     if candidate is None:
         raise BadObject(partial_hexsha)
     return candidate
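The lookup above is a linear scan over every object in the database: it raises AmbiguousObjectName as soon as a second prefix match is found and BadObject when nothing matches. The same prefix-matching idea, isolated over a plain list of binary shas (standard library only; the exception types here are generic stand-ins, not gitdb's):

    import binascii

    def find_unique_prefix(binshas, partial_hexsha):
        """Return the single binsha whose hex form starts with partial_hexsha (bytes)."""
        candidate = None
        for binsha in binshas:
            if binascii.b2a_hex(binsha).startswith(partial_hexsha):
                if candidate is not None:
                    raise ValueError("ambiguous prefix: %r" % partial_hexsha)
                candidate = binsha
        if candidate is None:
            raise LookupError("no object for prefix: %r" % partial_hexsha)
        return candidate

    shas = [binascii.a2b_hex("5aebcd5cb3340fb31776941d7e4d518a712a8655")]
    assert find_unique_prefix(shas, b"5aebcd") == shas[0]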
Example #18
 def _map_loose_object(self, sha):
     """
     :return: memory map of that file to allow random read access
     :raise BadObject: if object could not be located"""
     db_path = self.db_path(self.object_path(bin_to_hex(sha)))
     try:
         return file_contents_ro_filepath(db_path, flags=self._fd_open_flags)
     except OSError as e:
         if e.errno != ENOENT:
             # try again without noatime
             try:
                 return file_contents_ro_filepath(db_path)
             except OSError:
                 raise BadObject(sha)
             # didn't work because of our flag, don't try it again
             self._fd_open_flags = 0
         else:
             raise BadObject(sha)
Example #19
	def _map_loose_object(self, sha):
		"""
		:return: memory map of that file to allow random read access
		:raise BadObject: if object could not be located"""
		db_path = self.db_path(self.object_path(bin_to_hex(sha)))
		try:
			return file_contents_ro_filepath(db_path, flags=self._fd_open_flags)
		except OSError as e:
			if e.errno != ENOENT:
				# try again without noatime
				try:
					return file_contents_ro_filepath(db_path)
				except OSError:
					raise BadObject(sha)
				# didn't work because of our flag, don't try it again
				self._fd_open_flags = 0
			else:
				raise BadObject(sha)
Example #20
 def hexsha(self):
     """:return: 40 byte hex version of our 20 byte binary sha"""
     return bin_to_hex(self.binsha)
Example #21
 def hexsha(self):
     """:return: 40 byte hex version of our 20 byte binary sha"""
     # b2a_hex produces bytes
     return bin_to_hex(self.binsha).decode('ascii')
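The .decode('ascii') matters because bin_to_hex, like binascii.b2a_hex which it wraps in gitdb, returns bytes under Python 3, while most callers want the hexsha as a str. A two-line check:

    import binascii

    binsha = bytes(20)
    assert isinstance(binascii.b2a_hex(binsha), bytes)
    assert binascii.b2a_hex(binsha).decode('ascii') == "00" * 20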
Example #22
    def test_large_data_streaming(self, rwrepo):
        # TODO: This part overlaps with the same file in git.test.performance.test_stream
        # It should be shared if possible
        objects_path = rwrepo.db_path('')
        ldb = self.LooseODBCls(objects_path)

        for randomize in range(2):
            desc = (randomize and 'random ') or ''
            print >> sys.stderr, "Creating %s data ..." % desc
            st = time()
            size, stream = make_memory_file(self.large_data_size_bytes,
                                            randomize)
            elapsed = time() - st
            print >> sys.stderr, "Done (in %f s)" % elapsed

            # writing - due to the compression it will seem faster than it is
            st = time()
            binsha = ldb.store(IStream('blob', size, stream)).binsha
            elapsed_add = time() - st
            assert ldb.has_object(binsha)
            hexsha = bin_to_hex(binsha)
            db_file = os.path.join(objects_path, hexsha[:2], hexsha[2:])
            fsize_kib = os.path.getsize(db_file) / 1000

            size_kib = size / 1000
            print >> sys.stderr, "%s: Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (
                self.LooseODBCls.__name__, size_kib, fsize_kib, desc,
                elapsed_add, size_kib / elapsed_add)

            # reading all at once
            st = time()
            ostream = ldb.stream(binsha)
            shadata = ostream.read()
            elapsed_readall = time() - st

            stream.seek(0)
            assert shadata == stream.getvalue()
            print >> sys.stderr, "%s: Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (
                self.LooseODBCls.__name__, size_kib, desc, elapsed_readall,
                size_kib / elapsed_readall)

            # reading in chunks of 1 MiB
            cs = 512 * 1000
            chunks = list()
            st = time()
            ostream = ldb.stream(binsha)
            while True:
                data = ostream.read(cs)
                chunks.append(data)
                if len(data) < cs:
                    break
            # END read in chunks
            elapsed_readchunks = time() - st

            stream.seek(0)
            assert ''.join(chunks) == stream.getvalue()

            cs_kib = cs / 1000
            print >> sys.stderr, "%s: Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (
                self.LooseODBCls.__name__, size_kib, desc, cs_kib,
                elapsed_readchunks, size_kib / elapsed_readchunks)

            # del db file so git has something to do
            os.remove(db_file)
Example #23
 def info(self, binsha: bytes) -> OInfo:
     hexsha, typename, size = self._git.get_object_header(bin_to_hex(binsha))
     return OInfo(hex_to_bin(hexsha), typename, size)
Example #24
	def __str__(self):
		""":return: string of our SHA1 as understood by all git commands"""
		return bin_to_hex(self.binsha)
Example #25
	def hexsha(self):
		""":return: 40 byte hex version of our 20 byte binary sha"""
		return bin_to_hex(self.binsha)
Example #26
 def has_object(self, sha):
     try:
         self.readable_db_object_path(bin_to_hex(sha))
         return True
     except BadObject:
         return False
Example #27
 def _assert_index_entries(self, entries, trees):
     index = IndexFile.from_tree(self.rorepo, *[self.rorepo.tree(bin_to_hex(t)) for t in trees])
     assert entries
     assert len(index.entries) == len(entries)
     for entry in entries:
         assert (entry.path, entry.stage) in index.entries
Example #28
    def test_rev_parse(self):
        rev_parse = self.rorepo.rev_parse

        # try special case: This one failed at some point, make sure it's fixed
        self.assertEqual(rev_parse("33ebe").hexsha, "33ebe7acec14b25c5f84f35a664803fcab2f7781")

        # start from reference
        num_resolved = 0

        for ref_no, ref in enumerate(Reference.iter_items(self.rorepo)):
            path_tokens = ref.path.split("/")
            for pt in range(len(path_tokens)):
                path_section = '/'.join(path_tokens[-(pt + 1):])
                try:
                    obj = self._assert_rev_parse(path_section)
                    self.assertEqual(obj.type, ref.object.type)
                    num_resolved += 1
                except (BadName, BadObject):
                    print("failed on %s" % path_section)
                    # is fine, in case we have something like 112, which belongs to remotes/rname/merge-requests/112
                    pass
                # END exception handling
            # END for each token
            if ref_no == 3 - 1:
                break
        # END for each reference
        assert num_resolved

        # it works with tags !
        tag = self._assert_rev_parse('0.1.4')
        self.assertEqual(tag.type, 'tag')

        # try full sha directly ( including type conversion )
        self.assertEqual(tag.object, rev_parse(tag.object.hexsha))
        self._assert_rev_parse_types(tag.object.hexsha, tag.object)

        # multiple tree types result in the same tree: HEAD^{tree}^{tree}:CHANGES
        rev = '0.1.4^{tree}^{tree}'
        self.assertEqual(rev_parse(rev), tag.object.tree)
        self.assertEqual(rev_parse(rev + ':CHANGES'), tag.object.tree['CHANGES'])

        # try to get parents from first revision - it should fail as no such revision
        # exists
        first_rev = "33ebe7acec14b25c5f84f35a664803fcab2f7781"
        commit = rev_parse(first_rev)
        self.assertEqual(len(commit.parents), 0)
        self.assertEqual(commit.hexsha, first_rev)
        self.failUnlessRaises(BadName, rev_parse, first_rev + "~")
        self.failUnlessRaises(BadName, rev_parse, first_rev + "^")

        # short SHA1
        commit2 = rev_parse(first_rev[:20])
        self.assertEqual(commit2, commit)
        commit2 = rev_parse(first_rev[:5])
        self.assertEqual(commit2, commit)

        # todo: dereference tag into a blob 0.1.7^{blob} - quite a special one
        # needs a tag which points to a blob

        # ref^0 returns commit being pointed to, same with ref~0, and ^{}
        tag = rev_parse('0.1.4')
        for token in (('~0', '^0', '^{}')):
            self.assertEqual(tag.object, rev_parse('0.1.4%s' % token))
        # END handle multiple tokens

        # try partial parsing
        max_items = 40
        for i, binsha in enumerate(self.rorepo.odb.sha_iter()):
            self.assertEqual(rev_parse(bin_to_hex(binsha)[:8 - (i % 2)].decode('ascii')).binsha, binsha)
            if i > max_items:
                # this is rather slow currently, as rev_parse returns an object
                # which requires accessing packs, it has some additional overhead
                break
        # END for each binsha in repo

        # missing closing brace commit^{tree
        self.failUnlessRaises(ValueError, rev_parse, '0.1.4^{tree')

        # missing starting brace
        self.failUnlessRaises(ValueError, rev_parse, '0.1.4^tree}')

        # REVLOG
        #######
        head = self.rorepo.head

        # need to specify a ref when using the @ syntax
        self.failUnlessRaises(BadObject, rev_parse, "%s@{0}" % head.commit.hexsha)

        # uses HEAD.ref by default
        self.assertEqual(rev_parse('@{0}'), head.commit)
        if not head.is_detached:
            refspec = '%s@{0}' % head.ref.name
            self.assertEqual(rev_parse(refspec), head.ref.commit)
            # all additional specs work as well
            self.assertEqual(rev_parse(refspec + "^{tree}"), head.commit.tree)
            self.assertEqual(rev_parse(refspec + ":CHANGES").type, 'blob')
        # END operate on non-detached head

        # position doesn't exist
        self.failUnlessRaises(IndexError, rev_parse, '@{10000}')

        # currently, nothing more is supported
        self.failUnlessRaises(NotImplementedError, rev_parse, "@{1 week ago}")

        # the last position
        assert rev_parse('@{1}') != head.commit
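A compact usage sketch of the rev specs this test exercises, assuming the current directory is a git repository and using GitPython's Repo.rev_parse (invoked above via self.rorepo.rev_parse):

    from git import Repo

    repo = Repo(".")
    commit = repo.rev_parse("HEAD")                  # resolve a rev spec to an object
    tree = repo.rev_parse("HEAD^{tree}")             # peel down to the tree
    # partial hexshas resolve too, provided the prefix is unambiguous
    assert repo.rev_parse(commit.hexsha[:7]) == commit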
Example #29
 def hexsha(self):
     """:return: 40 byte hex version of our 20 byte binary sha"""
     # b2a_hex produces bytes
     return bin_to_hex(self.binsha).decode('ascii')
Example #30
 def stream(self, sha):
     """For now, all lookup is done by git itself"""
     hexsha, typename, size, stream = self._git.stream_object_data(bin_to_hex(sha))
     return OStream(hex_to_bin(hexsha), typename, size, stream)
Example #31
 def stream(self, binsha: bytes) -> OStream:
     """For now, all lookup is done by git itself"""
     hexsha, typename, size, stream = self._git.stream_object_data(bin_to_hex(binsha))
     return OStream(hex_to_bin(hexsha), typename, size, stream)
Example #32
 def __str__(self):
     """:return: string of our SHA1 as understood by all git commands"""
     return bin_to_hex(self.binsha)
Example #33
 def has_object(self, sha):
     try:
         self.readable_db_object_path(bin_to_hex(sha))
         return True
     except BadObject:
         return False
Example #34
 def partial_to_complete_sha(self, partial_binsha, hex_len):
     """Simple adaptor to feed into our implementation"""
     return self.partial_to_complete_sha_hex(bin_to_hex(partial_binsha)[:hex_len])
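The adaptor slices the hex form to hex_len because a partial sha handed over in binary form may carry half a byte of padding; converting to hex and truncating drops that extra nibble. A tiny sketch of the slicing (the prefix is hypothetical):

    import binascii

    partial_binsha = binascii.a2b_hex("5aebc0")    # 3 bytes; the trailing 0 is padding
    hex_len = 5                                    # the caller knows only 5 hex digits
    assert binascii.b2a_hex(partial_binsha)[:hex_len] == b"5aebc"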
Example #35
 def info(self, sha):
     hexsha, typename, size = self._git.get_object_header(bin_to_hex(sha))
     return OInfo(hex_to_bin(hexsha), typename, size)
Example #36
 def info(self, sha):
     hexsha, typename, size = self._git.get_object_header(bin_to_hex(sha))
     return OInfo(hex_to_bin(hexsha), typename, size)
Example #37
    def test_rev_parse(self):
        rev_parse = self.rorepo.rev_parse

        # try special case: This one failed at some point, make sure it's fixed
        self.assertEqual(
            rev_parse("33ebe").hexsha,
            "33ebe7acec14b25c5f84f35a664803fcab2f7781")

        # start from reference
        num_resolved = 0

        for ref_no, ref in enumerate(Reference.iter_items(self.rorepo)):
            path_tokens = ref.path.split("/")
            for pt in range(len(path_tokens)):
                path_section = '/'.join(path_tokens[-(pt + 1):])
                try:
                    obj = self._assert_rev_parse(path_section)
                    self.assertEqual(obj.type, ref.object.type)
                    num_resolved += 1
                except (BadName, BadObject):
                    print("failed on %s" % path_section)
                    # is fine, in case we have something like 112, which belongs to remotes/rname/merge-requests/112
                    pass
                # END exception handling
            # END for each token
            if ref_no == 3 - 1:
                break
        # END for each reference
        assert num_resolved

        # it works with tags !
        tag = self._assert_rev_parse('0.1.4')
        self.assertEqual(tag.type, 'tag')

        # try full sha directly ( including type conversion )
        self.assertEqual(tag.object, rev_parse(tag.object.hexsha))
        self._assert_rev_parse_types(tag.object.hexsha, tag.object)

        # multiple tree types result in the same tree: HEAD^{tree}^{tree}:CHANGES
        rev = '0.1.4^{tree}^{tree}'
        self.assertEqual(rev_parse(rev), tag.object.tree)
        self.assertEqual(rev_parse(rev + ':CHANGES'),
                         tag.object.tree['CHANGES'])

        # try to get parents from first revision - it should fail as no such revision
        # exists
        first_rev = "33ebe7acec14b25c5f84f35a664803fcab2f7781"
        commit = rev_parse(first_rev)
        self.assertEqual(len(commit.parents), 0)
        self.assertEqual(commit.hexsha, first_rev)
        self.failUnlessRaises(BadName, rev_parse, first_rev + "~")
        self.failUnlessRaises(BadName, rev_parse, first_rev + "^")

        # short SHA1
        commit2 = rev_parse(first_rev[:20])
        self.assertEqual(commit2, commit)
        commit2 = rev_parse(first_rev[:5])
        self.assertEqual(commit2, commit)

        # todo: dereference tag into a blob 0.1.7^{blob} - quite a special one
        # needs a tag which points to a blob

        # ref^0 returns commit being pointed to, same with ref~0, and ^{}
        tag = rev_parse('0.1.4')
        for token in (('~0', '^0', '^{}')):
            self.assertEqual(tag.object, rev_parse('0.1.4%s' % token))
        # END handle multiple tokens

        # try partial parsing
        max_items = 40
        for i, binsha in enumerate(self.rorepo.odb.sha_iter()):
            self.assertEqual(
                rev_parse(bin_to_hex(binsha)[:8 -
                                             (i % 2)].decode('ascii')).binsha,
                binsha)
            if i > max_items:
                # this is rather slow currently, as rev_parse returns an object
                # which requires accessing packs, it has some additional overhead
                break
        # END for each binsha in repo

        # missing closing brace commit^{tree
        self.failUnlessRaises(ValueError, rev_parse, '0.1.4^{tree')

        # missing starting brace
        self.failUnlessRaises(ValueError, rev_parse, '0.1.4^tree}')

        # REVLOG
        #######
        head = self.rorepo.head

        # need to specify a ref when using the @ syntax
        self.failUnlessRaises(BadObject, rev_parse,
                              "%s@{0}" % head.commit.hexsha)

        # uses HEAD.ref by default
        self.assertEqual(rev_parse('@{0}'), head.commit)
        if not head.is_detached:
            refspec = '%s@{0}' % head.ref.name
            self.assertEqual(rev_parse(refspec), head.ref.commit)
            # all additional specs work as well
            self.assertEqual(rev_parse(refspec + "^{tree}"), head.commit.tree)
            self.assertEqual(rev_parse(refspec + ":CHANGES").type, 'blob')
        # END operate on non-detached head

        # position doesn't exist
        self.failUnlessRaises(IndexError, rev_parse, '@{10000}')

        # currently, nothing more is supported
        self.failUnlessRaises(NotImplementedError, rev_parse, "@{1 week ago}")

        # the last position
        assert rev_parse('@{1}') != head.commit
Example #38
 def partial_to_complete_sha(self, partial_binsha, hex_len):
     """Simple adaptor to feed into our implementation"""
     return self.partial_to_complete_sha_hex(bin_to_hex(partial_binsha)[:hex_len])
Example #39
    def test_large_data_streaming(self, rwrepo):
        # TODO: This part overlaps with the same file in gitdb.test.performance.test_stream
        # It should be shared if possible
        ldb = LooseObjectDB(osp.join(rwrepo.git_dir, 'objects'))

        for randomize in range(2):
            desc = (randomize and 'random ') or ''
            print("Creating %s data ..." % desc, file=sys.stderr)
            st = time()
            size, stream = make_memory_file(self.large_data_size_bytes, randomize)
            elapsed = time() - st
            print("Done (in %f s)" % elapsed, file=sys.stderr)

            # writing - due to the compression it will seem faster than it is
            st = time()
            binsha = ldb.store(IStream('blob', size, stream)).binsha
            elapsed_add = time() - st
            assert ldb.has_object(binsha)
            db_file = ldb.readable_db_object_path(bin_to_hex(binsha))
            fsize_kib = osp.getsize(db_file) / 1000

            size_kib = size / 1000
            msg = "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)"
            msg %= (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add)
            print(msg, file=sys.stderr)

            # reading all at once
            st = time()
            ostream = ldb.stream(binsha)
            shadata = ostream.read()
            elapsed_readall = time() - st

            stream.seek(0)
            assert shadata == stream.getvalue()
            msg = "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)"
            msg %= (size_kib, desc, elapsed_readall, size_kib / elapsed_readall)
            print(msg, file=sys.stderr)

            # reading in chunks of 1 MiB
            cs = 512 * 1000
            chunks = []
            st = time()
            ostream = ldb.stream(binsha)
            while True:
                data = ostream.read(cs)
                chunks.append(data)
                if len(data) < cs:
                    break
            # END read in chunks
            elapsed_readchunks = time() - st

            stream.seek(0)
            assert b''.join(chunks) == stream.getvalue()

            cs_kib = cs / 1000
            print("Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)"
                  % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks), file=sys.stderr)

            # del db file so git has something to do
            ostream = None
            import gc
            gc.collect()
            os.remove(db_file)

            # VS. CGIT
            ##########
            # CGIT ! Can using the cgit programs be faster ?
            proc = rwrepo.git.hash_object('-w', '--stdin', as_process=True, istream=subprocess.PIPE)

            # write file - pump everything in at once to be as fast as possible
            data = stream.getvalue()    # cache it
            st = time()
            proc.stdin.write(data)
            proc.stdin.close()
            gitsha = proc.stdout.read().strip()
            proc.wait()
            gelapsed_add = time() - st
            del(data)
            assert gitsha == bin_to_hex(binsha)     # we do it the same way, right ?

            #  as its the same sha, we reuse our path
            fsize_kib = osp.getsize(db_file) / 1000
            msg = "Added %i KiB (filesize = %i KiB) of %s data to using git-hash-object in %f s ( %f Write KiB / s)"
            msg %= (size_kib, fsize_kib, desc, gelapsed_add, size_kib / gelapsed_add)
            print(msg, file=sys.stderr)

            # compare ...
            print("Git-Python is %f %% faster than git when adding big %s files"
                  % (100.0 - (elapsed_add / gelapsed_add) * 100, desc), file=sys.stderr)

            # read all
            st = time()
            hexsha, typename, size, data = rwrepo.git.get_object_data(gitsha)  # @UnusedVariable
            gelapsed_readall = time() - st
            print("Read %i KiB of %s data at once using git-cat-file in %f s ( %f Read KiB / s)"
                  % (size_kib, desc, gelapsed_readall, size_kib / gelapsed_readall), file=sys.stderr)

            # compare
            print("Git-Python is %f %% faster than git when reading big %sfiles"
                  % (100.0 - (elapsed_readall / gelapsed_readall) * 100, desc), file=sys.stderr)

            # read chunks
            st = time()
            hexsha, typename, size, stream = rwrepo.git.stream_object_data(gitsha)  # @UnusedVariable
            while True:
                data = stream.read(cs)
                if len(data) < cs:
                    break
            # END read stream
            gelapsed_readchunks = time() - st
            msg = "Read %i KiB of %s data in %i KiB chunks from git-cat-file in %f s ( %f Read KiB / s)"
            msg %= (size_kib, desc, cs_kib, gelapsed_readchunks, size_kib / gelapsed_readchunks)
            print(msg, file=sys.stderr)

            # compare
            print("Git-Python is %f %% faster than git when reading big %s files in chunks"
                  % (100.0 - (elapsed_readchunks / gelapsed_readchunks) * 100, desc), file=sys.stderr)
Example #40
    def test_large_data_streaming(self, rwrepo):
        # TODO: This part overlaps with the same file in gitdb.test.performance.test_stream
        # It should be shared if possible
        ldb = LooseObjectDB(osp.join(rwrepo.git_dir, 'objects'))

        for randomize in range(2):
            desc = (randomize and 'random ') or ''
            print("Creating %s data ..." % desc, file=sys.stderr)
            st = time()
            size, stream = make_memory_file(self.large_data_size_bytes, randomize)
            elapsed = time() - st
            print("Done (in %f s)" % elapsed, file=sys.stderr)

            # writing - due to the compression it will seem faster than it is
            st = time()
            binsha = ldb.store(IStream('blob', size, stream)).binsha
            elapsed_add = time() - st
            assert ldb.has_object(binsha)
            db_file = ldb.readable_db_object_path(bin_to_hex(binsha))
            fsize_kib = osp.getsize(db_file) / 1000

            size_kib = size / 1000
            msg = "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)"
            msg %= (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add)
            print(msg, file=sys.stderr)

            # reading all at once
            st = time()
            ostream = ldb.stream(binsha)
            shadata = ostream.read()
            elapsed_readall = time() - st

            stream.seek(0)
            assert shadata == stream.getvalue()
            msg = "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)"
            msg %= (size_kib, desc, elapsed_readall, size_kib / elapsed_readall)
            print(msg, file=sys.stderr)

            # reading in chunks of 1 MiB
            cs = 512 * 1000
            chunks = list()
            st = time()
            ostream = ldb.stream(binsha)
            while True:
                data = ostream.read(cs)
                chunks.append(data)
                if len(data) < cs:
                    break
            # END read in chunks
            elapsed_readchunks = time() - st

            stream.seek(0)
            assert b''.join(chunks) == stream.getvalue()

            cs_kib = cs / 1000
            print("Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)"
                  % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks), file=sys.stderr)

            # del db file so git has something to do
            ostream = None
            import gc
            gc.collect()
            os.remove(db_file)

            # VS. CGIT
            ##########
            # CGIT ! Can using the cgit programs be faster ?
            proc = rwrepo.git.hash_object('-w', '--stdin', as_process=True, istream=subprocess.PIPE)

            # write file - pump everything in at once to be as fast as possible
            data = stream.getvalue()    # cache it
            st = time()
            proc.stdin.write(data)
            proc.stdin.close()
            gitsha = proc.stdout.read().strip()
            proc.wait()
            gelapsed_add = time() - st
            del(data)
            assert gitsha == bin_to_hex(binsha)     # we do it the same way, right ?

            #  as its the same sha, we reuse our path
            fsize_kib = osp.getsize(db_file) / 1000
            msg = "Added %i KiB (filesize = %i KiB) of %s data to using git-hash-object in %f s ( %f Write KiB / s)"
            msg %= (size_kib, fsize_kib, desc, gelapsed_add, size_kib / gelapsed_add)
            print(msg, file=sys.stderr)

            # compare ...
            print("Git-Python is %f %% faster than git when adding big %s files"
                  % (100.0 - (elapsed_add / gelapsed_add) * 100, desc), file=sys.stderr)

            # read all
            st = time()
            hexsha, typename, size, data = rwrepo.git.get_object_data(gitsha)  # @UnusedVariable
            gelapsed_readall = time() - st
            print("Read %i KiB of %s data at once using git-cat-file in %f s ( %f Read KiB / s)"
                  % (size_kib, desc, gelapsed_readall, size_kib / gelapsed_readall), file=sys.stderr)

            # compare
            print("Git-Python is %f %% faster than git when reading big %sfiles"
                  % (100.0 - (elapsed_readall / gelapsed_readall) * 100, desc), file=sys.stderr)

            # read chunks
            st = time()
            hexsha, typename, size, stream = rwrepo.git.stream_object_data(gitsha)  # @UnusedVariable
            while True:
                data = stream.read(cs)
                if len(data) < cs:
                    break
            # END read stream
            gelapsed_readchunks = time() - st
            msg = "Read %i KiB of %s data in %i KiB chunks from git-cat-file in %f s ( %f Read KiB / s)"
            msg %= (size_kib, desc, cs_kib, gelapsed_readchunks, size_kib / gelapsed_readchunks)
            print(msg, file=sys.stderr)

            # compare
            print("Git-Python is %f %% faster than git when reading big %s files in chunks"
                  % (100.0 - (elapsed_readchunks / gelapsed_readchunks) * 100, desc), file=sys.stderr)