def test_reading(self):
    gdb = GitDB(os.path.join(self.gitrepopath, 'objects'))

    # we have packs and loose objects, alternates doesn't necessarily exist
    assert 1 < len(gdb.databases()) < 4

    # access should be possible
    gitdb_sha = next(gdb.sha_iter())
    assert isinstance(gdb.info(gitdb_sha), OInfo)
    assert isinstance(gdb.stream(gitdb_sha), OStream)
    ni = 50
    assert gdb.size() >= ni
    sha_list = list(gdb.sha_iter())
    assert len(sha_list) == gdb.size()
    sha_list = sha_list[:ni]  # speed up tests ...

    # This is actually a test for compound functionality, but it doesn't
    # have a separate test module
    # test partial shas
    # this one is uneven and quite short
    gitdb_sha_hex = bin_to_hex(gitdb_sha)
    assert gdb.partial_to_complete_sha_hex(gitdb_sha_hex[:5]) == gitdb_sha

    # mix even/uneven hexshas
    for i, binsha in enumerate(sha_list):
        assert gdb.partial_to_complete_sha_hex(bin_to_hex(binsha)[:8 - (i % 2)]) == binsha
    # END for each sha

    self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex, "0000")

def create(cls, object_iter, base_dir, object_count=None, zlib_compression=zlib.Z_BEST_SPEED):
    """Create a new on-disk entity comprised of a properly named pack file
    and a properly named and corresponding index file. The pack contains
    all OStream objects contained in object iter.

    :param base_dir: directory which is to contain the files
    :return: PackEntity instance initialized with the new pack

    **Note:** for more information on the other parameters see the write_pack method"""
    pack_fd, pack_path = tempfile.mkstemp('', 'pack', base_dir)
    index_fd, index_path = tempfile.mkstemp('', 'index', base_dir)
    pack_write = lambda d: os.write(pack_fd, d)
    index_write = lambda d: os.write(index_fd, d)

    pack_binsha, index_binsha = cls.write_pack(object_iter, pack_write, index_write, object_count, zlib_compression)
    os.close(pack_fd)
    os.close(index_fd)

    fmt = "pack-%s.%s"
    new_pack_path = os.path.join(base_dir, fmt % (bin_to_hex(pack_binsha), 'pack'))
    new_index_path = os.path.join(base_dir, fmt % (bin_to_hex(pack_binsha), 'idx'))
    os.rename(pack_path, new_pack_path)
    os.rename(index_path, new_index_path)

    return cls(new_pack_path)

def append_entry(cls, config_reader, filepath, oldbinsha, newbinsha, message):
    """Append a new log entry to the revlog at filepath.

    :param config_reader: configuration reader of the repository - used to obtain
        user information. May be None
    :param filepath: full path to the log file
    :param oldbinsha: binary sha of the previous commit
    :param newbinsha: binary sha of the current commit
    :param message: message describing the change to the reference
    :param write: If True, the changes will be written right away. Otherwise
        the change will not be written
    :return: RefLogEntry object which was appended to the log
    :note: As we are append-only, concurrent access is not a problem as we
        do not interfere with readers."""
    if len(oldbinsha) != 20 or len(newbinsha) != 20:
        raise ValueError("Shas need to be given in binary format")
    # END handle sha type
    assure_directory_exists(filepath, is_file=True)
    entry = RefLogEntry((bin_to_hex(oldbinsha), bin_to_hex(newbinsha),
                         Actor.committer(config_reader), (int(time.time()), time.altzone), message))

    lf = LockFile(filepath)
    lf._obtain_lock_or_raise()

    fd = open(filepath, 'a')
    try:
        fd.write(repr(entry))
    finally:
        fd.close()
        lf._release_lock()
    # END handle write operation

    return entry

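# A minimal usage sketch for the append_entry classmethod shown above. Assumptions:
# RefLog is importable from git.refs.log, '/tmp/repo' is only a placeholder path, and
# passing None as config_reader falls back to default committer data. Appending the
# current HEAD sha twice is purely illustrative.
import os
from git import Repo
from git.refs.log import RefLog

repo = Repo('/tmp/repo')                               # hypothetical repository
head_log = os.path.join(repo.git_dir, 'logs', 'HEAD')  # HEAD's reflog file
sha = repo.head.commit.binsha                          # 20 byte binary sha
entry = RefLog.append_entry(None, head_log, sha, sha, "illustrative reflog entry")
print(entry.message)
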
def test_base(self):
    gdb = GitCmdObjectDB(os.path.join(self.rorepo.git_dir, 'objects'), self.rorepo.git)

    # partial to complete - works with everything
    hexsha = bin_to_hex(gdb.partial_to_complete_sha_hex("0.1.6"))
    assert len(hexsha) == 40

    assert bin_to_hex(gdb.partial_to_complete_sha_hex(hexsha[:20])) == hexsha

    # fails with BadObject
    for invalid_rev in ("0000", "bad/ref", "super bad"):
        self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex, invalid_rev)

def test_large_data_streaming(self, path):
    ldb = LooseObjectDB(path)
    string_ios = list()         # list of streams we previously created

    # serial mode
    for randomize in range(2):
        desc = (randomize and 'random ') or ''
        print("Creating %s data ..." % desc, file=sys.stderr)
        st = time()
        size, stream = make_memory_file(self.large_data_size_bytes, randomize)
        elapsed = time() - st
        print("Done (in %f s)" % elapsed, file=sys.stderr)
        string_ios.append(stream)

        # writing - due to the compression it will seem faster than it is
        st = time()
        sha = ldb.store(IStream('blob', size, stream)).binsha
        elapsed_add = time() - st
        assert ldb.has_object(sha)
        db_file = ldb.readable_db_object_path(bin_to_hex(sha))
        fsize_kib = os.path.getsize(db_file) / 1000

        size_kib = size / 1000
        print("Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)"
              % (size_kib, fsize_kib, desc, elapsed_add, size_kib / (elapsed_add or 1)), file=sys.stderr)

        # reading all at once
        st = time()
        ostream = ldb.stream(sha)
        shadata = ostream.read()
        elapsed_readall = time() - st

        stream.seek(0)
        assert shadata == stream.getvalue()

        print("Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)"
              % (size_kib, desc, elapsed_readall, size_kib / (elapsed_readall or 1)), file=sys.stderr)

        # reading in chunks of 1 MiB
        cs = 512 * 1000
        chunks = list()
        st = time()
        ostream = ldb.stream(sha)
        while True:
            data = ostream.read(cs)
            chunks.append(data)
            if len(data) < cs:
                break
        # END read in chunks
        elapsed_readchunks = time() - st

        stream.seek(0)
        assert b''.join(chunks) == stream.getvalue()

        cs_kib = cs / 1000
        print("Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)"
              % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / (elapsed_readchunks or 1)), file=sys.stderr)

        # del db file so we keep something to do
        ostream = None  # To release the file handle (win)
        remove(db_file)

def test_reading(self):
    gdb = GitDB(fixture_path('../../../.git/objects'))

    # we have packs and loose objects, alternates doesn't necessarily exist
    assert 1 < len(gdb.databases()) < 4

    # access should be possible
    gitdb_sha = hex_to_bin("5690fd0d3304f378754b23b098bd7cb5f4aa1976")
    assert isinstance(gdb.info(gitdb_sha), OInfo)
    assert isinstance(gdb.stream(gitdb_sha), OStream)
    assert gdb.size() > 200
    sha_list = list(gdb.sha_iter())
    assert len(sha_list) == gdb.size()

    # This is actually a test for compound functionality, but it doesn't
    # have a separate test module
    # test partial shas
    # this one is uneven and quite short
    assert gdb.partial_to_complete_sha_hex('155b6') == hex_to_bin("155b62a9af0aa7677078331e111d0f7aa6eb4afc")

    # mix even/uneven hexshas
    for i, binsha in enumerate(sha_list):
        assert gdb.partial_to_complete_sha_hex(bin_to_hex(binsha)[:8 - (i % 2)]) == binsha
    # END for each sha

    self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex, "0000")

def test_loose_correctness(self):
    """based on the pack(s) of our packed object DB, we will just copy and verify all objects in the pack
    into the loose object db (memory).
    This should help find dormant issues like this one https://github.com/gitpython-developers/GitPython/issues/220
    faster
    :note: It doesn't seem this test can find the issue unless the given pack contains highly compressed
    data files, like archives."""
    from gitdb.util import bin_to_hex
    pdb = GitDB(os.path.join(self.gitrepopath, 'objects'))
    mdb = MemoryDB()
    for c, sha in enumerate(pdb.sha_iter()):
        ostream = pdb.stream(sha)
        # the issue only showed on larger files which are hardly compressible ...
        if ostream.type != str_blob_type:
            continue
        istream = IStream(ostream.type, ostream.size, ostream.stream)
        mdb.store(istream)
        assert istream.binsha == sha, "Failed on object %s" % bin_to_hex(sha).decode('ascii')
        # this can fail ... sometimes, so the packs dataset should be huge
        assert len(mdb.stream(sha).read()) == ostream.size

        if c and c % 1000 == 0:
            print("Verified %i loose object compression/decompression cycles" % c, file=sys.stderr)
            mdb._cache.clear()

def _assert_index_entries(self, entries, trees):
    index = IndexFile.from_tree(self.rorepo, *[self.rorepo.tree(bin_to_hex(t)) for t in trees])
    assert entries
    assert len(index.entries) == len(entries)
    for entry in entries:
        assert (entry.path, entry.stage) in index.entries

def short_to_long(odb, hexsha):
    """:return: long hexadecimal sha1 from the given less-than-40 byte hexsha
        or None if no candidate could be found.
    :param hexsha: hexsha with less than 40 bytes"""
    try:
        return bin_to_hex(odb.partial_to_complete_sha_hex(hexsha))
    except BadObject:
        return None

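# A small sketch of the helper above and of the bin_to_hex/hex_to_bin round trip it
# relies on. Assumptions: '/tmp/repo' is a placeholder for any local repository and
# the 7 character prefix happens to be unique in that repository.
from git import Repo
from gitdb.util import bin_to_hex, hex_to_bin

repo = Repo('/tmp/repo')                  # hypothetical repository
binsha = repo.head.commit.binsha          # 20 byte binary sha
hexsha = bin_to_hex(binsha)               # 40 byte hex sha, returned as bytes
assert hex_to_bin(hexsha) == binsha
assert short_to_long(repo.odb, hexsha[:7].decode('ascii')) == hexsha
assert short_to_long(repo.odb, '1' * 39) is None   # an unknown prefix resolves to None
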
def create_submodule_tree(odb, submodule_commit_hexsha):
    submodule_conf = '/Users/kenjif/test_gitmodules'
    conf_mode, conf_binsha = write_blob_from_path(odb, submodule_conf)
    tree_contents = []
    tree_contents.append((conf_mode, conf_binsha, '.gitmodules'))
    tree_contents.append(get_submodule_tree_content(submodule_commit_hexsha, 'jEdit'))
    tree_mode, binsha = mktree_from_iter(odb, tree_contents)
    return bin_to_hex(binsha)

def create_submodule_tree(odb, submodule_commit_hexsha): submodule_conf = "/Users/kenjif/test_gitmodules" conf_mode, conf_binsha = write_blob(odb, submodule_conf) tree_contents = [] tree_contents.append((conf_mode, conf_binsha, ".gitmodules")) tree_contents.append(get_submodule_tree_content(submodule_commit_hexsha, "jEdit")) tree_mode, binsha = mktree_from_iter(odb, tree_contents) return bin_to_hex(binsha)
def test_basics(self, path):
    ldb = LooseObjectDB(path)

    # write data
    self._assert_object_writing(ldb)

    # verify sha iteration and size
    shas = list(ldb.sha_iter())
    assert shas and len(shas[0]) == 20
    assert len(shas) == ldb.size()

    # verify find short object
    long_sha = bin_to_hex(shas[-1])
    for short_sha in (long_sha[:20], long_sha[:5]):
        assert bin_to_hex(ldb.partial_to_complete_sha_hex(short_sha)) == long_sha
    # END for each sha

    self.failUnlessRaises(BadObject, ldb.partial_to_complete_sha_hex, '0000')

def test_basics(self, path):
    ldb = LooseObjectDB(path)

    # write data
    self._assert_object_writing(ldb)
    self._assert_object_writing_async(ldb)

    # verify sha iteration and size
    shas = list(ldb.sha_iter())
    assert shas and len(shas[0]) == 20
    assert len(shas) == ldb.size()

    # verify find short object
    long_sha = bin_to_hex(shas[-1])
    for short_sha in (long_sha[:20], long_sha[:5]):
        assert bin_to_hex(ldb.partial_to_complete_sha_hex(short_sha)) == long_sha
    # END for each sha

    self.failUnlessRaises(BadObject, ldb.partial_to_complete_sha_hex, '0000')

def partial_to_complete_sha_hex(self, partial_hexsha):
    """:return: 20 byte binary sha1 string which matches the given name uniquely
    :param name: hexadecimal partial name (bytes or ascii string)
    :raise AmbiguousObjectName:
    :raise BadObject: """
    candidate = None
    for binsha in self.sha_iter():
        if bin_to_hex(binsha).startswith(force_bytes(partial_hexsha)):
            # it can't ever find the same object twice
            if candidate is not None:
                raise AmbiguousObjectName(partial_hexsha)
            candidate = binsha
    # END for each object
    if candidate is None:
        raise BadObject(partial_hexsha)
    return candidate

def partial_to_complete_sha_hex(self, partial_hexsha):
    """:return: 20 byte binary sha1 string which matches the given name uniquely
    :param name: hexadecimal partial name
    :raise AmbiguousObjectName:
    :raise BadObject: """
    candidate = None
    for binsha in self.sha_iter():
        if bin_to_hex(binsha).startswith(partial_hexsha):
            # it can't ever find the same object twice
            if candidate is not None:
                raise AmbiguousObjectName(partial_hexsha)
            candidate = binsha
    # END for each object
    if candidate is None:
        raise BadObject(partial_hexsha)
    return candidate

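# A brief sketch of how a caller would drive the lookup above, including both error
# cases. Assumptions: the objects directory path is a placeholder and the 10 character
# prefix is just an example of a partial hexsha taken from an existing object.
from gitdb.db.loose import LooseObjectDB
from gitdb.exc import AmbiguousObjectName, BadObject
from gitdb.util import bin_to_hex

ldb = LooseObjectDB('/tmp/repo/.git/objects')    # hypothetical objects directory
prefix = bin_to_hex(next(ldb.sha_iter()))[:10]   # shorten a known sha for the lookup
try:
    binsha = ldb.partial_to_complete_sha_hex(prefix)
    print(bin_to_hex(binsha))
except AmbiguousObjectName:
    print("more than one object starts with the given prefix")
except BadObject:
    print("no object starts with the given prefix")
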
def commit_from_binsha(repo, binsha, org_commit, parents=None):
    tree = Tree.new(repo, bin_to_hex(binsha))

    env = os.environ
    offset = altz_to_utctz_str(org_commit.author_tz_offset)
    date = org_commit.authored_date
    env[Commit.env_author_date] = '{} {}'.format(date, offset)

    offset = altz_to_utctz_str(org_commit.committer_tz_offset)
    date = org_commit.committed_date
    env[Commit.env_committer_date] = '{} {}'.format(date, offset)

    return Commit.create_from_tree(repo, tree, org_commit.message, parents,
                                   head=True,
                                   author=org_commit.author,
                                   committer=org_commit.committer)

def _map_loose_object(self, sha):
    """
    :return: memory map of that file to allow random read access
    :raise BadObject: if object could not be located"""
    db_path = self.db_path(self.object_path(bin_to_hex(sha)))
    try:
        return file_contents_ro_filepath(db_path, flags=self._fd_open_flags)
    except OSError as e:
        if e.errno != ENOENT:
            # try again without noatime
            try:
                return file_contents_ro_filepath(db_path)
            except OSError:
                raise BadObject(sha)
            # didn't work because of our flag, don't try it again
            self._fd_open_flags = 0
        else:
            raise BadObject(sha)

def commit_from_binsha(repo, binsha, org_commit, parents=None):
    env = os.environ

    author_date = "%d %s" % (org_commit.authored_date, altz_to_utctz_str(org_commit.author_tz_offset))
    env[Commit.env_author_date] = author_date

    committer_date = "%d %s" % (org_commit.committed_date, altz_to_utctz_str(org_commit.committer_tz_offset))
    env[Commit.env_committer_date] = committer_date

    env[Actor.env_author_name] = org_commit.author.name.encode(org_commit.encoding)
    env[Actor.env_author_email] = org_commit.author.email or ""
    env[Actor.env_committer_name] = org_commit.committer.name.encode(org_commit.encoding)
    env[Actor.env_committer_email] = org_commit.committer.email or ""

    message = org_commit.message.encode(org_commit.encoding)
    tree = Tree.new(repo, bin_to_hex(binsha))

    return Commit.create_from_tree(repo, tree, message, parents, True)

def commit_from_binsha(repo, binsha, org_commit, parents=None):
    message = org_commit.message.encode(org_commit.encoding)

    tree = Tree.new(repo, bin_to_hex(binsha))

    new_commit = Commit(repo, Commit.NULL_BIN_SHA, tree,
                        org_commit.author, org_commit.authored_date, org_commit.author_tz_offset,
                        org_commit.committer, org_commit.committed_date, org_commit.committer_tz_offset,
                        message, parents, org_commit.encoding)

    stream = StringIO()
    new_commit._serialize(stream)
    streamlen = stream.tell()
    stream.seek(0)
    istream = repo.odb.store(IStream(Commit.type, streamlen, stream))
    new_commit.binsha = istream.binsha

    try:
        repo.head.set_commit(new_commit, logmsg="commit: %s" % message)
    except ValueError:
        master = git.refs.Head.create(repo, repo.head.ref, new_commit, logmsg="commit (initial): %s" % message)
        repo.head.set_reference(master, logmsg='commit: Switching to %s' % master)

    return new_commit

def has_object(self, sha):
    try:
        self.readable_db_object_path(bin_to_hex(sha))
        return True
    except BadObject:
        return False

def stream(self, sha):
    """For now, all lookup is done by pygit itself"""
    hexsha, typename, size, stream = self._git.stream_object_data(bin_to_hex(sha))
    return OStream(hex_to_bin(hexsha), typename, size, stream)

def __str__(self):
    """:return: string of our SHA1 as understood by all git commands"""
    return bin_to_hex(self.binsha)

def test_rev_parse(self):
    rev_parse = self.rorepo.rev_parse

    # try special case: This one failed at some point, make sure its fixed
    assert rev_parse("33ebe").hexsha == "33ebe7acec14b25c5f84f35a664803fcab2f7781"

    # start from reference
    num_resolved = 0

    for ref_no, ref in enumerate(Reference.iter_items(self.rorepo)):
        path_tokens = ref.path.split("/")
        for pt in range(len(path_tokens)):
            path_section = '/'.join(path_tokens[-(pt + 1):])
            try:
                obj = self._assert_rev_parse(path_section)
                assert obj.type == ref.object.type
                num_resolved += 1
            except (BadName, BadObject):
                print("failed on %s" % path_section)
                # is fine, in case we have something like 112, which belongs to remotes/rname/merge-requests/112
                pass
            # END exception handling
        # END for each token
        if ref_no == 3 - 1:
            break
    # END for each reference
    assert num_resolved

    # it works with tags !
    tag = self._assert_rev_parse('0.1.4')
    assert tag.type == 'tag'

    # try full sha directly ( including type conversion )
    assert tag.object == rev_parse(tag.object.hexsha)
    self._assert_rev_parse_types(tag.object.hexsha, tag.object)

    # multiple tree types result in the same tree: HEAD^{tree}^{tree}:CHANGES
    rev = '0.1.4^{tree}^{tree}'
    assert rev_parse(rev) == tag.object.tree
    assert rev_parse(rev + ':CHANGES') == tag.object.tree['CHANGES']

    # try to get parents from first revision - it should fail as no such revision
    # exists
    first_rev = "33ebe7acec14b25c5f84f35a664803fcab2f7781"
    commit = rev_parse(first_rev)
    assert len(commit.parents) == 0
    assert commit.hexsha == first_rev
    self.failUnlessRaises(BadName, rev_parse, first_rev + "~")
    self.failUnlessRaises(BadName, rev_parse, first_rev + "^")

    # short SHA1
    commit2 = rev_parse(first_rev[:20])
    assert commit2 == commit
    commit2 = rev_parse(first_rev[:5])
    assert commit2 == commit

    # todo: dereference tag into a blob 0.1.7^{blob} - quite a special one
    # needs a tag which points to a blob

    # ref^0 returns commit being pointed to, same with ref~0, and ^{}
    tag = rev_parse('0.1.4')
    for token in (('~0', '^0', '^{}')):
        assert tag.object == rev_parse('0.1.4%s' % token)
    # END handle multiple tokens

    # try partial parsing
    max_items = 40
    for i, binsha in enumerate(self.rorepo.odb.sha_iter()):
        assert rev_parse(bin_to_hex(binsha)[:8 - (i % 2)].decode('ascii')).binsha == binsha
        if i > max_items:
            # this is rather slow currently, as rev_parse returns an object
            # which requires accessing packs, it has some additional overhead
            break
    # END for each binsha in repo

    # missing closing brace commit^{tree
    self.failUnlessRaises(ValueError, rev_parse, '0.1.4^{tree')

    # missing starting brace
    self.failUnlessRaises(ValueError, rev_parse, '0.1.4^tree}')

    # REVLOG
    #######
    head = self.rorepo.head

    # need to specify a ref when using the @ syntax
    self.failUnlessRaises(BadObject, rev_parse, "%s@{0}" % head.commit.hexsha)

    # uses HEAD.ref by default
    assert rev_parse('@{0}') == head.commit
    if not head.is_detached:
        refspec = '%s@{0}' % head.ref.name
        assert rev_parse(refspec) == head.ref.commit
        # all additional specs work as well
        assert rev_parse(refspec + "^{tree}") == head.commit.tree
        assert rev_parse(refspec + ":CHANGES").type == 'blob'
    # END operate on non-detached head

    # position doesn't exist
    self.failUnlessRaises(IndexError, rev_parse, '@{10000}')

    # currently, nothing more is supported
    self.failUnlessRaises(NotImplementedError, rev_parse, "@{1 week ago}")

    # the last position
    assert rev_parse('@{1}') != head.commit

def test_rev_parse(self):
    rev_parse = self.rorepo.rev_parse

    # try special case: This one failed beforehand
    assert rev_parse("33ebe").hexsha == "33ebe7acec14b25c5f84f35a664803fcab2f7781"

    # start from reference
    num_resolved = 0
    for ref in Reference.iter_items(self.rorepo):
        path_tokens = ref.path.split("/")
        for pt in range(len(path_tokens)):
            path_section = '/'.join(path_tokens[-(pt+1):])
            try:
                obj = self._assert_rev_parse(path_section)
                assert obj.type == ref.object.type
                num_resolved += 1
            except BadObject:
                print "failed on %s" % path_section
                # is fine, in case we have something like 112, which belongs to remotes/rname/merge-requests/112
                pass
            # END exception handling
        # END for each token
    # END for each reference
    assert num_resolved

    # it works with tags !
    tag = self._assert_rev_parse('0.1.4')
    assert tag.type == 'tag'

    # try full sha directly ( including type conversion )
    assert tag.object == rev_parse(tag.object.hexsha)
    self._assert_rev_parse_types(tag.object.hexsha, tag.object)

    # multiple tree types result in the same tree: HEAD^{tree}^{tree}:CHANGES
    rev = '0.1.4^{tree}^{tree}'
    assert rev_parse(rev) == tag.object.tree
    assert rev_parse(rev+':CHANGES') == tag.object.tree['CHANGES']

    # try to get parents from first revision - it should fail as no such revision
    # exists
    first_rev = "33ebe7acec14b25c5f84f35a664803fcab2f7781"
    commit = rev_parse(first_rev)
    assert len(commit.parents) == 0
    assert commit.hexsha == first_rev
    self.failUnlessRaises(BadObject, rev_parse, first_rev+"~")
    self.failUnlessRaises(BadObject, rev_parse, first_rev+"^")

    # short SHA1
    commit2 = rev_parse(first_rev[:20])
    assert commit2 == commit
    commit2 = rev_parse(first_rev[:5])
    assert commit2 == commit

    # todo: dereference tag into a blob 0.1.7^{blob} - quite a special one
    # needs a tag which points to a blob

    # ref^0 returns commit being pointed to, same with ref~0, and ^{}
    tag = rev_parse('0.1.4')
    for token in (('~0', '^0', '^{}')):
        assert tag.object == rev_parse('0.1.4%s' % token)
    # END handle multiple tokens

    # try partial parsing
    max_items = 40
    for i, binsha in enumerate(self.rorepo.odb.sha_iter()):
        assert rev_parse(bin_to_hex(binsha)[:8-(i%2)]).binsha == binsha
        if i > max_items:
            # this is rather slow currently, as rev_parse returns an object
            # which requires accessing packs, it has some additional overhead
            break
    # END for each binsha in repo

    # missing closing brace commit^{tree
    self.failUnlessRaises(ValueError, rev_parse, '0.1.4^{tree')

    # missing starting brace
    self.failUnlessRaises(ValueError, rev_parse, '0.1.4^tree}')

    # cannot handle rev-log for now
    self.failUnlessRaises(ValueError, rev_parse, "hi@there")

def info(self, sha):
    hexsha, typename, size = self._git.get_object_header(bin_to_hex(sha))
    return OInfo(hex_to_bin(hexsha), typename, size)

def hexsha(self):
    """:return: our sha, hex encoded, 40 bytes"""
    return bin_to_hex(self[0])

def hexsha(self):
    return bin_to_hex(self[0])

def test_large_data_streaming(self, path):
    ldb = LooseObjectDB(path)
    string_ios = list()         # list of streams we previously created

    # serial mode
    for randomize in range(2):
        desc = (randomize and 'random ') or ''
        print >> sys.stderr, "Creating %s data ..." % desc
        st = time()
        size, stream = make_memory_file(self.large_data_size_bytes, randomize)
        elapsed = time() - st
        print >> sys.stderr, "Done (in %f s)" % elapsed
        string_ios.append(stream)

        # writing - due to the compression it will seem faster than it is
        st = time()
        sha = ldb.store(IStream('blob', size, stream)).binsha
        elapsed_add = time() - st
        assert ldb.has_object(sha)
        db_file = ldb.readable_db_object_path(bin_to_hex(sha))
        fsize_kib = os.path.getsize(db_file) / 1000

        size_kib = size / 1000
        print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add)

        # reading all at once
        st = time()
        ostream = ldb.stream(sha)
        shadata = ostream.read()
        elapsed_readall = time() - st

        stream.seek(0)
        assert shadata == stream.getvalue()

        print >> sys.stderr, "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, elapsed_readall, size_kib / elapsed_readall)

        # reading in chunks of 1 MiB
        cs = 512*1000
        chunks = list()
        st = time()
        ostream = ldb.stream(sha)
        while True:
            data = ostream.read(cs)
            chunks.append(data)
            if len(data) < cs:
                break
        # END read in chunks
        elapsed_readchunks = time() - st

        stream.seek(0)
        assert ''.join(chunks) == stream.getvalue()

        cs_kib = cs / 1000
        print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks)

        # del db file so we keep something to do
        os.remove(db_file)
    # END for each randomization factor

    # multi-threaded mode
    # want two, should be supported by most of todays cpus
    pool.set_size(2)
    total_kib = 0
    nsios = len(string_ios)
    for stream in string_ios:
        stream.seek(0)
        total_kib += len(stream.getvalue()) / 1000
    # END rewind

    def istream_iter():
        for stream in string_ios:
            stream.seek(0)
            yield IStream(str_blob_type, len(stream.getvalue()), stream)
        # END for each stream
    # END util

    # write multiple objects at once, involving concurrent compression
    reader = IteratorReader(istream_iter())
    istream_reader = ldb.store_async(reader)
    istream_reader.task().max_chunksize = 1

    st = time()
    istreams = istream_reader.read(nsios)
    assert len(istreams) == nsios
    elapsed = time() - st
    print >> sys.stderr, "Threads(%i): Compressed %i KiB of data in loose odb in %f s ( %f Write KiB / s)" % (pool.size(), total_kib, elapsed, total_kib / elapsed)

    # decompress multiple at once, by reading them
    # chunk size is not important as the stream will not really be decompressed
    # until its read
    istream_reader = IteratorReader(iter([i.binsha for i in istreams]))
    ostream_reader = ldb.stream_async(istream_reader)

    chunk_task = TestStreamReader(ostream_reader, "chunker", None)
    output_reader = pool.add_task(chunk_task)
    output_reader.task().max_chunksize = 1

    st = time()
    assert len(output_reader.read(nsios)) == nsios
    elapsed = time() - st
    print >> sys.stderr, "Threads(%i): Decompressed %i KiB of data in loose odb in %f s ( %f Read KiB / s)" % (pool.size(), total_kib, elapsed, total_kib / elapsed)

    # store the files, and read them back. For the reading, we use a task
    # as well which is chunked into one item per task. Reading all will
    # very quickly result in two threads handling two bytestreams of
    # chained compression/decompression streams
    reader = IteratorReader(istream_iter())
    istream_reader = ldb.store_async(reader)
    istream_reader.task().max_chunksize = 1

    istream_to_sha = lambda items: [i.binsha for i in items]
    istream_reader.set_post_cb(istream_to_sha)

    ostream_reader = ldb.stream_async(istream_reader)

    chunk_task = TestStreamReader(ostream_reader, "chunker", None)
    output_reader = pool.add_task(chunk_task)
    output_reader.max_chunksize = 1

    st = time()
    assert len(output_reader.read(nsios)) == nsios
    elapsed = time() - st
    print >> sys.stderr, "Threads(%i): Compressed and decompressed and read %i KiB of data in loose odb in %f s ( %f Combined KiB / s)" % (pool.size(), total_kib, elapsed, total_kib / elapsed)

# for item in repo.odb.sha_iter():
#     info = repo.odb.info(item);
#     shaBin = info.binsha
#     shaStr = info.hexsha.decode('ascii')
#     type = info.type
#     typeStr = type.decode('ascii')
#     if typeStr == 'commit':
#         # tree = Tree(repo, shaBin)
#         print(info.binsha, ":", shaStr)

git = repo.git
print(git.execute('git cat-file -t ba9a146e9da6f4aa1687f1a8a78f25e0cb748dff'))

tree = Tree(repo, hex_to_bin('ba9a146e9da6f4aa1687f1a8a78f25e0cb748dff'))
for shaBin, model, name in tree._cache:
    print(bin_to_hex(shaBin), name)

# print("--------------")
# comm = Commit(repo, b'\x07$\xe5\x92f\xcb]\x97\xd4\xe9s\x9c\xaf\xa0b\xfb\n1\xa1\x95')
# print(comm.message)
# print(comm.hexsha)
# print(comm.tree)
# print(comm.parents)
# print(comm.committer.name, comm.committer.email)
# print(comm.author.name, comm.author.email)
#
# blob = Blob(repo, b'\xedH\xeb\xa2\x92w\xb0w\xf592\x81\xe5_\xe8\xf77\x87c\x8c')
# print(blob.data_stream.read().decode('utf-8'))
# print(blob.hexsha)

# commit=repo.active_branch.commit

def hexsha(self):
    """:return: 40 byte hex version of our 20 byte binary sha"""
    # b2a_hex produces bytes
    return bin_to_hex(self.binsha).decode('ascii')

def _assert_index_entries(self, entries, trees):
    index = IndexFile.from_tree(self.rorepo, *[self.rorepo.tree(bin_to_hex(t).decode('ascii')) for t in trees])
    assert entries
    assert len(index.entries) == len(entries)
    for entry in entries:
        assert (entry.path, entry.stage) in index.entries

        children = []
        for parent in commit.parents:
            if parent.hexsha not in visited:
                children.append(parent.hexsha)
        if children:
            nodes.extend(children)
        else:
            nodes.pop()
            visited.add(node)
            post.append(node)
    return post


if __name__ == '__main__':
    repo = Repo.init('test_git')
    # (mode, binsha) = write_tree(repo.odb, 'temp')
    # (mode, binsha) = write_tree(repo.odb, 'temp/00')
    # (mode, binsha) = write_tree(repo.odb, 'temp/01')
    paths = ['temp/00', 'temp/01']
    names = ['a', 'b']
    (mode, binsha) = write_paths(repo.odb, paths, names)
    tree = Tree.new(repo, bin_to_hex(binsha))
    c = Commit.create_from_tree(repo, tree, 'test commit', None, True)

def hexsha(self):
    """:return: 40 byte hex version of our 20 byte binary sha"""
    return bin_to_hex(self.binsha)

def test_large_data_streaming(self, rwrepo):
    # TODO: This part overlaps with the same file in gitdb.test.performance.test_stream
    # It should be shared if possible
    ldb = LooseObjectDB(os.path.join(rwrepo.git_dir, 'objects'))

    for randomize in range(2):
        desc = (randomize and 'random ') or ''
        print("Creating %s data ..." % desc, file=sys.stderr)
        st = time()
        size, stream = make_memory_file(self.large_data_size_bytes, randomize)
        elapsed = time() - st
        print("Done (in %f s)" % elapsed, file=sys.stderr)

        # writing - due to the compression it will seem faster than it is
        st = time()
        binsha = ldb.store(IStream('blob', size, stream)).binsha
        elapsed_add = time() - st
        assert ldb.has_object(binsha)
        db_file = ldb.readable_db_object_path(bin_to_hex(binsha))
        fsize_kib = os.path.getsize(db_file) / 1000

        size_kib = size / 1000
        msg = "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)"
        msg %= (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add)
        print(msg, file=sys.stderr)

        # reading all at once
        st = time()
        ostream = ldb.stream(binsha)
        shadata = ostream.read()
        elapsed_readall = time() - st

        stream.seek(0)
        assert shadata == stream.getvalue()

        msg = "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)"
        msg %= (size_kib, desc, elapsed_readall, size_kib / elapsed_readall)
        print(msg, file=sys.stderr)

        # reading in chunks of 1 MiB
        cs = 512 * 1000
        chunks = list()
        st = time()
        ostream = ldb.stream(binsha)
        while True:
            data = ostream.read(cs)
            chunks.append(data)
            if len(data) < cs:
                break
        # END read in chunks
        elapsed_readchunks = time() - st

        stream.seek(0)
        assert b''.join(chunks) == stream.getvalue()

        cs_kib = cs / 1000
        print("Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)"
              % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks), file=sys.stderr)

        # del db file so git has something to do
        os.remove(db_file)

        # VS. CGIT
        ##########
        # CGIT ! Can using the cgit programs be faster ?
        proc = rwrepo.git.hash_object('-w', '--stdin', as_process=True, istream=subprocess.PIPE)

        # write file - pump everything in at once to be as fast as possible
        data = stream.getvalue()    # cache it
        st = time()
        proc.stdin.write(data)
        proc.stdin.close()
        gitsha = proc.stdout.read().strip()
        proc.wait()
        gelapsed_add = time() - st
        del (data)
        assert gitsha == bin_to_hex(binsha)     # we do it the same way, right ?

        # as its the same sha, we reuse our path
        fsize_kib = os.path.getsize(db_file) / 1000
        msg = "Added %i KiB (filesize = %i KiB) of %s data to using git-hash-object in %f s ( %f Write KiB / s)"
        msg %= (size_kib, fsize_kib, desc, gelapsed_add, size_kib / gelapsed_add)
        print(msg, file=sys.stderr)

        # compare ...
        print("Git-Python is %f %% faster than git when adding big %s files"
              % (100.0 - (elapsed_add / gelapsed_add) * 100, desc), file=sys.stderr)

        # read all
        st = time()
        s, t, size, data = rwrepo.git.get_object_data(gitsha)
        gelapsed_readall = time() - st
        print("Read %i KiB of %s data at once using git-cat-file in %f s ( %f Read KiB / s)"
              % (size_kib, desc, gelapsed_readall, size_kib / gelapsed_readall), file=sys.stderr)

        # compare
        print("Git-Python is %f %% faster than git when reading big %sfiles"
              % (100.0 - (elapsed_readall / gelapsed_readall) * 100, desc), file=sys.stderr)

        # read chunks
        st = time()
        s, t, size, stream = rwrepo.git.stream_object_data(gitsha)
        while True:
            data = stream.read(cs)
            if len(data) < cs:
                break
        # END read stream
        gelapsed_readchunks = time() - st
        msg = "Read %i KiB of %s data in %i KiB chunks from git-cat-file in %f s ( %f Read KiB / s)"
        msg %= (size_kib, desc, cs_kib, gelapsed_readchunks, size_kib / gelapsed_readchunks)
        print(msg, file=sys.stderr)

        # compare
        print("Git-Python is %f %% faster than git when reading big %s files in chunks"
              % (100.0 - (elapsed_readchunks / gelapsed_readchunks) * 100, desc), file=sys.stderr)

def stream(self, sha):
    """For now, all lookup is done by git itself"""
    hexsha, typename, size, stream = self._git.stream_object_data(bin_to_hex(sha))
    return OStream(hex_to_bin(hexsha), typename, size, stream)
