def _assert_object_writing(self, db):
    """General tests to verify object writing, compatible with ObjectDBW

    :note: requires write access to the database"""
    # start in 'dry-run' mode, using a simple sha1 writer
    ostreams = (ZippedStoreShaWriter, None)
    for ostreamcls in ostreams:
        for data in self.all_data:
            dry_run = ostreamcls is not None
            ostream = None
            if ostreamcls is not None:
                ostream = ostreamcls()
                assert isinstance(ostream, Sha1Writer)
            # END create ostream

            prev_ostream = db.set_ostream(ostream)
            assert type(prev_ostream) in ostreams or prev_ostream in ostreams

            istream = IStream(str_blob_type, len(data), StringIO(data))

            # store returns the same istream instance, with its new sha set
            my_istream = db.store(istream)
            sha = istream.binsha
            assert my_istream is istream
            assert db.has_object(sha) != dry_run
            assert len(sha) == 20

            # verify data - the slow way, we want to run code
            if not dry_run:
                info = db.info(sha)
                assert str_blob_type == info.type
                assert info.size == len(data)

                ostream = db.stream(sha)
                assert ostream.read() == data
                assert ostream.type == str_blob_type
                assert ostream.size == len(data)
            else:
                self.failUnlessRaises(BadObject, db.info, sha)
                self.failUnlessRaises(BadObject, db.stream, sha)

                # DIRECT STREAM COPY
                # our data has been written in object format to the StringIO
                # we passed as output stream. No physical database representation
                # was created.
                # Test direct stream copy of object streams, the result must be
                # identical to what we fed in
                ostream.seek(0)
                istream.stream = ostream
                assert istream.binsha is not None
                prev_sha = istream.binsha

                db.set_ostream(ZippedStoreShaWriter())
                db.store(istream)
                assert istream.binsha == prev_sha
                new_ostream = db.ostream()

                # note: only works as long as our store write uses the same
                # compression level, which is zip_best
                assert ostream.getvalue() == new_ostream.getvalue()
            # END verify data
        # END for each data set
    # END for each dry_run mode
def mktree(self, odb, entries):
    """Create a tree from the given tree entries and save it to the database"""
    sio = StringIO()
    tree_to_stream(entries, sio.write)
    sio.seek(0)
    istream = odb.store(IStream(str_tree_type, len(sio.getvalue()), sio))
    return istream.binsha
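
# A minimal usage sketch for mktree (illustrative, not part of the original
# suite): build a one-entry tree from a previously stored blob. `odb` and
# `blob_binsha` are assumed to exist; entries use the (binsha, mode, name)
# format consumed by tree_to_stream.
def example_mktree(self, odb, blob_binsha):
    from stat import S_IFREG
    # S_IFREG | 0644 yields git's regular-file mode 100644
    entries = [(blob_binsha, S_IFREG | 0644, 'hello.txt')]
    tree_binsha = self.mktree(odb, entries)
    assert len(tree_binsha) == 20
    return tree_binsha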
def _assert_object_writing_simple(self, db):
    # write a bunch of objects and query their streams and info
    null_objs = db.size()
    ni = 250
    for i in xrange(ni):
        data = pack(">L", i)
        istream = IStream(str_blob_type, len(data), StringIO(data))
        new_istream = db.store(istream)
        assert new_istream is istream
        assert db.has_object(istream.binsha)

        info = db.info(istream.binsha)
        assert isinstance(info, OInfo)
        assert info.type == istream.type and info.size == istream.size

        stream = db.stream(istream.binsha)
        assert isinstance(stream, OStream)
        assert stream.binsha == info.binsha and stream.type == info.type
        assert stream.read() == data
    # END for each item

    assert db.size() == null_objs + ni
    shas = list(db.sha_iter())
    assert len(shas) == db.size()
    assert len(shas[0]) == 20
def test_commit_serialization(self):
    assert_commit_serialization(self.rwrepo, self.head_sha_2k, True)

    rwrepo = self.rwrepo
    make_object = rwrepo.store
    # direct serialization - deserialization can be tested afterwards
    # serialization is probably limited by IO
    hc = rwrepo.commit(self.head_sha_2k)

    commits = list()
    nc = 5000
    st = time()
    for i in xrange(nc):
        cm = Commit(rwrepo, Commit.NULL_BIN_SHA, hc.tree,
                    hc.author, hc.authored_date, hc.author_tz_offset,
                    hc.committer, hc.committed_date, hc.committer_tz_offset,
                    str(i), parents=hc.parents, encoding=hc.encoding)

        stream = StringIO()
        cm._serialize(stream)
        slen = stream.tell()
        stream.seek(0)

        cm.binsha = make_object(IStream(Commit.type, slen, stream)).binsha
    # END commit creation
    elapsed = time() - st

    print >> sys.stderr, "Serialized %i commits to loose objects in %f s ( %f commits / s )" % (nc, elapsed, nc / elapsed)
def write_tree_from_cache(entries, odb, sl, si=0):
    """Create a tree from the given sorted list of entries and put the respective
    trees into the given object database

    :param entries: **sorted** list of IndexEntries
    :param odb: object database to store the trees in
    :param sl: slice indicating the range we should process on the entries list
    :param si: start index at which we should start creating subtrees
    :return: tuple(binsha, list(tree_entry, ...)) a tuple of a sha and a list of
        tree entries being a tuple of hexsha, mode, name"""
    tree_items = list()
    tree_items_append = tree_items.append
    ci = sl.start
    end = sl.stop
    while ci < end:
        entry = entries[ci]
        if entry.stage != 0:
            raise UnmergedEntriesError(entry)
        # END abort on unmerged
        ci += 1
        rbound = entry.path.find('/', si)
        if rbound == -1:
            # it's not a tree
            tree_items_append((entry.binsha, entry.mode, entry.path[si:]))
        else:
            # find common base range
            base = entry.path[si:rbound]
            xi = ci
            while xi < end:
                oentry = entries[xi]
                orbound = oentry.path.find('/', si)
                if orbound == -1 or oentry.path[si:orbound] != base:
                    break
                # END abort on base mismatch
                xi += 1
            # END find common base

            # enter recursion
            # ci - 1 as we want to count our current item as well
            sha, tree_entry_list = write_tree_from_cache(entries, odb, slice(ci - 1, xi), rbound + 1)
            tree_items_append((sha, S_IFDIR, base))

            # skip ahead
            ci = xi
        # END handle bounds
    # END for each entry

    # finally create the tree
    sio = StringIO()
    tree_to_stream(tree_items, sio.write)
    sio.seek(0)

    istream = odb.store(IStream(str_tree_type, len(sio.getvalue()), sio))
    return (istream.binsha, tree_items)
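
# A minimal sketch of the typical top-level call (as used when writing an
# index out as a tree): process the full, sorted entry list starting at
# path offset 0 and keep only the root tree's binary sha.
def write_root_tree(entries, odb):
    binsha, root_items = write_tree_from_cache(entries, odb, slice(0, len(entries)))
    return binsha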
def assert_commit_serialization(rwrepo, commit_id, print_performance_info=False):
    """Traverse all commits in the history of the commit identified by commit_id
    and check whether the serialization works.

    :param print_performance_info: if True, we will show how fast we are"""
    ns = 0      # num serializations
    nds = 0     # num deserializations

    st = time.time()
    for cm in rwrepo.commit(commit_id).traverse():
        nds += 1

        # assert that we deserialize commits correctly, hence we get the same
        # sha on serialization
        stream = StringIO()
        cm._serialize(stream)
        ns += 1
        streamlen = stream.tell()
        stream.seek(0)

        istream = rwrepo.odb.store(IStream(Commit.type, streamlen, stream))
        assert istream.hexsha == cm.hexsha

        nc = Commit(rwrepo, Commit.NULL_BIN_SHA, cm.tree,
                    cm.author, cm.authored_date, cm.author_tz_offset,
                    cm.committer, cm.committed_date, cm.committer_tz_offset,
                    cm.message, cm.parents, cm.encoding)

        assert nc.parents == cm.parents
        stream = StringIO()
        nc._serialize(stream)
        ns += 1
        streamlen = stream.tell()
        stream.seek(0)

        # reuse istream
        istream.size = streamlen
        istream.stream = stream
        istream.binsha = None
        nc.binsha = rwrepo.odb.store(istream).binsha

        # if it worked, we have exactly the same contents !
        assert nc.hexsha == cm.hexsha
    # END check commits
    elapsed = time.time() - st

    if print_performance_info:
        print >> sys.stderr, "Serialized %i and deserialized %i commits in %f s ( (%f, %f) commits / s )" % (
            ns, nds, elapsed, ns / elapsed, nds / elapsed)
def store_path(filepath):
    """Store file at filepath in the database and return the base index entry"""
    st = os.lstat(filepath)     # handles non-symlinks as well
    stream = None
    if S_ISLNK(st.st_mode):
        stream = StringIO(os.readlink(filepath))
    else:
        stream = open(filepath, 'rb')
    # END handle stream
    fprogress(filepath, False, filepath)
    istream = self.repo.odb.store(IStream(Blob.type, st.st_size, stream))
    fprogress(filepath, True, filepath)

    return BaseIndexEntry((stat_mode_to_index_mode(st.st_mode),
                           istream.binsha, 0, to_native_path_linux(filepath)))
def test_base(self):
    ldb = PureLooseObjectODB(fixture_path("../../../.git/objects"))

    for sha1 in ldb.sha_iter():
        oinfo = ldb.info(sha1)
        ostream = ldb.stream(sha1)
        assert oinfo[:3] == ostream[:3]

        assert len(ostream.read()) == ostream.size
        assert ldb.has_object(oinfo.binsha)
    # END for each sha in database
    # assure we close all files
    try:
        del ostream
        del oinfo
    except UnboundLocalError:
        pass
    # END ignore exception if there are no loose objects

    data = "my data"
    istream = IStream("blob", len(data), StringIO(data))

    # the object does not yet have a sha
    assert istream.binsha is None
    ldb.store(istream)
    # now the sha is set
    assert len(istream.binsha) == 20
    assert ldb.has_object(istream.binsha)

    # async operation
    # Create a reader from an iterator
    reader = IteratorReader(ldb.sha_iter())

    # get reader for object streams
    info_reader = ldb.stream_async(reader)

    # read one
    info = info_reader.read(1)[0]

    # read all the rest until depletion
    ostreams = info_reader.read()

    # set the pool to use two threads
    pool.set_size(2)

    # synchronize the mode of operation
    pool.set_size(0)
def _store_path(self, filepath, fprogress):
    """Store file at filepath in the database and return the base index entry

    Needs the git_working_dir decorator active ! This must be assured in the
    calling code"""
    st = os.lstat(filepath)     # handles non-symlinks as well
    stream = None
    if S_ISLNK(st.st_mode):
        stream = StringIO(os.readlink(filepath))
    else:
        stream = open(filepath, 'rb')
    # END handle stream
    fprogress(filepath, False, filepath)
    istream = self.repo.odb.store(IStream(Blob.type, st.st_size, stream))
    fprogress(filepath, True, filepath)

    return BaseIndexEntry((stat_mode_to_index_mode(st.st_mode),
                           istream.binsha, 0, to_native_path_linux(filepath)))
def stream_copy(self, sha_iter, odb):
    """Copy the streams as identified by shas yielded by sha_iter into the given odb
    The streams will be copied directly

    :note: the object will only be written if it did not exist in the target db
    :return: amount of streams actually copied into odb. If smaller than the amount
        of input shas, one or more objects did already exist in odb"""
    count = 0
    for sha in sha_iter:
        if odb.has_object(sha):
            continue
        # END check object existence

        ostream = self.stream(sha)
        # compressed data including header
        sio = StringIO(ostream.stream.data())
        istream = IStream(ostream.type, ostream.size, sio, sha)

        odb.store(istream)
        count += 1
    # END for each sha
    return count
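
# Usage sketch (hypothetical names): duplicate the contents of one database
# into another. Both are assumed to implement the same database interface;
# objects already present in `dst_db` are skipped by stream_copy itself.
def duplicate_database(src_db, dst_db):
    copied = src_db.stream_copy(src_db.sha_iter(), dst_db)
    return copied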
def test_streams(self):
    # test info
    sha = NULL_BIN_SHA
    s = 20
    blob_id = 3

    info = OInfo(sha, str_blob_type, s)
    assert info.binsha == sha
    assert info.type == str_blob_type
    assert info.type_id == blob_id
    assert info.size == s

    # test pack info
    # provides type_id
    pinfo = OPackInfo(0, blob_id, s)
    assert pinfo.type == str_blob_type
    assert pinfo.type_id == blob_id
    assert pinfo.pack_offset == 0

    dpinfo = ODeltaPackInfo(0, blob_id, s, sha)
    assert dpinfo.type == str_blob_type
    assert dpinfo.type_id == blob_id
    assert dpinfo.delta_info == sha
    assert dpinfo.pack_offset == 0

    # test ostream
    stream = DummyStream()
    ostream = OStream(*(info + (stream, )))
    assert ostream.stream is stream
    ostream.read(15)
    stream._assert()
    assert stream.bytes == 15
    ostream.read(20)
    assert stream.bytes == 20

    # test packstream
    postream = OPackStream(*(pinfo + (stream, )))
    assert postream.stream is stream
    postream.read(10)
    stream._assert()
    assert stream.bytes == 10

    # test deltapackstream
    dpostream = ODeltaPackStream(*(dpinfo + (stream, )))
    assert dpostream.stream is stream
    dpostream.read(5)
    stream._assert()
    assert stream.bytes == 5

    # derive with own args
    DeriveTest(sha, str_blob_type, s, stream, 'mine', myarg=3)._assert()

    # test istream
    istream = IStream(str_blob_type, s, stream)
    assert istream.binsha is None
    istream.binsha = sha
    assert istream.binsha == sha

    assert len(istream.binsha) == 20
    assert len(istream.hexsha) == 40

    assert istream.size == s
    istream.size = s * 2
    assert istream.size == s * 2
    assert istream.type == str_blob_type
    istream.type = "something"
    assert istream.type == "something"
    assert istream.stream is stream
    istream.stream = None
    assert istream.stream is None

    assert istream.error is None
    istream.error = Exception()
    assert isinstance(istream.error, Exception)
def istream_generator(offset=0, ni=ni):
    for data_src in xrange(ni):
        data = str(data_src + offset)
        yield IStream(str_blob_type, len(data), StringIO(data))
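
# Sketch of how the generator output is typically consumed (assumes a
# writable object database `db`): each yielded IStream is stored in turn
# and receives its binsha from the store call.
def store_generated_streams(db, offset=0, count=10):
    return [db.store(istream).binsha for istream in istream_generator(offset, count)]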
@classmethod
def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False):
    """Commit the given tree, creating a commit object.

    :param repo: Repo object the commit should be part of
    :param tree: Tree object or hex or bin sha the tree of the new commit
    :param message: Commit message. It may be an empty string if no message is provided.
        It will be converted to a string in any case.
    :param parent_commits:
        Optional Commit objects to use as parents for the new commit.
        If empty list, the commit will have no parents at all and become
        a root commit.
        If None, the current head commit will be the parent of the
        new commit object
    :param head:
        If True, the HEAD will be advanced to the new commit automatically.
        Otherwise the HEAD will remain pointing at the previous commit. This could
        lead to undesired results when diffing files.
    :return: Commit object representing the new commit
    :note:
        Additional information about the committer and author are taken from the
        environment or from the git configuration, see git-commit-tree for
        more information"""
    if parent_commits is None:
        try:
            parent_commits = [repo.head.commit]
        except ValueError:
            # empty repositories have no head commit
            parent_commits = list()
        # END handle parent commits
    # END if parent commits are unset

    # retrieve all additional information, create a commit object, and
    # serialize it
    # Generally:
    # * Environment variables override configuration values
    # * Sensible defaults are set according to the git documentation

    # COMMITTER AND AUTHOR INFO
    cr = repo.config_reader()
    env = os.environ

    committer = Actor.committer(cr)
    author = Actor.author(cr)

    # PARSE THE DATES
    unix_time = int(time())
    offset = altzone

    author_date_str = env.get(cls.env_author_date, '')
    if author_date_str:
        author_time, author_offset = parse_date(author_date_str)
    else:
        author_time, author_offset = unix_time, offset
    # END set author time

    committer_date_str = env.get(cls.env_committer_date, '')
    if committer_date_str:
        committer_time, committer_offset = parse_date(committer_date_str)
    else:
        committer_time, committer_offset = unix_time, offset
    # END set committer time

    # assume utf8 encoding
    enc_section, enc_option = cls.conf_encoding.split('.')
    conf_encoding = cr.get_value(enc_section, enc_option, cls.default_encoding)

    # if the tree is not an object, make sure we create one - otherwise
    # the created commit object is invalid
    if isinstance(tree, str):
        tree = repo.tree(tree)
    # END tree conversion

    # CREATE NEW COMMIT
    new_commit = cls(repo, cls.NULL_BIN_SHA, tree,
                     author, author_time, author_offset,
                     committer, committer_time, committer_offset,
                     message, parent_commits, conf_encoding)

    stream = StringIO()
    new_commit._serialize(stream)
    streamlen = stream.tell()
    stream.seek(0)

    istream = repo.odb.store(IStream(cls.type, streamlen, stream))
    new_commit.binsha = istream.binsha

    if head:
        # need late import here, importing git at the very beginning throws
        # as well ...
        import git.refs
        try:
            repo.head.set_commit(new_commit, logmsg="commit: %s" % message)
        except ValueError:
            # head is not yet set to the ref our HEAD points to
            # Happens on first commit
            master = git.refs.Head.create(repo, repo.head.ref, new_commit,
                                          logmsg="commit (initial): %s" % message)
            repo.head.set_reference(master, logmsg='commit: Switching to %s' % master)
        # END handle empty repositories
    # END advance head handling

    return new_commit
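
# Usage sketch: create a root commit on top of an existing tree and advance
# HEAD to it. `repo` is assumed to be a Repo instance and `tree` a Tree
# object or tree sha known to the repository; per the docstring above, an
# empty parent list makes the new commit a root commit.
def commit_tree_as_root(repo, tree, message):
    return Commit.create_from_tree(repo, tree, message, parent_commits=[], head=True)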