def __setitem__(self, file_name: str, contents: List[bytes]):
    tree, name = self._get_subtree(file_name)
    content = b''.join(contents)
    self.tree = self._modify(
        self.tree,
        lambda t: t.add(
            self.tree.repo.odb.store(
                IStream(git.Blob.type, len(content), BytesIO(content))).binsha,
            git.Blob.file_mode, name, force=True))
def set_content(self, path, content, commit_msg=None):
    """ Add new content in `path` """
    # Create the stream
    stream = StringIO(content.encode('utf-8'))
    stream.seek(0, 2)
    streamlen = stream.tell()
    stream.seek(0)
    istream = IStream("blob", streamlen, stream)
    # Add it to the repository object database
    self.repo.odb.store(istream)
    # Create the corresponding blob object
    blob = Blob(self.repo, istream.binsha, 0100644, path.encode('utf-8'))
    # Commit
    self.repo.index.add([IndexEntry.from_blob(blob)])
    if not commit_msg:
        # format the unicode path first, then encode the whole message once
        commit_msg = ugettext('Update Wiki: {0}').format(path).encode('utf-8')
    self.repo.index.commit(commit_msg)
    # Update internal information
    self._parse()
def test_large_data_streaming(self, path):
    ldb = LooseObjectDB(path)
    string_ios = list()     # list of streams we previously created

    # serial mode
    for randomize in range(2):
        desc = (randomize and 'random ') or ''
        print("Creating %s data ..." % desc, file=sys.stderr)
        st = time()
        size, stream = make_memory_file(self.large_data_size_bytes, randomize)
        elapsed = time() - st
        print("Done (in %f s)" % elapsed, file=sys.stderr)
        string_ios.append(stream)

        # writing - due to the compression it will seem faster than it is
        st = time()
        sha = ldb.store(IStream('blob', size, stream)).binsha
        elapsed_add = time() - st
        assert ldb.has_object(sha)
        db_file = ldb.readable_db_object_path(bin_to_hex(sha))
        fsize_kib = os.path.getsize(db_file) / 1000

        size_kib = size / 1000
        print("Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)"
              % (size_kib, fsize_kib, desc, elapsed_add, size_kib / (elapsed_add or 1)), file=sys.stderr)

        # reading all at once
        st = time()
        ostream = ldb.stream(sha)
        shadata = ostream.read()
        elapsed_readall = time() - st

        stream.seek(0)
        assert shadata == stream.getvalue()

        print("Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)"
              % (size_kib, desc, elapsed_readall, size_kib / (elapsed_readall or 1)), file=sys.stderr)

        # reading in chunks of 512 KB
        cs = 512 * 1000
        chunks = list()
        st = time()
        ostream = ldb.stream(sha)
        while True:
            data = ostream.read(cs)
            chunks.append(data)
            if len(data) < cs:
                break
        # END read in chunks
        elapsed_readchunks = time() - st

        stream.seek(0)
        assert b''.join(chunks) == stream.getvalue()

        cs_kib = cs / 1000
        print("Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)"
              % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / (elapsed_readchunks or 1)), file=sys.stderr)

        # del db file so we keep something to do
        ostream = None  # To release the file handle (win)
        remove(db_file)
def test_base(self):
    ldb = LooseObjectDB(os.path.join(self.gitrepopath, 'objects'))

    for sha1 in ldb.sha_iter():
        oinfo = ldb.info(sha1)
        ostream = ldb.stream(sha1)
        assert oinfo[:3] == ostream[:3]

        assert len(ostream.read()) == ostream.size
        assert ldb.has_object(oinfo.binsha)
    # END for each sha in database
    # assure we close all files
    try:
        del(ostream)
        del(oinfo)
    except UnboundLocalError:
        pass
    # END ignore exception if there are no loose objects

    data = "my data".encode("ascii")
    istream = IStream("blob", len(data), BytesIO(data))

    # the object does not yet have a sha
    assert istream.binsha is None
    ldb.store(istream)
    # now the sha is set
    assert len(istream.binsha) == 20
    assert ldb.has_object(istream.binsha)
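# A minimal, self-contained sketch of the IStream round-trip the test above
# exercises (gitdb only; `db_path` is a hypothetical objects directory):
from io import BytesIO

from gitdb import IStream, LooseObjectDB


def store_blob(db_path, payload):
    ldb = LooseObjectDB(db_path)
    # binsha is None until store() computes and assigns it
    istream = IStream("blob", len(payload), BytesIO(payload))
    ldb.store(istream)
    assert len(istream.binsha) == 20    # 20-byte binary sha; hexsha is 40 chars
    return istream.binsha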
def mktree(odb, modes, binshas, names):
    items = [tree_item_str(mode, name, binsha)
             for mode, binsha, name in zip(modes, binshas, names)]
    items_str = ''.join(items)
    istream = IStream("tree", len(items_str), StringIO(items_str))
    odb.store(istream)
    return (tree_mode, istream.binsha)
def mktree_from_iter(odb, object_info_iter):
    items = [tree_item_str(mode, name, binsha)
             for mode, name, binsha in object_info_iter]
    items_str = ''.join(items)
    istream = IStream("tree", len(items_str), StringIO(items_str))
    odb.store(istream)
    return (tree_mode, istream.binsha)
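# The two helpers above depend on tree_item_str, tree_mode and blob_mode, which
# are defined elsewhere in their module. A plausible sketch based on git's raw
# tree-entry encoding (ASCII octal mode, space, name, NUL, 20-byte binary sha);
# these definitions are an assumption, not the original code:
tree_mode = 0o40000     # mode git records for a sub-tree entry
blob_mode = 0o100644    # mode git records for a regular, non-executable blob


def tree_item_str(mode, name, binsha):
    # one raw tree entry: "<octal mode> <name>\x00<20-byte binary sha>"
    return '%o %s\x00%s' % (mode, name, binsha)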
def test_loose_correctness(self):
    """based on the pack(s) of our packed object DB, we will just copy and verify all objects in the pack
    into the loose object db (memory).
    This should help finding dormant issues like this one https://github.com/gitpython-developers/GitPython/issues/220
    faster
    :note: It doesn't seem this test can find the issue unless the given pack contains highly compressed
    data files, like archives."""
    from gitdb.util import bin_to_hex
    pdb = GitDB(os.path.join(self.gitrepopath, 'objects'))
    mdb = MemoryDB()
    for c, sha in enumerate(pdb.sha_iter()):
        ostream = pdb.stream(sha)
        # the issue only showed on larger files which are hardly compressible ...
        if ostream.type != str_blob_type:
            continue
        istream = IStream(ostream.type, ostream.size, ostream.stream)
        mdb.store(istream)
        assert istream.binsha == sha, "Failed on object %s" % bin_to_hex(sha).decode('ascii')
        # this can fail ... sometimes, so the packs dataset should be huge
        assert len(mdb.stream(sha).read()) == ostream.size

        if c and c % 1000 == 0:
            print("Verified %i loose object compression/decompression cycles" % c, file=sys.stderr)
        mdb._cache.clear()
    # end for each sha to copy
def _create_blob_for(self, path):
    repo = self.repository
    page_abspath = os.path.join(os.path.split(self.repository.working_dir)[0], path)
    # read in binary mode so the blob's size and contents match the file exactly
    with open(page_abspath, 'rb') as fp:
        data = fp.read()
    istream = IStream('blob', len(data), StringIO(data))
    repo.odb.store(istream)
    blob_path = self._get_blob_path(path)
    blob = Blob(repo, istream.binsha, 0100644, blob_path)
    return blob
def do_put(content_path, object_hashes, content, filename):
    """Perform put operation. This is used in the distributed wrapper"""
    ldb = LooseObjectDB("/{}/objects/".format(content_path))
    istream = IStream("blob", len(content), BytesIO(content))
    ldb.store(istream)
    content_hash = istream.hexsha
    filename_hash = hashlib.sha1(filename.encode('utf-8')).hexdigest()
    result = object_hashes[filename_hash] = content_hash.decode('utf-8')
    return result
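# Illustrative call of do_put (path and names are hypothetical): the return
# value is the blob's hex sha, also recorded under the sha1 of the filename:
#
#   object_hashes = {}
#   hexsha = do_put('var/data/node1', object_hashes, b'hello world', 'greeting.txt')
#   assert object_hashes[hashlib.sha1(b'greeting.txt').hexdigest()] == hexsha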
def write_paths(odb, paths, names):
    items = []
    for (path, name) in zip(paths, names):
        (mode, binsha) = write_path(odb, path)
        items.append(tree_item_str(mode, name, binsha))
    items_str = ''.join(items)
    istream = IStream("tree", len(items_str), StringIO(items_str))
    odb.store(istream)
    return (tree_mode, istream.binsha)
def write_blob_from_file(odb, f, line_size):
    if line_size == 0:
        blob_body = ''
    else:
        lines = [f.readline() for i in range(line_size)]
        blob_body = ''.join(lines)
    istream = IStream("blob", len(blob_body), StringIO(blob_body))
    odb.store(istream)
    return (blob_mode, istream.binsha)
def _get_subtree(self, file_name) -> Tuple[git.Tree, str]:
    tokens = file_name.split('/')
    tree = self.tree
    for token in tokens[:-1]:
        if token not in tree:
            # store an empty tree and graft it into the current tree;
            # new_from_sha needs the binary sha of the stored object
            item = git.Tree.new_from_sha(
                tree.repo,
                tree.repo.odb.store(IStream(git.Tree.type, 0, BytesIO())).binsha)
            item.path = f'{tree.path}/{token}' if tree.path else token
            tree = self._modify(tree, lambda t: t.add(item.binsha, tree.mode, token))
        assert isinstance(tree[token], git.Tree)
        tree = tree[token]
    return tree, tokens[-1]
def assert_commit_serialization(self, rwrepo, commit_id, print_performance_info=False):
    """traverse all commits in the history of commit identified by commit_id and check
    if the serialization works.
    :param print_performance_info: if True, we will show how fast we are"""
    ns = 0      # num serializations
    nds = 0     # num deserializations

    st = time.time()
    for cm in rwrepo.commit(commit_id).traverse():
        nds += 1

        # assert that we deserialize commits correctly, hence we get the same
        # sha on serialization
        stream = BytesIO()
        cm._serialize(stream)
        ns += 1
        streamlen = stream.tell()
        stream.seek(0)

        istream = rwrepo.odb.store(IStream(Commit.type, streamlen, stream))
        self.assertEqual(istream.hexsha, cm.hexsha.encode('ascii'))

        nc = Commit(rwrepo, Commit.NULL_BIN_SHA, cm.tree,
                    cm.author, cm.authored_date, cm.author_tz_offset,
                    cm.committer, cm.committed_date, cm.committer_tz_offset,
                    cm.message, cm.parents, cm.encoding)

        self.assertEqual(nc.parents, cm.parents)
        stream = BytesIO()
        nc._serialize(stream)
        ns += 1
        streamlen = stream.tell()
        stream.seek(0)

        # reuse istream
        istream.size = streamlen
        istream.stream = stream
        istream.binsha = None
        nc.binsha = rwrepo.odb.store(istream).binsha

        # if it worked, we have exactly the same contents !
        self.assertEqual(nc.hexsha, cm.hexsha)
    # END check commits
    elapsed = time.time() - st

    if print_performance_info:
        print("Serialized %i and deserialized %i commits in %f s ( (%f, %f) commits / s )"
              % (ns, nds, elapsed, ns / elapsed, nds / elapsed), file=sys.stderr)
def commit(self, message='', branch='master', parent=None, **kwargs):
    repo = Repo(self.path)
    index = repo.index
    for path, (action, data) in self.changes.iteritems():
        abspath = os.path.join(self.path, path)
        if action == WRITE:
            istream = IStream(Blob.type, len(data), StringIO(data))
            repo.odb.store(istream)
            blob = Blob(repo, istream.binsha, self.file_mode, path)
            index.entries[(path, 0)] = BaseIndexEntry.from_blob(blob)
        elif action == DELETE:
            #for bit in path.split(os.path.sep):
            self.repo.git.rm(['--cached', '--'], [path], r=True)
        elif action == RENAME:
            #print self.repo.git.status()
            self.repo.git.rm(['--cached', '--'], [data], r=True)
            data = self.read(data)
            istream = IStream(Blob.type, len(data), StringIO(data))
            repo.odb.store(istream)
            blob = Blob(repo, istream.binsha, self.file_mode, path)
            index.entries[(path, 0)] = BaseIndexEntry.from_blob(blob)
    committer_name = kwargs.get('committer_name', self.committer_name)
    committer_email = kwargs.get('committer_email', self.committer_email)
    author_name = kwargs.get('author_name', self.committer_name)
    author_email = kwargs.get('author_email', self.committer_email)
    with ENV(GIT_AUTHOR_NAME=author_name, GIT_AUTHOR_EMAIL=author_email,
             GIT_COMMITTER_EMAIL=committer_email, GIT_COMMITTER_NAME=committer_name):
        commit = index.commit(message)
    self.changes = {}
    return commit.hexsha
def write_tree(odb, src_path):
    assert os.path.isdir(src_path) and not os.path.islink(src_path)
    items = []
    for file in sorted(os.listdir(src_path)):
        (mode, binsha) = write_path(odb, os.path.join(src_path, file))
        items.append(tree_item_str(mode, file, binsha))
    items_str = ''.join(items)
    istream = IStream("tree", len(items_str), StringIO(items_str))
    odb.store(istream)
    return (tree_mode, istream.binsha)
def test_decompress_reader_special_case(self):
    odb = LooseObjectDB(fixture_path('objects'))
    mdb = MemoryDB()
    for sha in (b'888401851f15db0eed60eb1bc29dec5ddcace911',
                b'7bb839852ed5e3a069966281bb08d50012fb309b',):
        ostream = odb.stream(hex_to_bin(sha))

        # if there is a bug, we will be missing one byte exactly !
        data = ostream.read()
        assert len(data) == ostream.size

        # Putting it back in should yield nothing new - after all, we have
        dump = mdb.store(IStream(ostream.type, ostream.size, BytesIO(data)))
        assert dump.hexsha == sha
    # end for each loose object sha to verify
def _modify(tree: git.Tree, modifier: Callable[[git.TreeModifier], None]):
    """ Change the given tree and write the modified tree to the object database. """
    temp_tree = git.Tree.new_from_sha(tree.repo, tree.binsha)
    cache = temp_tree.cache
    modifier(cache)
    cache.set_done()
    stream = BytesIO()
    temp_tree._serialize(stream)
    stream.seek(0)
    new_tree = git.Tree.new_from_sha(
        tree.repo,
        tree.repo.odb.store(
            IStream(git.Tree.type, len(stream.getvalue()), stream)).binsha)
    new_tree.path = tree.path
    return new_tree
def _calculate_sha_(cls, repo: 'Repo', commit: 'Commit') -> bytes:
    '''Calculate the sha of a commit.

    :param repo: Repo object the commit should be part of
    :param commit: Commit object for which to generate the sha
    '''
    stream = BytesIO()
    commit._serialize(stream)
    streamlen = stream.tell()
    stream.seek(0)

    istream = repo.odb.store(IStream(cls.type, streamlen, stream))
    return istream.binsha
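# Hedged usage sketch for the helper above (the owning class name is
# illustrative): since serialization is deterministic, the computed sha
# should equal the commit's own binary sha:
#
#   import git
#   repo = git.Repo('.')
#   binsha = CommitShaHelper._calculate_sha_(repo, repo.head.commit)
#   assert binsha == repo.head.commit.binsha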
def test_base(self):
    ldb = LooseObjectDB(fixture_path("../../../.git/objects"))

    for sha1 in ldb.sha_iter():
        oinfo = ldb.info(sha1)
        ostream = ldb.stream(sha1)
        assert oinfo[:3] == ostream[:3]

        assert len(ostream.read()) == ostream.size
        assert ldb.has_object(oinfo.binsha)
    # END for each sha in database
    # assure we close all files
    try:
        del(ostream)
        del(oinfo)
    except UnboundLocalError:
        pass
    # END ignore exception if there are no loose objects

    data = "my data"
    istream = IStream("blob", len(data), StringIO(data))

    # the object does not yet have a sha
    assert istream.binsha is None
    ldb.store(istream)
    # now the sha is set
    assert len(istream.binsha) == 20
    assert ldb.has_object(istream.binsha)

    # async operation
    # Create a reader from an iterator
    reader = IteratorReader(ldb.sha_iter())

    # get reader for object streams
    info_reader = ldb.stream_async(reader)

    # read one
    info = info_reader.read(1)[0]

    # read all the rest until depletion
    ostreams = info_reader.read()

    # set the pool to use two threads
    pool.set_size(2)

    # synchronize the mode of operation
    pool.set_size(0)
def test_commit_serialization(self):
    self.assert_commit_serialization(self.gitrwrepo, '58c78e6', True)

    rwrepo = self.gitrwrepo
    make_object = rwrepo.odb.store
    # direct serialization - deserialization can be tested afterwards
    # serialization is probably limited on IO
    hc = rwrepo.commit(rwrepo.head)

    nc = 5000
    st = time()
    for i in range(nc):
        cm = Commit(rwrepo, Commit.NULL_BIN_SHA, hc.tree,
                    hc.author, hc.authored_date, hc.author_tz_offset,
                    hc.committer, hc.committed_date, hc.committer_tz_offset,
                    str(i), parents=hc.parents, encoding=hc.encoding)

        stream = BytesIO()
        cm._serialize(stream)
        slen = stream.tell()
        stream.seek(0)

        cm.binsha = make_object(IStream(Commit.type, slen, stream)).binsha
    # END commit creation
    elapsed = time() - st

    print("Serialized %i commits to loose objects in %f s ( %f commits / s )"
          % (nc, elapsed, nc / elapsed), file=sys.stderr)
def handle_blobs(self, blobs):
    for b in blobs:
        if b.path[-3:] == '.py':
            if b.binsha not in self.blob_map:
                virgin = b.data_stream.read().decode('utf-8')
                fmt_code, err = self.yapify(virgin, b.path)
                fmt_code2 = fmt_code.encode('utf-8')
                if not err:
                    istream = self.repo.odb.store(
                        IStream(Blob.type, len(fmt_code2), BytesIO(fmt_code2)))
                    self.blob_map[b.binsha] = istream.binsha
                    log.debug('converted: {}'.format(b.path))
                else:
                    emsg = 'yapf error: {} {}'.format(b.path, err)
                    self.convert_errors.append(emsg)
                    log.warning(emsg)
                    self.blob_map[b.binsha] = b.binsha
            yield Blob(self.repo, self.blob_map[b.binsha], b.mode, b.path)
        else:
            yield Blob(self.repo, b.binsha, b.mode, b.path)
def _git_raw_write_object(repo, obj):
    from stat import S_ISLNK
    from gitdb import IStream
    try:
        from cStringIO import StringIO
    except ImportError:
        from io import StringIO

    if obj.__class__.type == git.Blob.type:
        absfilepath = os.path.join(repo.working_tree_dir, obj.path)
        st = os.lstat(absfilepath)
        streamlen = st.st_size
        if S_ISLNK(st.st_mode):
            stream = StringIO(os.readlink(absfilepath))
        else:
            stream = open(absfilepath, 'rb')
    else:
        stream = StringIO()
        obj._serialize(stream)
        streamlen = stream.tell()
        stream.seek(0)

    istream = repo.odb.store(IStream(obj.__class__.type, streamlen, stream))
    obj.binsha = istream.binsha
    return obj
def _do_commit(repo, path, content, commit_msg=None):
    """ Do a commit """
    # Create the blob object
    stream = StringIO(content.encode('utf-8'))
    stream.seek(0, 2)
    streamlen = stream.tell()
    stream.seek(0)
    istream = IStream('blob', streamlen, stream)
    # Add it to the repository object database
    repo.odb.store(istream)
    # Create the corresponding Blob object
    blob = Blob(repo, istream.binsha, Blob.file_mode, path.encode('utf-8'))
    # Add blob to the index
    repo.index.add([IndexEntry.from_blob(blob)])
    if not commit_msg:
        commit_msg = ugettext(u'Update Wiki: {0}').format(path).encode('utf-8')
    repo.index.commit(commit_msg)
def test_streams(self):
    # test info
    sha = NULL_BIN_SHA
    s = 20
    blob_id = 3

    info = OInfo(sha, str_blob_type, s)
    assert info.binsha == sha
    assert info.type == str_blob_type
    assert info.type_id == blob_id
    assert info.size == s

    # test pack info
    # provides type_id
    pinfo = OPackInfo(0, blob_id, s)
    assert pinfo.type == str_blob_type
    assert pinfo.type_id == blob_id
    assert pinfo.pack_offset == 0

    dpinfo = ODeltaPackInfo(0, blob_id, s, sha)
    assert dpinfo.type == str_blob_type
    assert dpinfo.type_id == blob_id
    assert dpinfo.delta_info == sha
    assert dpinfo.pack_offset == 0

    # test ostream
    stream = DummyStream()
    ostream = OStream(*(info + (stream, )))
    assert ostream.stream is stream
    ostream.read(15)
    stream._assert()
    assert stream.bytes == 15
    ostream.read(20)
    assert stream.bytes == 20

    # test packstream
    postream = OPackStream(*(pinfo + (stream, )))
    assert postream.stream is stream
    postream.read(10)
    stream._assert()
    assert stream.bytes == 10

    # test deltapackstream
    dpostream = ODeltaPackStream(*(dpinfo + (stream, )))
    assert dpostream.stream is stream
    dpostream.read(5)
    stream._assert()
    assert stream.bytes == 5

    # derive with own args
    DeriveTest(sha, str_blob_type, s, stream, 'mine', myarg=3)._assert()

    # test istream
    istream = IStream(str_blob_type, s, stream)
    assert istream.binsha is None
    istream.binsha = sha
    assert istream.binsha == sha

    assert len(istream.binsha) == 20
    assert len(istream.hexsha) == 40

    assert istream.size == s
    istream.size = s * 2
    assert istream.size == s * 2
    assert istream.type == str_blob_type
    istream.type = "something"
    assert istream.type == "something"
    assert istream.stream is stream
    istream.stream = None
    assert istream.stream is None

    assert istream.error is None
    istream.error = Exception()
    assert isinstance(istream.error, Exception)
def write_blob_from_path(odb, src_path):
    assert os.path.isfile(src_path) and not os.path.islink(src_path)
    # open in binary mode so the stream length matches os.path.getsize()
    with io.open(src_path, 'rb') as fp:
        istream = IStream("blob", os.path.getsize(src_path), fp)
        odb.store(istream)
    return (blob_mode, istream.binsha)
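# write_path is called by write_paths and write_tree above but is not shown in
# this collection. A plausible sketch of it (an assumption, not the original
# code): dispatch on the filesystem type and return the (mode, binsha) pair
# the callers expect:
def write_path(odb, src_path):
    if os.path.isdir(src_path):
        return write_tree(odb, src_path)        # (tree_mode, binsha)
    return write_blob_from_path(odb, src_path)  # (blob_mode, binsha)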
def make_blob(repo, blob_bytes, mode, path):
    stream = io.BytesIO(blob_bytes)
    istream = repo.odb.store(IStream(Blob.type, len(blob_bytes), stream))
    return Blob(repo, istream.binsha, mode, path)
def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False):
    """Commit the given tree, creating a commit object.

    :param repo: Repo object the commit should be part of
    :param tree: Tree object or hex or bin sha
        the tree of the new commit
    :param message: Commit message. It may be an empty string if no message is provided.
        It will be converted to a string in any case.
    :param parent_commits:
        Optional Commit objects to use as parents for the new commit.
        If empty list, the commit will have no parents at all and become
        a root commit.
        If None, the current head commit will be the parent of the
        new commit object
    :param head:
        If True, the HEAD will be advanced to the new commit automatically.
        Else the HEAD will remain pointing on the previous commit. This could
        lead to undesired results when diffing files.
    :return: Commit object representing the new commit
    :note:
        Additional information about the committer and author are taken from the
        environment or from the git configuration, see git-commit-tree for
        more information"""
    if parent_commits is None:
        try:
            parent_commits = [repo.head.commit]
        except ValueError:
            # empty repositories have no head commit
            parent_commits = list()
        # END handle parent commits
    # END if parent commits are unset

    # retrieve all additional information, create a commit object, and
    # serialize it
    # Generally:
    # * Environment variables override configuration values
    # * Sensible defaults are set according to the git documentation

    # COMMITTER AND AUTHOR INFO
    cr = repo.config_reader()
    env = os.environ
    default_email = get_user_id()
    default_name = default_email.split('@')[0]

    conf_name = cr.get_value('user', cls.conf_name, default_name)
    conf_email = cr.get_value('user', cls.conf_email, default_email)

    author_name = env.get(cls.env_author_name, conf_name)
    author_email = env.get(cls.env_author_email, conf_email)

    committer_name = env.get(cls.env_committer_name, conf_name)
    committer_email = env.get(cls.env_committer_email, conf_email)

    # PARSE THE DATES
    unix_time = int(time())
    offset = altzone

    author_date_str = env.get(cls.env_author_date, '')
    if author_date_str:
        author_time, author_offset = parse_date(author_date_str)
    else:
        author_time, author_offset = unix_time, offset
    # END set author time

    committer_date_str = env.get(cls.env_committer_date, '')
    if committer_date_str:
        committer_time, committer_offset = parse_date(committer_date_str)
    else:
        committer_time, committer_offset = unix_time, offset
    # END set committer time

    # assume utf8 encoding
    enc_section, enc_option = cls.conf_encoding.split('.')
    conf_encoding = cr.get_value(enc_section, enc_option, cls.default_encoding)

    author = Actor(author_name, author_email)
    committer = Actor(committer_name, committer_email)

    # if the tree is no object, make sure we create one - otherwise
    # the created commit object is invalid
    if isinstance(tree, str):
        tree = repo.tree(tree)
    # END tree conversion

    # CREATE NEW COMMIT
    new_commit = cls(repo, cls.NULL_BIN_SHA, tree,
                     author, author_time, author_offset,
                     committer, committer_time, committer_offset,
                     message, parent_commits, conf_encoding)

    stream = StringIO()
    new_commit._serialize(stream)
    streamlen = stream.tell()
    stream.seek(0)

    istream = repo.odb.store(IStream(cls.type, streamlen, stream))
    new_commit.binsha = istream.binsha

    if head:
        try:
            repo.head.commit = new_commit
        except ValueError:
            # head is not yet set to the ref our HEAD points to
            # Happens on first commit
            import git.refs
            master = git.refs.Head.create(repo, repo.head.ref, commit=new_commit)
            repo.head.reference = master
        # END handle empty repositories
    # END advance head handling

    return new_commit
def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False, author=None,
                     committer=None, author_date=None, commit_date=None, isolated=False):
    """Commit the given tree, creating a commit object.

    :param repo: Repo object the commit should be part of
    :param tree: Tree object or hex or bin sha
        the tree of the new commit
    :param message: Commit message. It may be an empty string if no message is provided.
        It will be converted to a string in any case.
    :param parent_commits:
        Optional Commit objects to use as parents for the new commit.
        If empty list, the commit will have no parents at all and become
        a root commit.
        If None, the current head commit will be the parent of the
        new commit object
    :param head:
        If True, the HEAD will be advanced to the new commit automatically.
        Else the HEAD will remain pointing on the previous commit. This could
        lead to undesired results when diffing files.
    :param author: The name of the author, optional. If unset, the repository
        configuration is used to obtain this value.
    :param committer: The name of the committer, optional. If unset, the
        repository configuration is used to obtain this value.
    :param author_date: The timestamp for the author field
    :param commit_date: The timestamp for the committer field
    :param isolated: If True, the parent environment is not passed to the git command.
    :return: Commit object representing the new commit
    :note:
        Additional information about the committer and author are taken from the
        environment or from the git configuration, see git-commit-tree for
        more information"""
    if parent_commits is None:
        try:
            parent_commits = [repo.head.commit]
        except ValueError:
            # empty repositories have no head commit
            parent_commits = []
        # END handle parent commits
    else:
        for p in parent_commits:
            if not isinstance(p, cls):
                raise ValueError("Parent commit '%r' must be of type %s" % (p, cls))
        # end check parent commit types
    # END if parent commits are unset

    # retrieve all additional information, create a commit object, and
    # serialize it
    # Generally:
    # * Environment variables override configuration values
    # * Sensible defaults are set according to the git documentation

    # COMMITTER AND AUTHOR INFO
    cr = repo.config_reader()
    env = {} if isolated else os.environ

    committer = committer or Actor.committer(cr, isolated=isolated)
    author = author or Actor.author(cr, isolated=isolated)

    # PARSE THE DATES
    unix_time = int(time())
    is_dst = daylight and localtime().tm_isdst > 0
    offset = altzone if is_dst else timezone

    author_date_str = env.get(cls.env_author_date, '')
    if author_date:
        author_time, author_offset = parse_date(author_date)
    elif author_date_str:
        author_time, author_offset = parse_date(author_date_str)
    else:
        author_time, author_offset = unix_time, offset
    # END set author time

    committer_date_str = env.get(cls.env_committer_date, '')
    if commit_date:
        committer_time, committer_offset = parse_date(commit_date)
    elif committer_date_str:
        committer_time, committer_offset = parse_date(committer_date_str)
    else:
        committer_time, committer_offset = unix_time, offset
    # END set committer time

    # assume utf8 encoding
    enc_section, enc_option = cls.conf_encoding.split('.')
    conf_encoding = cr.get_value(enc_section, enc_option, cls.default_encoding)

    # if the tree is no object, make sure we create one - otherwise
    # the created commit object is invalid
    if isinstance(tree, str):
        tree = repo.tree(tree)
    # END tree conversion

    # CREATE NEW COMMIT
    new_commit = cls(repo, cls.NULL_BIN_SHA, tree,
                     author, author_time, author_offset,
                     committer, committer_time, committer_offset,
                     message, parent_commits, conf_encoding)

    stream = BytesIO()
    new_commit._serialize(stream)
    streamlen = stream.tell()
    stream.seek(0)

    istream = repo.odb.store(IStream(cls.type, streamlen, stream))
    new_commit.binsha = istream.binsha

    if head:
        # need late import here, importing git at the very beginning throws
        # as well ...
        import git.refs
        try:
            repo.head.set_commit(new_commit, logmsg=message)
        except ValueError:
            # head is not yet set to the ref our HEAD points to
            # Happens on first commit
            master = git.refs.Head.create(repo, repo.head.ref, new_commit,
                                          logmsg="commit (initial): %s" % message)
            repo.head.set_reference(master, logmsg='commit: Switching to %s' % master)
        # END handle empty repositories
    # END advance head handling

    return new_commit
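# Illustrative use of create_from_tree (repo is a hypothetical GitPython Repo):
# commit the current index tree as a new root commit (no parents), leaving HEAD
# untouched:
#
#   tree = repo.index.write_tree()
#   commit = Commit.create_from_tree(repo, tree, "snapshot", parent_commits=[], head=False)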
def home(request):
    data = {}
    if request.method == 'POST':
        wiki_name = request.POST['add-wiki-name']
        wiki_slug = slugify(wiki_name)
        wiki_desc = request.POST['add-wiki-desc']
        wiki_gitd = os.path.join(settings.WIKI_GIT_DIR, wiki_slug)
        # Check if the slug is present or not
        try:
            Wiki.objects.get(slug=wiki_slug)
            data['error'] = ugettext(
                'Can\'t add wiki, another wiki with the same name ({0}) already exists'
            ).format(wiki_name)
        except Wiki.DoesNotExist:
            os.environ['GIT_AUTHOR_NAME'] = u'{0} {1}'.format(
                request.user.first_name, request.user.last_name).encode('utf-8')
            os.environ['GIT_AUTHOR_EMAIL'] = request.user.email
            os.environ['USERNAME'] = str(request.user.username)
            # Create repository
            repo = Repo.init(wiki_gitd)
            # Add first wiki file
            stream = StringIO('# {0}'.format(wiki_name))
            stream.seek(0, 2)
            streamlen = stream.tell()
            stream.seek(0)
            istream = IStream("blob", streamlen, stream)
            repo.odb.store(istream)
            blob = Blob(repo, istream.binsha, 0100644, 'Home.md')
            repo.index.add([IndexEntry.from_blob(blob)])
            repo.index.commit(ugettext('Initialize {0}').format(wiki_name).encode('utf-8'))
            del os.environ['GIT_AUTHOR_NAME']
            del os.environ['GIT_AUTHOR_EMAIL']
            del os.environ['USERNAME']
            # Create wiki
            wiki = Wiki()
            wiki.name = wiki_name
            wiki.slug = wiki_slug
            wiki.description = wiki_desc
            wiki.gitdir = wiki_gitd
            wiki.save()
    wikis = Wiki.objects.all()
    data['wikis'] = [wikis[x:x + 3] for x in xrange(0, len(wikis), 3)]
    return render_to_response(u'home.html', data, context_instance=RequestContext(request))
def test_large_data_streaming(self, rwrepo):
    # TODO: This part overlaps with the same file in gitdb.test.performance.test_stream
    # It should be shared if possible
    ldb = LooseObjectDB(osp.join(rwrepo.git_dir, 'objects'))

    for randomize in range(2):
        desc = (randomize and 'random ') or ''
        print("Creating %s data ..." % desc, file=sys.stderr)
        st = time()
        size, stream = make_memory_file(self.large_data_size_bytes, randomize)
        elapsed = time() - st
        print("Done (in %f s)" % elapsed, file=sys.stderr)

        # writing - due to the compression it will seem faster than it is
        st = time()
        binsha = ldb.store(IStream('blob', size, stream)).binsha
        elapsed_add = time() - st
        assert ldb.has_object(binsha)
        db_file = ldb.readable_db_object_path(bin_to_hex(binsha))
        fsize_kib = osp.getsize(db_file) / 1000

        size_kib = size / 1000
        msg = "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)"
        msg %= (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add)
        print(msg, file=sys.stderr)

        # reading all at once
        st = time()
        ostream = ldb.stream(binsha)
        shadata = ostream.read()
        elapsed_readall = time() - st

        stream.seek(0)
        assert shadata == stream.getvalue()

        msg = "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)"
        msg %= (size_kib, desc, elapsed_readall, size_kib / elapsed_readall)
        print(msg, file=sys.stderr)

        # reading in chunks of 512 KB
        cs = 512 * 1000
        chunks = list()
        st = time()
        ostream = ldb.stream(binsha)
        while True:
            data = ostream.read(cs)
            chunks.append(data)
            if len(data) < cs:
                break
        # END read in chunks
        elapsed_readchunks = time() - st

        stream.seek(0)
        assert b''.join(chunks) == stream.getvalue()

        cs_kib = cs / 1000
        print("Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)"
              % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks), file=sys.stderr)

        # del db file so git has something to do
        ostream = None
        import gc
        gc.collect()
        os.remove(db_file)

        # VS. CGIT
        ##########
        # CGIT ! Can using the cgit programs be faster ?
        proc = rwrepo.git.hash_object('-w', '--stdin', as_process=True, istream=subprocess.PIPE)

        # write file - pump everything in at once to be as fast as possible
        data = stream.getvalue()    # cache it
        st = time()
        proc.stdin.write(data)
        proc.stdin.close()
        gitsha = proc.stdout.read().strip()
        proc.wait()
        gelapsed_add = time() - st
        del(data)
        assert gitsha == bin_to_hex(binsha)     # we do it the same way, right ?

        # as it's the same sha, we reuse our path
        fsize_kib = osp.getsize(db_file) / 1000
        msg = "Added %i KiB (filesize = %i KiB) of %s data using git-hash-object in %f s ( %f Write KiB / s)"
        msg %= (size_kib, fsize_kib, desc, gelapsed_add, size_kib / gelapsed_add)
        print(msg, file=sys.stderr)

        # compare ...
        print("Git-Python is %f %% faster than git when adding big %s files"
              % (100.0 - (elapsed_add / gelapsed_add) * 100, desc), file=sys.stderr)

        # read all
        st = time()
        hexsha, typename, size, data = rwrepo.git.get_object_data(gitsha)  # @UnusedVariable
        gelapsed_readall = time() - st
        print("Read %i KiB of %s data at once using git-cat-file in %f s ( %f Read KiB / s)"
              % (size_kib, desc, gelapsed_readall, size_kib / gelapsed_readall), file=sys.stderr)

        # compare
        print("Git-Python is %f %% faster than git when reading big %s files"
              % (100.0 - (elapsed_readall / gelapsed_readall) * 100, desc), file=sys.stderr)

        # read chunks
        st = time()
        hexsha, typename, size, stream = rwrepo.git.stream_object_data(gitsha)  # @UnusedVariable
        while True:
            data = stream.read(cs)
            if len(data) < cs:
                break
        # END read stream
        gelapsed_readchunks = time() - st
        msg = "Read %i KiB of %s data in %i KiB chunks from git-cat-file in %f s ( %f Read KiB / s)"
        msg %= (size_kib, desc, cs_kib, gelapsed_readchunks, size_kib / gelapsed_readchunks)
        print(msg, file=sys.stderr)

        # compare
        print("Git-Python is %f %% faster than git when reading big %s files in chunks"
              % (100.0 - (elapsed_readchunks / gelapsed_readchunks) * 100, desc), file=sys.stderr)
    # END for each randomization factor