Example #1
 def __setitem__(self, file_name: str, contents: List[bytes]):
     tree, name = self._get_subtree(file_name)
     content = b''.join(contents)
     # store the blob first, then add its sha to the tree in a single _modify() call
     istream = self.tree.repo.odb.store(IStream(git.Blob.type, len(content), BytesIO(content)))
     self.tree = self._modify(self.tree, lambda t: t.add(istream.binsha, git.Blob.file_mode, name, force=True))
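
All of these examples share one core recipe: wrap raw bytes in an IStream of (object type, size in bytes, readable stream), hand it to an object database's store(), and read the computed 20-byte binsha back off that same stream object. A minimal, self-contained round trip against gitdb's in-memory database might look like this (a sketch; the imports assume gitdb's usual module layout, and the payload is made up):

from io import BytesIO

from gitdb import IStream
from gitdb.db import MemoryDB

data = b'hello, object database'
istream = IStream('blob', len(data), BytesIO(data))
assert istream.binsha is None       # no sha before the object is stored

mdb = MemoryDB()
mdb.store(istream)                  # store() computes and sets the sha
assert len(istream.binsha) == 20    # 20-byte binary sha, 40-char hexsha

# round trip: read the object back out of the database
assert mdb.stream(istream.binsha).read() == data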
Example #2
    def set_content(self, path, content, commit_msg=None):
        """ Add new content in `path` """

        # Create the stream
        stream = StringIO(content.encode('utf-8'))
        stream.seek(0, 2)
        streamlen = stream.tell()
        stream.seek(0)

        istream = IStream("blob", streamlen, stream)

        # Add it to the repository object database
        self.repo.odb.store(istream)

        # Create the corresponding blob object
        blob = Blob(self.repo, istream.binsha, 0100644, path.encode('utf-8'))

        # Commit
        self.repo.index.add([IndexEntry.from_blob(blob)])

        if not commit_msg:
            commit_msg = ugettext('Update Wiki: {0}').format(
                path.encode('utf-8')).encode('utf-8')

        self.repo.index.commit(commit_msg)

        # Update internal information
        self._parse()
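
The seek(0, 2) / tell() / seek(0) sequence above measures the stream before handing it to IStream, which needs the size up front. As a standalone helper (hypothetical; not part of the project) the idiom reads:

import os

def stream_length(stream):
    """Measure a seekable stream's remaining bytes without consuming it.

    Hypothetical helper equivalent to the seek(0, 2)/tell()/seek(0)
    dance used in several of these examples.
    """
    pos = stream.tell()
    stream.seek(0, os.SEEK_END)   # jump to the end to measure
    length = stream.tell() - pos
    stream.seek(pos)              # rewind to where we started
    return length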
Example #3
    def test_large_data_streaming(self, path):
        ldb = LooseObjectDB(path)
        string_ios = list()         # list of streams we previously created

        # serial mode
        for randomize in range(2):
            desc = (randomize and 'random ') or ''
            print("Creating %s data ..." % desc, file=sys.stderr)
            st = time()
            size, stream = make_memory_file(self.large_data_size_bytes, randomize)
            elapsed = time() - st
            print("Done (in %f s)" % elapsed, file=sys.stderr)
            string_ios.append(stream)

            # writing - due to the compression it will seem faster than it is
            st = time()
            sha = ldb.store(IStream('blob', size, stream)).binsha
            elapsed_add = time() - st
            assert ldb.has_object(sha)
            db_file = ldb.readable_db_object_path(bin_to_hex(sha))
            fsize_kib = os.path.getsize(db_file) / 1000

            size_kib = size / 1000
            print("Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" %
                  (size_kib, fsize_kib, desc, elapsed_add, size_kib / (elapsed_add or 1)), file=sys.stderr)

            # reading all at once
            st = time()
            ostream = ldb.stream(sha)
            shadata = ostream.read()
            elapsed_readall = time() - st

            stream.seek(0)
            assert shadata == stream.getvalue()
            print("Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" %
                  (size_kib, desc, elapsed_readall, size_kib / (elapsed_readall or 1)), file=sys.stderr)

            # reading in chunks of 512 kB
            cs = 512 * 1000
            chunks = list()
            st = time()
            ostream = ldb.stream(sha)
            while True:
                data = ostream.read(cs)
                chunks.append(data)
                if len(data) < cs:
                    break
            # END read in chunks
            elapsed_readchunks = time() - st

            stream.seek(0)
            assert b''.join(chunks) == stream.getvalue()

            cs_kib = cs / 1000
            print("Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" %
                  (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / (elapsed_readchunks or 1)), file=sys.stderr)

            # delete the db file so the next round has something to store again
            ostream = None  # release the file handle (Windows)
            remove(db_file)
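
The chunked read loop above (repeated in Example #31) is how a large object is streamed without holding it in memory at once. As a reusable generator it might look like this (a hypothetical helper built on the db.stream()/read(n) calls already shown):

def iter_object_chunks(db, binsha, chunk_size=512 * 1000):
    """Yield an object's data in chunk_size pieces (sketch)."""
    ostream = db.stream(binsha)
    while True:
        data = ostream.read(chunk_size)
        if data:
            yield data
        if len(data) < chunk_size:   # a short read marks the end of the object
            break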
Example #4
    def test_base(self):
        ldb = LooseObjectDB(os.path.join(self.gitrepopath, 'objects'))

        for sha1 in ldb.sha_iter():
            oinfo = ldb.info(sha1)
            ostream = ldb.stream(sha1)
            assert oinfo[:3] == ostream[:3]

            assert len(ostream.read()) == ostream.size
            assert ldb.has_object(oinfo.binsha)
        # END for each sha in database
        # assure we close all files
        try:
            del(ostream)
            del(oinfo)
        except UnboundLocalError:
            pass
        # END ignore exception if there are no loose objects

        data = "my data".encode("ascii")
        istream = IStream("blob", len(data), BytesIO(data))

        # the object does not yet have a sha
        assert istream.binsha is None
        ldb.store(istream)
        # now the sha is set
        assert len(istream.binsha) == 20
        assert ldb.has_object(istream.binsha)
Example #5
def mktree(odb, modes, binshas, names):
    items = [tree_item_str(mode, name, binsha) for mode, binsha, name in zip(modes, binshas, names)]
    items_str = ''.join(items)

    istream = IStream("tree", len(items_str), StringIO(items_str))
    odb.store(istream)
    return (tree_mode, istream.binsha)
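
tree_item_str, tree_mode and blob_mode are project-local helpers that none of these snippets define. Based on git's raw tree entry encoding (octal mode, space, name, NUL byte, 20-byte binary sha), a Python 3 sketch might be (the format is git's, but this exact implementation is an assumption):

tree_mode = 0o40000    # mode of a directory entry in a git tree
blob_mode = 0o100644   # mode of a regular-file entry

def tree_item_str(mode, name, binsha):
    """One raw tree entry: octal mode, space, name, NUL byte, 20-byte sha.

    Sketch of the undefined helper used in Examples #5, #6, #10, #11 and #15.
    """
    return ('%o %s' % (mode, name)).encode('utf-8') + b'\x00' + binsha

On Python 3 the callers would then join bytes and wrap them in BytesIO rather than StringIO; git additionally requires tree entries sorted by name, as Example #15's sorted(os.listdir(...)) does.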
Example #6
def mktree_from_iter(odb, object_info_iter):
    items = [tree_item_str(mode, name, binsha) for mode, binsha, name in object_info_iter]
    items_str = ''.join(items)

    istream = IStream("tree", len(items_str), StringIO(items_str))
    odb.store(istream)
    return (tree_mode, istream.binsha)
Example #7
    def test_loose_correctness(self):
        """based on the pack(s) of our packed object DB, we will just copy and verify all objects in the back
        into the loose object db (memory).
        This should help finding dormant issues like this one https://github.com/gitpython-developers/GitPython/issues/220
        faster
        :note: It doesn't seem this test can find the issue unless the given pack contains highly compressed
        data files, like archives."""
        from gitdb.util import bin_to_hex
        pdb = GitDB(os.path.join(self.gitrepopath, 'objects'))
        mdb = MemoryDB()
        for c, sha in enumerate(pdb.sha_iter()):
            ostream = pdb.stream(sha)
            # the issue only showed on larger files which are hardly compressible ...
            if ostream.type != str_blob_type:
                continue
            istream = IStream(ostream.type, ostream.size, ostream.stream)
            mdb.store(istream)
            assert istream.binsha == sha, "Failed on object %s" % bin_to_hex(
                sha).decode('ascii')
            # this can fail ... sometimes, so the packs dataset should be huge
            assert len(mdb.stream(sha).read()) == ostream.size

            if c and c % 1000 == 0:
                print(
                    "Verified %i loose object compression/decompression cycles"
                    % c,
                    file=sys.stderr)
            mdb._cache.clear()
Example #8
File: Git.py Project: aawilson/waliki
 def _create_blob_for(self, path):
     repo = self.repository
     page_abspath = os.path.join(os.path.split(self.repository.working_dir)[0], path)
     data = open(page_abspath, 'r').read()
     istream = IStream('blob', len(data), StringIO(data))
     repo.odb.store(istream)
     blob_path = self._get_blob_path(path)
     blob = Blob(repo, istream.binsha, 0100644, blob_path)
     return blob
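
Example #8 is Python 2 code (the 0100644 octal literal, text-mode read of blob content). On Python 3 the helper needs binary I/O so that len(data) matches the stored blob's byte size; a sketch keeping the snippet's own names:

import os
from io import BytesIO

from git import Blob
from gitdb import IStream

def _create_blob_for(self, path):
    """Python 3 sketch of the helper above: same structure, byte-safe I/O."""
    repo = self.repository
    page_abspath = os.path.join(os.path.split(repo.working_dir)[0], path)
    with open(page_abspath, 'rb') as f:   # binary mode: blob sizes are byte counts
        data = f.read()
    istream = IStream('blob', len(data), BytesIO(data))
    repo.odb.store(istream)
    return Blob(repo, istream.binsha, 0o100644, self._get_blob_path(path))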
Example #9
 def do_put(content_path, object_hashes, content, filename):
     """Perform put operation. This is used in the distributed wrapper"""
     ldb = LooseObjectDB("/{}/objects/".format(content_path))
     istream = IStream("blob", len(content), BytesIO(content))
     ldb.store(istream)
     content_hash = istream.hexsha
     filename_hash = hashlib.sha1(filename.encode('utf-8')).hexdigest()
     result = object_hashes[filename_hash] = str(
         content_hash.decode('utf-8'))
     return result
Example #10
def write_paths(odb, paths, names):
    items = []
    for (path, name) in zip(paths, names):
        (mode, binsha) = write_path(odb, path)

        items.append(tree_item_str(mode, name, binsha))

    items_str = ''.join(items)
    istream = IStream("tree", len(items_str), StringIO(items_str))
    odb.store(istream)
    return (tree_mode, istream.binsha)
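
write_path is another shared helper these snippets never define. Given write_tree (Example #15) and write_blob_from_path (Example #25), a plausible dispatcher (assumed, not the project's actual code) is:

import os

def write_path(odb, src_path):
    """Write src_path into the odb: trees for directories, blobs for files.

    Assumed implementation of the undefined helper used in Examples #10 and #15.
    """
    if os.path.isdir(src_path) and not os.path.islink(src_path):
        return write_tree(odb, src_path)         # -> (tree_mode, binsha)
    return write_blob_from_path(odb, src_path)   # -> (blob_mode, binsha)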
Example #11
def write_blob_from_file(odb, f, line_size):
    if line_size == 0:
        blob_body = ''
    else:
        lines = [f.readline() for i in range(line_size)]
        blob_body = ''.join(lines)

    istream = IStream("blob", len(blob_body), StringIO(blob_body))
    odb.store(istream)

    return (blob_mode, istream.binsha)
Example #12
 def _get_subtree(self, file_name) -> Tuple[git.Tree, str]:
     tokens = file_name.split('/')
     tree = self.tree
     for token in tokens[:-1]:
         if token not in tree:
             item = git.Tree.new_from_sha(tree.repo, tree.repo.odb.store(IStream(git.Tree.type, 0, BytesIO())))
             item.path = f'{tree.path}/{token}' if tree.path else token
             tree = self._modify(tree, lambda t: t.add(item.binsha, tree.mode, token))
         assert isinstance(tree[token], git.Tree)
         tree = tree[token]
     return tree, tokens[-1]
Example #13
    def assert_commit_serialization(self,
                                    rwrepo,
                                    commit_id,
                                    print_performance_info=False):
        """traverse all commits in the history of commit identified by commit_id and check
        if the serialization works.
        :param print_performance_info: if True, we will show how fast we are"""
        ns = 0  # num serializations
        nds = 0  # num deserializations

        st = time.time()
        for cm in rwrepo.commit(commit_id).traverse():
            nds += 1

            # assert that we deserialize commits correctly, hence we get the same
            # sha on serialization
            stream = BytesIO()
            cm._serialize(stream)
            ns += 1
            streamlen = stream.tell()
            stream.seek(0)

            istream = rwrepo.odb.store(IStream(Commit.type, streamlen, stream))
            self.assertEqual(istream.hexsha, cm.hexsha.encode('ascii'))

            nc = Commit(rwrepo, Commit.NULL_BIN_SHA, cm.tree, cm.author,
                        cm.authored_date, cm.author_tz_offset, cm.committer,
                        cm.committed_date, cm.committer_tz_offset, cm.message,
                        cm.parents, cm.encoding)

            self.assertEqual(nc.parents, cm.parents)
            stream = BytesIO()
            nc._serialize(stream)
            ns += 1
            streamlen = stream.tell()
            stream.seek(0)

            # reuse istream
            istream.size = streamlen
            istream.stream = stream
            istream.binsha = None
            nc.binsha = rwrepo.odb.store(istream).binsha

            # if it worked, we have exactly the same contents !
            self.assertEqual(nc.hexsha, cm.hexsha)
        # END check commits
        elapsed = time.time() - st

        if print_performance_info:
            print(
                "Serialized %i and deserialized %i commits in %f s ( (%f, %f) commits / s"
                % (ns, nds, elapsed, ns / elapsed, nds / elapsed),
                file=sys.stderr)
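
Note the reuse of the returned IStream at the end of the loop: odb.store() hands back the same IStream it was given with binsha filled in, so resetting its size, stream and binsha attributes (all writable, as Example #24 demonstrates) lets one instance be recycled across many stores.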
Example #14
File: backend.py Project: wil/vacuous
    def commit(self, message='', branch='master', parent=None, **kwargs):
        repo = Repo(self.path)
        index = repo.index

        for path, (action, data) in self.changes.iteritems():
            abspath = os.path.join(self.path, path)
            if action == WRITE:
                istream = IStream(Blob.type, len(data), StringIO(data))
                repo.odb.store(istream)
                blob = Blob(repo, istream.binsha, self.file_mode, path)
                index.entries[(path, 0)] = BaseIndexEntry.from_blob(blob)

            elif action == DELETE:
                #for bit in path.split(os.path.sep):
                self.repo.git.rm(['--cached', '--'], [path], r=True)

            elif action == RENAME:
                #print self.repo.git.status()
                self.repo.git.rm(['--cached', '--'], [data], r=True)
                data = self.read(data)
                istream = IStream(Blob.type, len(data), StringIO(data))
                repo.odb.store(istream)
                blob = Blob(repo, istream.binsha, self.file_mode, path)
                index.entries[(path, 0)] = BaseIndexEntry.from_blob(blob)

        committer_name = kwargs.get('committer_name', self.committer_name)
        committer_email = kwargs.get('committer_email', self.committer_email)
        author_name = kwargs.get('author_name', self.committer_name)
        author_email = kwargs.get('author_email', self.committer_email)

        with ENV(GIT_AUTHOR_NAME=author_name,
                 GIT_AUTHOR_EMAIL=author_email,
                 GIT_COMMITTER_EMAIL=committer_email,
                 GIT_COMMITTER_NAME=committer_name):
            commit = index.commit(message)

        self.changes = {}

        return commit.hexsha
Example #15
def write_tree(odb, src_path):
    assert os.path.isdir(src_path) and not os.path.islink(src_path)

    items = []
    for file in sorted(os.listdir(src_path)):
        (mode, binsha) = write_path(odb, os.path.join(src_path, file))

        items.append(tree_item_str(mode, file, binsha))

    items_str = ''.join(items)
    istream = IStream("tree", len(items_str), StringIO(items_str))
    odb.store(istream)
    return (tree_mode, istream.binsha)
Example #16
    def test_decompress_reader_special_case(self):
        odb = LooseObjectDB(fixture_path('objects'))
        mdb = MemoryDB()
        for sha in (b'888401851f15db0eed60eb1bc29dec5ddcace911',
                    b'7bb839852ed5e3a069966281bb08d50012fb309b',):
            ostream = odb.stream(hex_to_bin(sha))

            # if there is a bug, we will be missing one byte exactly !
            data = ostream.read()
            assert len(data) == ostream.size

            # Putting it back in should yield nothing new - after all, we have the very same data
            dump = mdb.store(IStream(ostream.type, ostream.size, BytesIO(data)))
            assert dump.hexsha == sha
Example #17
    def _modify(tree: git.Tree, modifier: Callable[[git.TreeModifier], None]):
        """ Change the given tree and write the modified tree to the object database. """

        temp_tree = git.Tree.new_from_sha(tree.repo, tree.binsha)
        cache = temp_tree.cache
        modifier(cache)
        cache.set_done()
        stream = BytesIO()
        temp_tree._serialize(stream)
        stream.seek(0)
        istream = tree.repo.odb.store(IStream(git.Tree.type, len(stream.getvalue()), stream))
        new_tree = git.Tree.new_from_sha(tree.repo, istream.binsha)
        new_tree.path = tree.path
        return new_tree
Example #18
File: commit.py Project: jrhauser/jrhauser
    def _calculate_sha_(cls, repo: 'Repo', commit: 'Commit') -> bytes:
        '''Calculate the sha of a commit.

        :param repo: Repo object the commit should be part of
        :param commit: Commit object for which to generate the sha
        '''

        stream = BytesIO()
        commit._serialize(stream)
        streamlen = stream.tell()
        stream.seek(0)

        istream = repo.odb.store(IStream(cls.type, streamlen, stream))
        return istream.binsha
Example #19
	def test_base(self):
		ldb = LooseObjectDB(fixture_path("../../../.git/objects"))
		
		for sha1 in ldb.sha_iter():
			oinfo = ldb.info(sha1)
			ostream = ldb.stream(sha1)
			assert oinfo[:3] == ostream[:3]
			
			assert len(ostream.read()) == ostream.size
			assert ldb.has_object(oinfo.binsha)
		# END for each sha in database
		# assure we close all files
		try:
			del(ostream)
			del(oinfo)
		except UnboundLocalError:
			pass
		# END ignore exception if there are no loose objects
			
		data = "my data"
		istream = IStream("blob", len(data), StringIO(data))
		
		# the object does not yet have a sha
		assert istream.binsha is None
		ldb.store(istream)
		# now the sha is set
		assert len(istream.binsha) == 20
		assert ldb.has_object(istream.binsha)
		
		
		# async operation
		# Create a reader from an iterator
		reader = IteratorReader(ldb.sha_iter())
		
		# get reader for object streams
		info_reader = ldb.stream_async(reader)
		
		# read one
		info = info_reader.read(1)[0]
		
		# read all the rest until depletion
		ostreams = info_reader.read()
		
		# set the pool to use two threads
		pool.set_size(2)
		
		# synchronize the mode of operation
		pool.set_size(0)
Example #20
    def test_commit_serialization(self):
        self.assert_commit_serialization(self.gitrwrepo, '58c78e6', True)

        rwrepo = self.gitrwrepo
        make_object = rwrepo.odb.store
        # direct serialization - deserialization can be tested afterwards
        # serialization is probably limited on IO
        hc = rwrepo.commit(rwrepo.head)

        nc = 5000
        st = time()
        for i in range(nc):
            cm = Commit(rwrepo,
                        Commit.NULL_BIN_SHA,
                        hc.tree,
                        hc.author,
                        hc.authored_date,
                        hc.author_tz_offset,
                        hc.committer,
                        hc.committed_date,
                        hc.committer_tz_offset,
                        str(i),
                        parents=hc.parents,
                        encoding=hc.encoding)

            stream = BytesIO()
            cm._serialize(stream)
            slen = stream.tell()
            stream.seek(0)

            cm.binsha = make_object(IStream(Commit.type, slen, stream)).binsha
        # END commit creation
        elapsed = time() - st

        print(
            "Serialized %i commits to loose objects in %f s ( %f commits / s )"
            % (nc, elapsed, nc / elapsed),
            file=sys.stderr)
Example #21
    def handle_blobs(self, blobs):
        for b in blobs:
            if b.path[-3:] == '.py':
                if b.binsha not in self.blob_map:
                    virgin = b.data_stream.read().decode('utf-8')
                    fmt_code, err = self.yapify(virgin, b.path)
                    fmt_code2 = fmt_code.encode('utf-8')
                    if not err:
                        istream = self.repo.odb.store(
                            IStream(Blob.type, len(fmt_code2),
                                    BytesIO(fmt_code2)))
                        self.blob_map[b.binsha] = istream.binsha
                        log.debug('converted: {}'.format(b.path))
                    else:
                        emsg = 'yapf error: {} {}'.format(b.path, err)
                        self.convert_errors.append(emsg)
                        log.warning(emsg)
                        self.blob_map[b.binsha] = b.binsha

                yield Blob(self.repo, self.blob_map[b.binsha], b.mode, b.path)
            else:
                yield Blob(self.repo, b.binsha, b.mode, b.path)
Example #22
File: git.py Project: gudi1989/fabdeploit
def _git_raw_write_object(repo, obj):
    from stat import S_ISLNK
    from gitdb import IStream
    try:
        from cStringIO import StringIO
    except ImportError:
        from io import StringIO

    if obj.__class__.type == git.Blob.type:
        absfilepath = os.path.join(repo.working_tree_dir, obj.path)
        st = os.lstat(absfilepath)
        streamlen = st.st_size
        if S_ISLNK(st.st_mode):
            stream = StringIO(os.readlink(absfilepath))
        else:
            stream = open(absfilepath, 'rb')
    else:
        stream = StringIO()
        obj._serialize(stream)
        streamlen = stream.tell()
        stream.seek(0)
    istream = repo.odb.store(IStream(obj.__class__.type, streamlen, stream))
    obj.binsha = istream.binsha
    return obj
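
Note that the io.StringIO fallback makes the Python 3 branch text-only; since blob contents and serialized objects are bytes on Python 3, io.BytesIO would be the byte-safe choice there.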
Example #23
def _do_commit(repo, path, content, commit_msg=None):
    """ Do a commit """

    # Create the blob object
    stream = StringIO(content.encode('utf-8'))
    stream.seek(0, 2)
    streamlen = stream.tell()
    stream.seek(0)

    istream = IStream('blob', streamlen, stream)

    # Add it to the repository object database
    repo.odb.store(istream)

    # Create the corresponding Blob object
    blob = Blob(repo, istream.binsha, Blob.file_mode, path.encode('utf-8'))

    # Add blob to the index
    repo.index.add([IndexEntry.from_blob(blob)])

    if not commit_msg:
        commit_msg = ugettext(u'Update Wiki: {0}').format(path).encode('utf-8')

    repo.index.commit(commit_msg)
Example #24
File: test_base.py Project: Kronuz/gitdb
    def test_streams(self):
        # test info
        sha = NULL_BIN_SHA
        s = 20
        blob_id = 3

        info = OInfo(sha, str_blob_type, s)
        assert info.binsha == sha
        assert info.type == str_blob_type
        assert info.type_id == blob_id
        assert info.size == s

        # test pack info
        # provides type_id
        pinfo = OPackInfo(0, blob_id, s)
        assert pinfo.type == str_blob_type
        assert pinfo.type_id == blob_id
        assert pinfo.pack_offset == 0

        dpinfo = ODeltaPackInfo(0, blob_id, s, sha)
        assert dpinfo.type == str_blob_type
        assert dpinfo.type_id == blob_id
        assert dpinfo.delta_info == sha
        assert dpinfo.pack_offset == 0

        # test ostream
        stream = DummyStream()
        ostream = OStream(*(info + (stream, )))
        assert ostream.stream is stream
        ostream.read(15)
        stream._assert()
        assert stream.bytes == 15
        ostream.read(20)
        assert stream.bytes == 20

        # test packstream
        postream = OPackStream(*(pinfo + (stream, )))
        assert postream.stream is stream
        postream.read(10)
        stream._assert()
        assert stream.bytes == 10

        # test deltapackstream
        dpostream = ODeltaPackStream(*(dpinfo + (stream, )))
        assert dpostream.stream is stream
        dpostream.read(5)
        stream._assert()
        assert stream.bytes == 5

        # derive with own args
        DeriveTest(sha, str_blob_type, s, stream, 'mine', myarg=3)._assert()

        # test istream
        istream = IStream(str_blob_type, s, stream)
        assert istream.binsha is None
        istream.binsha = sha
        assert istream.binsha == sha

        assert len(istream.binsha) == 20
        assert len(istream.hexsha) == 40

        assert istream.size == s
        istream.size = s * 2
        assert istream.size == s * 2
        assert istream.type == str_blob_type
        istream.type = "something"
        assert istream.type == "something"
        assert istream.stream is stream
        istream.stream = None
        assert istream.stream is None

        assert istream.error is None
        istream.error = Exception()
        assert isinstance(istream.error, Exception)
Example #25
def write_blob_from_path(odb, src_path):
    assert os.path.isfile(src_path) and not os.path.islink(src_path)
    istream = IStream("blob", os.path.getsize(src_path), io.open(src_path))
    odb.store(istream)
    return (blob_mode, istream.binsha)
Example #26
def make_blob(repo, blob_bytes, mode, path):
    stream = io.BytesIO(blob_bytes)
    istream = repo.odb.store(IStream(Blob.type, len(blob_bytes), stream))
    return Blob(repo, istream.binsha, mode, path)
Example #27
    def create_from_tree(cls,
                         repo,
                         tree,
                         message,
                         parent_commits=None,
                         head=False):
        """Commit the given tree, creating a commit object.
		
		:param repo: Repo object the commit should be part of 
		:param tree: Tree object or hex or bin sha 
			the tree of the new commit
		:param message: Commit message. It may be an empty string if no message is provided.
			It will be converted to a string in any case.
		:param parent_commits:
			Optional Commit objects to use as parents for the new commit.
			If empty list, the commit will have no parents at all and become 
			a root commit.
			If None , the current head commit will be the parent of the 
			new commit object
		:param head:
			If True, the HEAD will be advanced to the new commit automatically.
			Else the HEAD will remain pointing on the previous commit. This could 
			lead to undesired results when diffing files.
			
		:return: Commit object representing the new commit
			
		:note:
			Additional information about the committer and Author are taken from the
			environment or from the git configuration, see git-commit-tree for 
			more information"""
        parents = parent_commits
        if parent_commits is None:
            try:
                parent_commits = [repo.head.commit]
            except ValueError:
                # empty repositories have no head commit
                parent_commits = list()
            # END handle parent commits
        # END if parent commits are unset

        # retrieve all additional information, create a commit object, and
        # serialize it
        # Generally:
        # * Environment variables override configuration values
        # * Sensible defaults are set according to the git documentation

        # COMMITTER AND AUTHOR INFO
        cr = repo.config_reader()
        env = os.environ
        default_email = get_user_id()
        default_name = default_email.split('@')[0]

        conf_name = cr.get_value('user', cls.conf_name, default_name)
        conf_email = cr.get_value('user', cls.conf_email, default_email)

        author_name = env.get(cls.env_author_name, conf_name)
        author_email = env.get(cls.env_author_email, conf_email)

        committer_name = env.get(cls.env_committer_name, conf_name)
        committer_email = env.get(cls.env_committer_email, conf_email)

        # PARSE THE DATES
        unix_time = int(time())
        offset = altzone

        author_date_str = env.get(cls.env_author_date, '')
        if author_date_str:
            author_time, author_offset = parse_date(author_date_str)
        else:
            author_time, author_offset = unix_time, offset
        # END set author time

        committer_date_str = env.get(cls.env_committer_date, '')
        if committer_date_str:
            committer_time, committer_offset = parse_date(committer_date_str)
        else:
            committer_time, committer_offset = unix_time, offset
        # END set committer time

        # assume utf8 encoding
        enc_section, enc_option = cls.conf_encoding.split('.')
        conf_encoding = cr.get_value(enc_section, enc_option,
                                     cls.default_encoding)

        author = Actor(author_name, author_email)
        committer = Actor(committer_name, committer_email)

        # if the tree is no object, make sure we create one - otherwise
        # the created commit object is invalid
        if isinstance(tree, str):
            tree = repo.tree(tree)
        # END tree conversion

        # CREATE NEW COMMIT
        new_commit = cls(repo, cls.NULL_BIN_SHA, tree, author, author_time,
                         author_offset, committer, committer_time,
                         committer_offset, message, parent_commits,
                         conf_encoding)

        stream = StringIO()
        new_commit._serialize(stream)
        streamlen = stream.tell()
        stream.seek(0)

        istream = repo.odb.store(IStream(cls.type, streamlen, stream))
        new_commit.binsha = istream.binsha

        if head:
            try:
                repo.head.commit = new_commit
            except ValueError:
                # head is not yet set to the ref our HEAD points to
                # Happens on first commit
                import git.refs
                master = git.refs.Head.create(repo,
                                              repo.head.ref,
                                              commit=new_commit)
                repo.head.reference = master
            # END handle empty repositories
        # END advance head handling

        return new_commit
Example #28
    def create_from_tree(cls,
                         repo,
                         tree,
                         message,
                         parent_commits=None,
                         head=False,
                         author=None,
                         committer=None,
                         author_date=None,
                         commit_date=None,
                         isolated=False):
        """Commit the given tree, creating a commit object.

        :param repo: Repo object the commit should be part of
        :param tree: Tree object or hex or bin sha
            the tree of the new commit
        :param message: Commit message. It may be an empty string if no message is provided.
            It will be converted to a string in any case.
        :param parent_commits:
            Optional Commit objects to use as parents for the new commit.
            If empty list, the commit will have no parents at all and become
            a root commit.
            If None, the current head commit will be the parent of the
            new commit object
        :param head:
            If True, the HEAD will be advanced to the new commit automatically.
            Else the HEAD will remain pointing on the previous commit. This could
            lead to undesired results when diffing files.
        :param author: The name of the author, optional. If unset, the repository
            configuration is used to obtain this value.
        :param committer: The name of the committer, optional. If unset, the
            repository configuration is used to obtain this value.
        :param author_date: The timestamp for the author field
        :param commit_date: The timestamp for the committer field
        :param isolated: if true, the parent environment is not passed to the git command.

        :return: Commit object representing the new commit

        :note:
            Additional information about the committer and author is taken from the
            environment or from the git configuration; see git-commit-tree for
            more information"""
        if parent_commits is None:
            try:
                parent_commits = [repo.head.commit]
            except ValueError:
                # empty repositories have no head commit
                parent_commits = []
            # END handle parent commits
        else:
            for p in parent_commits:
                if not isinstance(p, cls):
                    raise ValueError("Parent commit '%r' must be of type %s" %
                                     (p, cls))
            # end check parent commit types
        # END if parent commits are unset

        # retrieve all additional information, create a commit object, and
        # serialize it
        # Generally:
        # * Environment variables override configuration values
        # * Sensible defaults are set according to the git documentation

        # COMMITTER AND AUTHOR INFO
        cr = repo.config_reader()
        env = {} if isolated else os.environ

        committer = committer or Actor.committer(cr, isolated=isolated)
        author = author or Actor.author(cr, isolated=isolated)

        # PARSE THE DATES
        unix_time = int(time())
        is_dst = daylight and localtime().tm_isdst > 0
        offset = altzone if is_dst else timezone

        author_date_str = env.get(cls.env_author_date, '')
        if author_date:
            author_time, author_offset = parse_date(author_date)
        elif author_date_str:
            author_time, author_offset = parse_date(author_date_str)
        else:
            author_time, author_offset = unix_time, offset
        # END set author time

        committer_date_str = env.get(cls.env_committer_date, '')
        if commit_date:
            committer_time, committer_offset = parse_date(commit_date)
        elif committer_date_str:
            committer_time, committer_offset = parse_date(committer_date_str)
        else:
            committer_time, committer_offset = unix_time, offset
        # END set committer time

        # assume utf8 encoding
        enc_section, enc_option = cls.conf_encoding.split('.')
        conf_encoding = cr.get_value(enc_section, enc_option,
                                     cls.default_encoding)

        # if the tree is no object, make sure we create one - otherwise
        # the created commit object is invalid
        if isinstance(tree, str):
            tree = repo.tree(tree)
        # END tree conversion

        # CREATE NEW COMMIT
        new_commit = cls(repo, cls.NULL_BIN_SHA, tree, author, author_time,
                         author_offset, committer, committer_time,
                         committer_offset, message, parent_commits,
                         conf_encoding)

        stream = BytesIO()
        new_commit._serialize(stream)
        streamlen = stream.tell()
        stream.seek(0)

        istream = repo.odb.store(IStream(cls.type, streamlen, stream))
        new_commit.binsha = istream.binsha

        if head:
            # need late import here, importing git at the very beginning throws
            # as well ...
            import git.refs
            try:
                repo.head.set_commit(new_commit, logmsg=message)
            except ValueError:
                # head is not yet set to the ref our HEAD points to
                # Happens on first commit
                master = git.refs.Head.create(repo,
                                              repo.head.ref,
                                              new_commit,
                                              logmsg="commit (initial): %s" %
                                              message)
                repo.head.set_reference(master,
                                        logmsg='commit: Switching to %s' %
                                        master)
            # END handle empty repositories
        # END advance head handling

        return new_commit
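
A typical call, assuming this classmethod lives on GitPython's Commit as shown (the repository path and message are placeholders):

from git import Repo, Commit

repo = Repo('/path/to/repo')       # assumed existing repository
tree = repo.head.commit.tree       # reuse the current tree unchanged

new_commit = Commit.create_from_tree(
    repo, tree, 'commit created directly from a tree',
    parent_commits=None,           # None -> parent is the current HEAD commit
    head=True)                     # advance HEAD to the new commit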
Example #29
    def test_streams(self):
        # test info
        sha = NULL_BIN_SHA
        s = 20
        blob_id = 3

        info = OInfo(sha, str_blob_type, s)
        assert info.binsha == sha
        assert info.type == str_blob_type
        assert info.type_id == blob_id
        assert info.size == s

        # test pack info
        # provides type_id
        pinfo = OPackInfo(0, blob_id, s)
        assert pinfo.type == str_blob_type
        assert pinfo.type_id == blob_id
        assert pinfo.pack_offset == 0

        dpinfo = ODeltaPackInfo(0, blob_id, s, sha)
        assert dpinfo.type == str_blob_type
        assert dpinfo.type_id == blob_id
        assert dpinfo.delta_info == sha
        assert dpinfo.pack_offset == 0

        # test ostream
        stream = DummyStream()
        ostream = OStream(*(info + (stream, )))
        assert ostream.stream is stream
        ostream.read(15)
        stream._assert()
        assert stream.bytes == 15
        ostream.read(20)
        assert stream.bytes == 20

        # test packstream
        postream = OPackStream(*(pinfo + (stream, )))
        assert postream.stream is stream
        postream.read(10)
        stream._assert()
        assert stream.bytes == 10

        # test deltapackstream
        dpostream = ODeltaPackStream(*(dpinfo + (stream, )))
        assert dpostream.stream is stream
        dpostream.read(5)
        stream._assert()
        assert stream.bytes == 5

        # derive with own args
        DeriveTest(sha, str_blob_type, s, stream, 'mine', myarg=3)._assert()

        # test istream
        istream = IStream(str_blob_type, s, stream)
        assert istream.binsha is None
        istream.binsha = sha
        assert istream.binsha == sha

        assert len(istream.binsha) == 20
        assert len(istream.hexsha) == 40

        assert istream.size == s
        istream.size = s * 2
        assert istream.size == s * 2
        assert istream.type == str_blob_type
        istream.type = "something"
        assert istream.type == "something"
        assert istream.stream is stream
        istream.stream = None
        assert istream.stream is None

        assert istream.error is None
        istream.error = Exception()
        assert isinstance(istream.error, Exception)
Example #30
def home(request):
    data = {}

    if request.method == 'POST':
        wiki_name = request.POST['add-wiki-name']
        wiki_slug = slugify(wiki_name)
        wiki_desc = request.POST['add-wiki-desc']
        wiki_gitd = os.path.join(settings.WIKI_GIT_DIR, wiki_slug)

        # Check if the slug is present or not
        try:
            Wiki.objects.get(slug=wiki_slug)
            data['error'] = ugettext(
                'Can\'t add wiki, another wiki with the same name ({0}) already exists'
            ).format(wiki_name)

        except Wiki.DoesNotExist:
            os.environ['GIT_AUTHOR_NAME'] = u'{0} {1}'.format(
                request.user.first_name,
                request.user.last_name).encode('utf-8')
            os.environ['GIT_AUTHOR_EMAIL'] = request.user.email
            os.environ['USERNAME'] = str(request.user.username)

            # Create repository
            repo = Repo.init(wiki_gitd)

            # Add first wiki file
            stream = StringIO('# {0}'.format(wiki_name))
            stream.seek(0, 2)
            streamlen = stream.tell()
            stream.seek(0)

            istream = IStream("blob", streamlen, stream)

            repo.odb.store(istream)

            blob = Blob(repo, istream.binsha, 0100644, 'Home.md')

            repo.index.add([IndexEntry.from_blob(blob)])
            repo.index.commit(
                ugettext('Initialize {0}').format(wiki_name).encode('utf-8'))

            del (os.environ['GIT_AUTHOR_NAME'])
            del (os.environ['GIT_AUTHOR_EMAIL'])
            del (os.environ['USERNAME'])

            # Create wiki
            wiki = Wiki()
            wiki.name = wiki_name
            wiki.slug = wiki_slug
            wiki.description = wiki_desc
            wiki.gitdir = wiki_gitd
            wiki.save()

    wikis = Wiki.objects.all()

    data['wikis'] = [wikis[x:x + 3] for x in xrange(0, len(wikis), 3)]

    return render_to_response(u'home.html',
                              data,
                              context_instance=RequestContext(request))
Example #31
    def test_large_data_streaming(self, rwrepo):
        # TODO: This part overlaps with the same file in gitdb.test.performance.test_stream
        # It should be shared if possible
        ldb = LooseObjectDB(osp.join(rwrepo.git_dir, 'objects'))

        for randomize in range(2):
            desc = (randomize and 'random ') or ''
            print("Creating %s data ..." % desc, file=sys.stderr)
            st = time()
            size, stream = make_memory_file(self.large_data_size_bytes, randomize)
            elapsed = time() - st
            print("Done (in %f s)" % elapsed, file=sys.stderr)

            # writing - due to the compression it will seem faster than it is
            st = time()
            binsha = ldb.store(IStream('blob', size, stream)).binsha
            elapsed_add = time() - st
            assert ldb.has_object(binsha)
            db_file = ldb.readable_db_object_path(bin_to_hex(binsha))
            fsize_kib = osp.getsize(db_file) / 1000

            size_kib = size / 1000
            msg = "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)"
            msg %= (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add)
            print(msg, file=sys.stderr)

            # reading all at once
            st = time()
            ostream = ldb.stream(binsha)
            shadata = ostream.read()
            elapsed_readall = time() - st

            stream.seek(0)
            assert shadata == stream.getvalue()
            msg = "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)"
            msg %= (size_kib, desc, elapsed_readall, size_kib / elapsed_readall)
            print(msg, file=sys.stderr)

            # reading in chunks of 512 kB
            cs = 512 * 1000
            chunks = list()
            st = time()
            ostream = ldb.stream(binsha)
            while True:
                data = ostream.read(cs)
                chunks.append(data)
                if len(data) < cs:
                    break
            # END read in chunks
            elapsed_readchunks = time() - st

            stream.seek(0)
            assert b''.join(chunks) == stream.getvalue()

            cs_kib = cs / 1000
            print("Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)"
                  % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks), file=sys.stderr)

            # del db file so git has something to do
            ostream = None
            import gc
            gc.collect()
            os.remove(db_file)

            # VS. CGIT
            ##########
            # CGIT ! Can using the cgit programs be faster ?
            proc = rwrepo.git.hash_object('-w', '--stdin', as_process=True, istream=subprocess.PIPE)

            # write file - pump everything in at once to be a fast as possible
            data = stream.getvalue()    # cache it
            st = time()
            proc.stdin.write(data)
            proc.stdin.close()
            gitsha = proc.stdout.read().strip()
            proc.wait()
            gelapsed_add = time() - st
            del(data)
            assert gitsha == bin_to_hex(binsha)     # we do it the same way, right ?

            # as it's the same sha, we reuse our path
            fsize_kib = osp.getsize(db_file) / 1000
            msg = "Added %i KiB (filesize = %i KiB) of %s data to using git-hash-object in %f s ( %f Write KiB / s)"
            msg %= (size_kib, fsize_kib, desc, gelapsed_add, size_kib / gelapsed_add)
            print(msg, file=sys.stderr)

            # compare ...
            print("Git-Python is %f %% faster than git when adding big %s files"
                  % (100.0 - (elapsed_add / gelapsed_add) * 100, desc), file=sys.stderr)

            # read all
            st = time()
            hexsha, typename, size, data = rwrepo.git.get_object_data(gitsha)  # @UnusedVariable
            gelapsed_readall = time() - st
            print("Read %i KiB of %s data at once using git-cat-file in %f s ( %f Read KiB / s)"
                  % (size_kib, desc, gelapsed_readall, size_kib / gelapsed_readall), file=sys.stderr)

            # compare
            print("Git-Python is %f %% faster than git when reading big %sfiles"
                  % (100.0 - (elapsed_readall / gelapsed_readall) * 100, desc), file=sys.stderr)

            # read chunks
            st = time()
            hexsha, typename, size, stream = rwrepo.git.stream_object_data(gitsha)  # @UnusedVariable
            while True:
                data = stream.read(cs)
                if len(data) < cs:
                    break
            # END read stream
            gelapsed_readchunks = time() - st
            msg = "Read %i KiB of %s data in %i KiB chunks from git-cat-file in %f s ( %f Read KiB / s)"
            msg %= (size_kib, desc, cs_kib, gelapsed_readchunks, size_kib / gelapsed_readchunks)
            print(msg, file=sys.stderr)

            # compare
            print("Git-Python is %f %% faster than git when reading big %s files in chunks"
                  % (100.0 - (elapsed_readchunks / gelapsed_readchunks) * 100, desc), file=sys.stderr)