Example #1
    def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode,
                 b_mode, new_file, deleted_file, rename_from, rename_to, diff):

        self.a_mode = a_mode
        self.b_mode = b_mode

        if self.a_mode:
            self.a_mode = mode_str_to_int(self.a_mode)
        if self.b_mode:
            self.b_mode = mode_str_to_int(self.b_mode)

        if a_blob_id is None:
            self.a_blob = None
        else:
            self.a_blob = Blob(repo,
                               hex_to_bin(a_blob_id),
                               mode=self.a_mode,
                               path=a_path)
        if b_blob_id is None:
            self.b_blob = None
        else:
            self.b_blob = Blob(repo,
                               hex_to_bin(b_blob_id),
                               mode=self.b_mode,
                               path=b_path)

        self.new_file = new_file
        self.deleted_file = deleted_file

        # be clear and use None instead of empty strings
        self.rename_from = rename_from or None
        self.rename_to = rename_to or None

        self.diff = diff
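
Note: every example on this page revolves around the same conversion. gitdb stores SHA-1 ids as 20-byte binary strings, while git's plumbing speaks 40-character hex strings. A minimal round-trip sketch (the import path matches the one used in Example #43 below):

    from gitdb.util import hex_to_bin, bin_to_hex

    hexsha = "5690fd0d3304f378754b23b098bd7cb5f4aa1976"
    binsha = hex_to_bin(hexsha)                           # 20 raw bytes
    assert len(binsha) == 20
    assert bin_to_hex(binsha) == hexsha.encode('ascii')   # bin_to_hex returns bytes
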
Example #2
	def test_reading(self):
		gdb = GitDB(fixture_path('../../.git/objects'))
		
		# we have packs and loose objects, alternates doesn't necessarily exist
		assert 1 < len(gdb.databases()) < 4
		
		# access should be possible
		gitdb_sha = hex_to_bin("5690fd0d3304f378754b23b098bd7cb5f4aa1976")
		assert isinstance(gdb.info(gitdb_sha), OInfo)
		assert isinstance(gdb.stream(gitdb_sha), OStream)
		assert gdb.size() > 200
		sha_list = list(gdb.sha_iter())
		assert len(sha_list) == gdb.size()
		
		
		# This is actually a test for compound functionality, but it doesn't 
		# have a separate test module
		# test partial shas
		# this one is uneven and quite short
		assert gdb.partial_to_complete_sha_hex('155b6') == hex_to_bin("155b62a9af0aa7677078331e111d0f7aa6eb4afc")
		
		# mix even/uneven hexshas
		for i, binsha in enumerate(sha_list):
			assert gdb.partial_to_complete_sha_hex(bin_to_hex(binsha)[:8-(i%2)]) == binsha
		# END for each sha
		
		self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex, "0000")
Example #3
    def test_reading(self):
        gdb = GitDB(fixture_path('../../../.git/objects'))

        # we have packs and loose objects, alternates doesn't necessarily exist
        assert 1 < len(gdb.databases()) < 4

        # access should be possible
        gitdb_sha = hex_to_bin("5690fd0d3304f378754b23b098bd7cb5f4aa1976")
        assert isinstance(gdb.info(gitdb_sha), OInfo)
        assert isinstance(gdb.stream(gitdb_sha), OStream)
        assert gdb.size() > 200
        sha_list = list(gdb.sha_iter())
        assert len(sha_list) == gdb.size()

        # This is actually a test for compound functionality, but it doesn't
        # have a separate test module
        # test partial shas
        # this one is uneven and quite short
        assert gdb.partial_to_complete_sha_hex('155b6') == hex_to_bin(
            "155b62a9af0aa7677078331e111d0f7aa6eb4afc")

        # mix even/uneven hexshas
        for i, binsha in enumerate(sha_list):
            assert gdb.partial_to_complete_sha_hex(
                bin_to_hex(binsha)[:8 - (i % 2)]) == binsha
        # END for each sha

        self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex,
                              "0000")
Example #4
	def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode,
				 b_mode, new_file, deleted_file, rename_from,
				 rename_to, diff):
	
		self.a_mode = a_mode
		self.b_mode = b_mode
		
		if self.a_mode:
			self.a_mode = mode_str_to_int(self.a_mode)
		if self.b_mode:
			self.b_mode = mode_str_to_int(self.b_mode)
		
		if a_blob_id is None:
			self.a_blob = None
		else:
			self.a_blob = Blob(repo, hex_to_bin(a_blob_id), mode=self.a_mode, path=a_path)
		if b_blob_id is None:
			self.b_blob = None
		else:
			self.b_blob = Blob(repo, hex_to_bin(b_blob_id), mode=self.b_mode, path=b_path)
		
		self.new_file = new_file
		self.deleted_file = deleted_file
		
		# be clear and use None instead of empty strings
		self.rename_from = rename_from or None
		self.rename_to = rename_to or None
		
		self.diff = diff
Example #5
	def _deserialize(self, stream):
		""":param from_rev_list: if true, the stream format is coming from the rev-list command
		Otherwise it is assumed to be a plain data stream from our object"""
		readline = stream.readline
		self.tree = Tree(self.repo, hex_to_bin(readline().split()[1]), Tree.tree_id<<12, '')

		self.parents = list()
		next_line = None
		while True:
			parent_line = readline()
			if not parent_line.startswith('parent'):
				next_line = parent_line
				break
			# END abort reading parents
			self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1])))
		# END for each parent line
		self.parents = tuple(self.parents)
		
		self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(next_line)
		self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(readline())
		
		
		# now we can have the encoding line, or an empty line followed by the optional
		# message.
		self.encoding = self.default_encoding
		# read encoding or empty line to separate message
		enc = readline()
		enc = enc.strip()
		if enc:
			self.encoding = enc[enc.find(' ')+1:]
			# now comes the message separator 
			readline()
		# END handle encoding
		
		# decode the authors name
		try:
			self.author.name = self.author.name.decode(self.encoding) 
		except UnicodeDecodeError:
			print >> sys.stderr, "Failed to decode author name '%s' using encoding %s" % (self.author.name, self.encoding)
		# END handle author's encoding
		
		# decode committer name
		try:
			self.committer.name = self.committer.name.decode(self.encoding) 
		except UnicodeDecodeError:
			print >> sys.stderr, "Failed to decode committer name '%s' using encoding %s" % (self.committer.name, self.encoding)
		# END handle committer's encoding
		
		# a stream from our data simply gives us the plain message
		# The end of our message stream is marked with a newline that we strip
		self.message = stream.read()
		try:
			self.message = self.message.decode(self.encoding)
		except UnicodeDecodeError:
			print >> sys.stderr, "Failed to decode message '%s' using encoding %s" % (self.message, self.encoding)
		# END exception handling 
		return self
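
For orientation, the plain commit stream this parser walks looks like the following; an optional "encoding <name>" line may appear before the blank separator. The shas and identities below are made up for illustration:

    raw_commit = (
        b"tree 55c33dcc5eae4bae6e09b2346a64eca0a7cbb0a4\n"            # first line: tree
        b"parent 0ca06b5f7371b03a2e0cff1e95e06cf06af24a66\n"          # zero or more parent lines
        b"author A U Thor <author@example.com> 1296643616 +0100\n"
        b"committer A U Thor <author@example.com> 1296643616 +0100\n"
        b"\n"                                                         # separator before the message
        b"initial commit\n"
    )
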
Example #6
def archive(test_viewer: TestViewer) -> Experiment:
    """
    Archive the files of the commit that corresponds to the given test; the relevant command is
        git archive -o <filename> <commit-id>
    :param test_viewer:
    :return:
    """
    repo = test_viewer.repo
    commit = Commit(repo, hex_to_bin(test_viewer.json_info['commit_hash']))

    old_path = os.getcwd()
    os.chdir(commit.tree.abspath)
    exp = Experiment('Archive')

    revert_path = exp.makedir('archive')
    revert_fn = os.path.join(revert_path, "file.zip")
    exp.regist_plugin('archive', {
        'file': revert_fn,
        'test_name': test_viewer.test_name
    })
    with open(revert_fn, 'wb') as w:
        commit.repo.archive(w, commit)

    exp.end()
    os.chdir(old_path)
    return exp
Example #7
	def _set_cache_(self, attr):
		"""Cache all our attributes at once"""
		if attr in TagObject.__slots__:
			ostream = self.repo.odb.stream(self.binsha)
			lines = ostream.read().splitlines()
			
			obj, hexsha = lines[0].split(" ")		# object <hexsha>
			type_token, type_name = lines[1].split(" ") # type <type_name>
			self.object = get_object_type_by_name(type_name)(self.repo, hex_to_bin(hexsha))
			
			self.tag = lines[2][4:]	 # tag <tag name>
			
			tagger_info = lines[3][7:]# tagger <actor> <date>
			self.tagger, self.tagged_date, self.tagger_tz_offset = parse_actor_and_date(tagger_info)
			
			# line 4 empty - it could mark the beginning of the next header
			# in case there really is no message, it would not exist. Otherwise 
			# a newline separates header from message
			if len(lines) > 5:
				self.message = "\n".join(lines[5:])
			else:
				self.message = ''
		# END check our attributes
		else:
			super(TagObject, self)._set_cache_(attr)
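
The line-indexed parsing above relies on the canonical tag object layout; a made-up stream showing which line each index refers to (lines[2][4:] strips the "tag " prefix, lines[3][7:] strips "tagger "):

    raw_tag = (
        b"object 5690fd0d3304f378754b23b098bd7cb5f4aa1976\n"            # lines[0]
        b"type commit\n"                                                # lines[1]
        b"tag v1.0\n"                                                   # lines[2]
        b"tagger A U Thor <author@example.com> 1296643616 +0100\n"      # lines[3]
        b"\n"                                                           # line 4: separator, if a message follows
        b"release message\n"                                            # lines[5:]
    )
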
Example #8
def name_to_object(repo, name):
	""":return: object specified by the given name, hexshas ( short and long )
		as well as references are supported"""
	hexsha = None
	
	# is it a hexsha? Try the most common lengths, which are 7 to 40
	if repo.re_hexsha_shortened.match(name):
		if len(name) != 40:
			# find long sha for short sha
			hexsha = short_to_long(repo.odb, name)
		else:
			hexsha = name
		# END handle short shas
	else:
		for base in ('%s', 'refs/%s', 'refs/tags/%s', 'refs/heads/%s', 'refs/remotes/%s', 'refs/remotes/%s/HEAD'):
			try:
				hexsha = SymbolicReference.dereference_recursive(repo, base % name)
				break
			except ValueError:
				pass
		# END for each base
	# END handle hexsha
	
	# tried everything ? fail
	if hexsha is None:
		raise BadObject(name)
	# END assert hexsha was found
	
	return Object.new_from_sha(repo, hex_to_bin(hexsha))
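
A hedged usage sketch: plain names are resolved by trying each ref base in the order listed above, while short hexshas are completed via short_to_long. The repository path and ref names here are assumptions:

    from git import Repo

    repo = Repo(".")                          # any repository
    head = name_to_object(repo, "master")     # found via refs/heads/master
    tag = name_to_object(repo, "0.1.5")       # found via refs/tags/0.1.5, if that tag exists
    obj = name_to_object(repo, "5690fd0")     # short hexsha, completed by short_to_long
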
Example #9
 def test_decompress_reader_special_case(self):
     odb = LooseObjectDB(fixture_path('objects'))
     ostream = odb.stream(hex_to_bin('7bb839852ed5e3a069966281bb08d50012fb309b'))
     
     # if there is a bug, we will be missing one byte exactly !
     data = ostream.read()
     assert len(data) == ostream.size
Example #10
	def test_decompress_reader_special_case(self):
		odb = LooseObjectDB(fixture_path('objects'))
		ostream = odb.stream(hex_to_bin('7bb839852ed5e3a069966281bb08d50012fb309b'))
		
		# if there is a bug, we will be missing one byte exactly !
		data = ostream.read()
		assert len(data) == ostream.size
Example #11
    def tags(self):
        versions = []
        repo = self._repo

        # Build a cache of tag -> commit
        # GitPython is not very optimized for reading large numbers of tags
        ref_cache = {}  # 'ref/tags/<tag>' -> hexsha
        # This is the same code that GitPython executes for each tag;
        # we execute it only once for all tags.
        for hexsha, ref in git.TagReference._iter_packed_refs(repo):
            gitobject = git.Object.new_from_sha(repo, hex_to_bin(hexsha))
            if gitobject.type == 'commit':
                ref_cache[ref] = str(gitobject)
            elif gitobject.type == 'tag' and gitobject.object.type == 'commit':
                ref_cache[ref] = str(gitobject.object)

        for tag in repo.tags:
            if tag.path in ref_cache:
                hexsha = ref_cache[tag.path]
            else:
                try:
                    hexsha = str(tag.commit)
                except ValueError:
                    # ValueError: Cannot resolve commit as tag TAGNAME points to a
                    # blob object - use the `.object` property instead to access it
                    # This is not a real tag for us, so we skip it
                    # https://github.com/rtfd/readthedocs.org/issues/4440
                    log.warning('Git tag skipped: %s', tag, exc_info=True)
                    continue

            versions.append(VCSVersion(self, hexsha, str(tag)))
        return versions
Example #12
    def _set_cache_(self, attr):
        """Cache all our attributes at once"""
        if attr in TagObject.__slots__:
            ostream = self.repo.odb.stream(self.binsha)
            lines = ostream.read().decode(defenc).splitlines()

            obj, hexsha = lines[0].split(" ")  # object <hexsha>
            type_token, type_name = lines[1].split(" ")  # type <type_name>
            self.object = \
                get_object_type_by_name(type_name.encode('ascii'))(self.repo, hex_to_bin(hexsha))

            self.tag = lines[2][4:]  # tag <tag name>

            tagger_info = lines[3]  # tagger <actor> <date>
            self.tagger, self.tagged_date, self.tagger_tz_offset = parse_actor_and_date(
                tagger_info)

            # line 4 empty - it could mark the beginning of the next header
            # in case there really is no message, it would not exist. Otherwise
            # a newline separates header from message
            if len(lines) > 5:
                self.message = "\n".join(lines[5:])
            else:
                self.message = ''
        # END check our attributes
        else:
            super(TagObject, self)._set_cache_(attr)
Example #13
    def test_writing(self, path):
        NULL_BIN_SHA = '\0' * 20

        alt_path = os.path.join(path, 'alternates')
        rdb = ReferenceDB(alt_path)
        assert len(rdb.databases()) == 0
        assert rdb.size() == 0
        assert len(list(rdb.sha_iter())) == 0

        # try empty, non-existing
        assert not rdb.has_object(NULL_BIN_SHA)

        # setup alternate file
        # add two, one is invalid
        own_repo_path = fixture_path('../../../.git/objects')  # use own repo
        self.make_alt_file(alt_path, [own_repo_path, "invalid/path"])
        rdb.update_cache()
        assert len(rdb.databases()) == 1

        # we should now find a default revision of ours
        gitdb_sha = hex_to_bin("5690fd0d3304f378754b23b098bd7cb5f4aa1976")
        assert rdb.has_object(gitdb_sha)

        # remove valid
        self.make_alt_file(alt_path, ["just/one/invalid/path"])
        rdb.update_cache()
        assert len(rdb.databases()) == 0

        # add valid
        self.make_alt_file(alt_path, [own_repo_path])
        rdb.update_cache()
        assert len(rdb.databases()) == 1
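
make_alt_file is a test helper that is not shown here; judging from its use, it writes the standard alternates format, one object-database path per line. A sketch of an equivalent writer (the helper's real behavior is an assumption):

    def make_alt_file(alt_path, alt_list):
        # one object directory path per line, as git's info/alternates expects
        with open(alt_path, "wb") as fp:
            for alt in alt_list:
                fp.write(alt.encode("utf-8") + b"\n")
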
Example #14
def name_to_object(repo, name):
    """:return: object specified by the given name, hexshas ( short and long )
		as well as references are supported"""
    hexsha = None

    # is it a hexsha? Try the most common lengths, which are 7 to 40
    if repo.re_hexsha_shortened.match(name):
        if len(name) != 40:
            # find long sha for short sha
            hexsha = short_to_long(repo.odb, name)
        else:
            hexsha = name
        # END handle short shas
    else:
        for base in ('%s', 'refs/%s', 'refs/tags/%s', 'refs/heads/%s',
                     'refs/remotes/%s', 'refs/remotes/%s/HEAD'):
            try:
                hexsha = SymbolicReference.dereference_recursive(
                    repo, base % name)
                break
            except ValueError:
                pass
        # END for each base
    # END handle hexsha

    # tried everything ? fail
    if hexsha is None:
        raise BadObject(name)
    # END assert hexsha was found

    return Object.new_from_sha(repo, hex_to_bin(hexsha))
Example #15
    def test_index_merge_tree(self, rw_repo):
        # A bit out of place, but we need a different repo for this:
        self.assertNotEqual(self.rorepo, rw_repo)
        self.assertEqual(len(set((self.rorepo, self.rorepo, rw_repo, rw_repo))), 2)

        # SINGLE TREE MERGE
        # current index is at the (virtual) cur_commit
        next_commit = "4c39f9da792792d4e73fc3a5effde66576ae128c"
        parent_commit = rw_repo.head.commit.parents[0]
        manifest_key = IndexFile.entry_key('MANIFEST.in', 0)
        manifest_entry = rw_repo.index.entries[manifest_key]
        rw_repo.index.merge_tree(next_commit)
        # only one change should be recorded
        assert manifest_entry.binsha != rw_repo.index.entries[manifest_key].binsha

        rw_repo.index.reset(rw_repo.head)
        self.assertEqual(rw_repo.index.entries[manifest_key].binsha, manifest_entry.binsha)

        # FAKE MERGE
        #############
        # Add a change with a NULL sha that should conflict with next_commit. We
        # pretend there was a change, but we do not even bother adding a proper
        # sha for it ( which makes things faster of course )
        manifest_fake_entry = BaseIndexEntry((manifest_entry[0], b"\0" * 20, 0, manifest_entry[3]))
        # try write flag
        self._assert_entries(rw_repo.index.add([manifest_fake_entry], write=False))
        # add actually resolves the null-hex-sha for us as a feature, but we can
        # edit the index manually
        assert rw_repo.index.entries[manifest_key].binsha != Object.NULL_BIN_SHA
        # must operate on the same index for this! It's a bit problematic as
        # it might confuse people
        index = rw_repo.index
        index.entries[manifest_key] = IndexEntry.from_base(manifest_fake_entry)
        index.write()
        self.assertEqual(rw_repo.index.entries[manifest_key].hexsha, Diff.NULL_HEX_SHA)

        # write an unchanged index ( just for the fun of it )
        rw_repo.index.write()

        # a three way merge would result in a conflict and fail as the command will
        # not overwrite any entries in our index and hence leave them unmerged. This is
        # mainly a protection feature as the current index is not yet in a tree
        self.failUnlessRaises(GitCommandError, index.merge_tree, next_commit, base=parent_commit)

        # the only way to get the merged entries is to save the current index away into a tree,
        # which is like a temporary commit for us. This fails as well, as the NULL sha does not
        # have a corresponding object
        # NOTE: missing_ok is not a kwarg anymore, missing_ok is always true
        # self.failUnlessRaises(GitCommandError, index.write_tree)

        # if missing objects are okay, this would work though ( they are always okay now )
        # As we can't read back the tree with NULL_SHA, we rather set it to something else
        index.entries[manifest_key] = IndexEntry(manifest_entry[:1] + (hex_to_bin('f' * 40),) + manifest_entry[2:])
        tree = index.write_tree()

        # now make a proper three way merge with unmerged entries
        unmerged_tree = IndexFile.from_tree(rw_repo, parent_commit, tree, next_commit)
        unmerged_blobs = unmerged_tree.unmerged_blobs()
        self.assertEqual(len(unmerged_blobs), 1)
        self.assertEqual(list(unmerged_blobs.keys())[0], manifest_key[0])
Example #16
    def _iter_from_process_or_stream(cls, repo, proc_or_stream):
        """Parse out commit information into a list of Commit objects
        We expect one line per commit, and parse the actual commit information directly
        from our lightning-fast object database

        :param proc_or_stream: git-rev-list process instance or readline-capable stream - one sha per line
        :return: iterator returning Commit objects"""
        stream = proc_or_stream
        if not hasattr(stream, 'readline'):
            stream = proc_or_stream.stdout

        readline = stream.readline
        while True:
            line = readline()
            if not line:
                break
            hexsha = line.strip()
            if len(hexsha) > 40:
                # split additional information, as returned by bisect for instance
                hexsha, rest = line.split(None, 1)
            # END handle extra info

            assert len(hexsha) == 40, "Invalid line: %s" % hexsha
            yield Commit(repo, hex_to_bin(hexsha))
        # END for each line in stream
        # TODO: Review this - it seems process handling got a bit out of control
        # due to many developers trying to fix the open file handles issue
        if hasattr(proc_or_stream, 'wait'):
            finalize_process(proc_or_stream)
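
Because the helper only needs a readline, a plain in-memory stream with one 40-character hexsha per line (exactly what git rev-list prints) can stand in for the process; a sketch using a sha that appears in the tests above:

    from io import StringIO

    fake_rev_list = StringIO("5690fd0d3304f378754b23b098bd7cb5f4aa1976\n")
    # commits = list(Commit._iter_from_process_or_stream(repo, fake_rev_list))
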
Example #17
    def test_writing(self, path):
        alt_path = os.path.join(path, 'alternates')
        rdb = ReferenceDB(alt_path)
        assert len(rdb.databases()) == 0
        assert rdb.size() == 0
        assert len(list(rdb.sha_iter())) == 0

        # try empty, non-existing
        assert not rdb.has_object(NULL_BIN_SHA)

        # setup alternate file
        # add two, one is invalid
        own_repo_path = fixture_path('../../../.git/objects')       # use own repo
        self.make_alt_file(alt_path, [own_repo_path, "invalid/path"])
        rdb.update_cache()
        assert len(rdb.databases()) == 1

        # we should now find a default revision of ours
        gitdb_sha = hex_to_bin("5690fd0d3304f378754b23b098bd7cb5f4aa1976")
        assert rdb.has_object(gitdb_sha)

        # remove valid
        self.make_alt_file(alt_path, ["just/one/invalid/path"])
        rdb.update_cache()
        assert len(rdb.databases()) == 0

        # add valid
        self.make_alt_file(alt_path, [own_repo_path])
        rdb.update_cache()
        assert len(rdb.databases()) == 1
Example #18
 def test_list(self):
     assert isinstance(
         Commit.list_items(self.rorepo, "0.1.5", max_count=5)[
             hex_to_bin("5117c9c8a4d3af19a9958677e45cda9269de1541")
         ],
         Commit,
     )
Example #19
    def test_index_merge_tree(self, rw_repo):
        # A bit out of place, but we need a different repo for this:
        assert self.rorepo != rw_repo and not (self.rorepo == rw_repo)
        assert len(set((self.rorepo, self.rorepo, rw_repo, rw_repo))) == 2

        # SINGLE TREE MERGE
        # current index is at the (virtual) cur_commit
        next_commit = "4c39f9da792792d4e73fc3a5effde66576ae128c"
        parent_commit = rw_repo.head.commit.parents[0]
        manifest_key = IndexFile.entry_key('MANIFEST.in', 0)
        manifest_entry = rw_repo.index.entries[manifest_key]
        rw_repo.index.merge_tree(next_commit)
        # only one change should be recorded
        assert manifest_entry.binsha != rw_repo.index.entries[manifest_key].binsha

        rw_repo.index.reset(rw_repo.head)
        assert rw_repo.index.entries[manifest_key].binsha == manifest_entry.binsha

        # FAKE MERGE
        #############
        # Add a change with a NULL sha that should conflict with next_commit. We
        # pretend there was a change, but we do not even bother adding a proper
        # sha for it ( which makes things faster of course )
        manifest_fake_entry = BaseIndexEntry((manifest_entry[0], b"\0" * 20, 0, manifest_entry[3]))
        # try write flag
        self._assert_entries(rw_repo.index.add([manifest_fake_entry], write=False))
        # add actually resolves the null-hex-sha for us as a feature, but we can
        # edit the index manually
        assert rw_repo.index.entries[manifest_key].binsha != Object.NULL_BIN_SHA
        # must operate on the same index for this! It's a bit problematic as
        # it might confuse people
        index = rw_repo.index
        index.entries[manifest_key] = IndexEntry.from_base(manifest_fake_entry)
        index.write()
        assert rw_repo.index.entries[manifest_key].hexsha == Diff.NULL_HEX_SHA

        # write an unchanged index ( just for the fun of it )
        rw_repo.index.write()

        # a three way merge would result in a conflict and fail as the command will
        # not overwrite any entries in our index and hence leave them unmerged. This is
        # mainly a protection feature as the current index is not yet in a tree
        self.failUnlessRaises(GitCommandError, index.merge_tree, next_commit, base=parent_commit)

        # the only way to get the merged entries is to save the current index away into a tree,
        # which is like a temporary commit for us. This fails as well, as the NULL sha does not
        # have a corresponding object
        # NOTE: missing_ok is not a kwarg anymore, missing_ok is always true
        # self.failUnlessRaises(GitCommandError, index.write_tree)

        # if missing objects are okay, this would work though ( they are always okay now )
        # As we can't read back the tree with NULL_SHA, we rather set it to something else
        index.entries[manifest_key] = IndexEntry(manifest_entry[:1] + (hex_to_bin('f' * 40),) + manifest_entry[2:])
        tree = index.write_tree()

        # now make a proper three way merge with unmerged entries
        unmerged_tree = IndexFile.from_tree(rw_repo, parent_commit, tree, next_commit)
        unmerged_blobs = unmerged_tree.unmerged_blobs()
        assert len(unmerged_blobs) == 1 and list(unmerged_blobs.keys())[0] == manifest_key[0]
Example #20
	def _get_object(self):
		"""
		:return:
			The object our ref currently refers to. Refs can be cached, they will 
			always point to the actual object as it gets re-created on each query"""
		# have to be dynamic here as we may be a tag which can point to anything
		# Our path will be resolved to the hexsha which will be used accordingly
		return Object.new_from_sha(self.repo, hex_to_bin(self.dereference_recursive(self.repo, self.path)))
Example #21
 def _get_object(self):
     """
     :return:
         The object our ref currently refers to. Refs can be cached, they will
         always point to the actual object as it gets re-created on each query"""
     # have to be dynamic here as we may be a tag which can point to anything
     # Our path will be resolved to the hexsha which will be used accordingly
     return Object.new_from_sha(self.repo, hex_to_bin(self.dereference_recursive(self.repo, self.path)))
Example #22
    def __init__(self, repo, a_rawpath, b_rawpath, a_blob_id, b_blob_id,
                 a_mode, b_mode, new_file, deleted_file, raw_rename_from,
                 raw_rename_to, diff):

        self.a_mode = a_mode
        self.b_mode = b_mode

        assert a_rawpath is None or isinstance(a_rawpath, binary_type)
        assert b_rawpath is None or isinstance(b_rawpath, binary_type)
        self.a_rawpath = a_rawpath
        self.b_rawpath = b_rawpath

        if self.a_mode:
            self.a_mode = mode_str_to_int(self.a_mode)
        if self.b_mode:
            self.b_mode = mode_str_to_int(self.b_mode)

        if a_blob_id is None or a_blob_id == self.NULL_HEX_SHA:
            self.a_blob = None
        else:
            self.a_blob = Blob(repo,
                               hex_to_bin(a_blob_id),
                               mode=self.a_mode,
                               path=self.a_path)

        if b_blob_id is None or b_blob_id == self.NULL_HEX_SHA:
            self.b_blob = None
        else:
            self.b_blob = Blob(repo,
                               hex_to_bin(b_blob_id),
                               mode=self.b_mode,
                               path=self.b_path)

        self.new_file = new_file
        self.deleted_file = deleted_file

        # be clear and use None instead of empty strings
        assert raw_rename_from is None or isinstance(raw_rename_from,
                                                     binary_type)
        assert raw_rename_to is None or isinstance(raw_rename_to, binary_type)
        self.raw_rename_from = raw_rename_from or None
        self.raw_rename_to = raw_rename_to or None

        self.diff = diff
Example #23
    def blame_incremental(self, rev, file, **kwargs):
        """Iterator for blame information for the given file at the given revision.

        Unlike .blame(), this does not return the actual file's contents, only
        a stream of (commit, range) tuples.

        :param rev: revision specifier, see git-rev-parse for viable options.
        :return: lazy iterator of (git.Commit, range) tuples, where the commit
                 indicates the commit to blame for the line, and range
                 indicates a span of line numbers in the resulting file.

        If you combine all line number ranges outputted by this command, you
        should get a continuous range spanning all line numbers in the file.
        """
        data = self.git.blame(rev, '--', file, p=True, incremental=True, stdout_as_string=False, **kwargs)
        commits = dict()

        stream = iter(data.splitlines())
        while True:
            line = next(stream)  # when exhausted, causes a StopIteration, terminating this function

            hexsha, _, lineno, num_lines = line.split()
            lineno = int(lineno)
            num_lines = int(num_lines)
            if hexsha not in commits:
                # Now read the next few lines and build up a dict of properties
                # for this commit
                props = dict()
                while True:
                    line = next(stream)
                    if line == b'boundary':
                        # "boundary" indicates a root commit and occurs
                        # instead of the "previous" tag
                        continue

                    tag, value = line.split(b' ', 1)
                    props[tag] = value
                    if tag == b'filename':
                        # "filename" formally terminates the entry for --incremental
                        break

                c = Commit(self, hex_to_bin(hexsha),
                           author=Actor(safe_decode(props[b'author']),
                                        safe_decode(props[b'author-mail'].lstrip(b'<').rstrip(b'>'))),
                           authored_date=int(props[b'author-time']),
                           committer=Actor(safe_decode(props[b'committer']),
                                           safe_decode(props[b'committer-mail'].lstrip(b'<').rstrip(b'>'))),
                           committed_date=int(props[b'committer-time']),
                           message=safe_decode(props[b'summary']))
                commits[hexsha] = c
            else:
                # Discard the next line (it's a filename end tag)
                line = next(stream)
                assert line.startswith(b'filename'), 'Unexpected git blame output'

            yield commits[hexsha], range(lineno, lineno + num_lines)
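
A short consumption sketch for the iterator above; the repository path and file name are assumptions:

    from git import Repo

    repo = Repo(".")
    for commit, line_range in repo.blame_incremental("HEAD", "README.md"):
        print(commit.hexsha[:7], line_range.start, line_range.stop)
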
Example #24
    def sha_iter(self):
        # find all files which look like an object, extract sha from there
        for root, dirs, files in os.walk(self.root_path()):
            root_base = basename(root)
            if len(root_base) != 2:
                continue

            for f in files:
                if len(f) != 38:
                    continue
                yield hex_to_bin(root_base + f)
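
The walk relies on the loose-object layout: the first two hex digits of a sha name the directory, the remaining 38 name the file, so recombining them restores the full 40-digit hexsha. A check using a sha from these tests:

    from gitdb.util import hex_to_bin

    hexsha = "7bb839852ed5e3a069966281bb08d50012fb309b"
    root_base, f = hexsha[:2], hexsha[2:]       # directory name / file name
    assert len(root_base) == 2 and len(f) == 38
    assert hex_to_bin(root_base + f) == hex_to_bin(hexsha)
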
Example #25
 def partial_to_complete_sha_hex(self, partial_hexsha):
     """:return: Full binary 20 byte sha from the given partial hexsha
     :raise AmbiguousObjectName:
     :raise BadObject:
     :note: currently we only raise BadObject as git does not communicate
         AmbiguousObjects separately"""
     try:
         hexsha, typename, size = self._git.get_object_header(partial_hexsha)
         return hex_to_bin(hexsha)
     except (GitCommandError, ValueError):
         raise BadObject(partial_hexsha)
Example #26
def count_rev_list(repo):
    print('Counting with rev-list - this will NOT count dangling objects.')
    typecount = defaultdict(int)
    for line in repo.git.rev_list('--objects', '--all').split('\n'):
        binsha = hex_to_bin(line.split()[0])
        oinfo = repo.odb.info(binsha)
        typecount[oinfo.type] += 1
    print(
        ', '.join('{:s}s: {:d}'.format(k.decode('utf8').capitalize(), v)
                  for k, v in sorted(typecount.items())), 'Total:',
        sum(typecount.values()))
Example #27
File: db.py Project: daleha/git-kit
	def partial_to_complete_sha_hex(self, partial_hexsha):
		""":return: Full binary 20 byte sha from the given partial hexsha
		:raise AmbiguousObjectName:
		:raise BadObject:
		:note: currently we only raise BadObject as pygit does not communicate 
			AmbiguousObjects separately"""
		try:
			hexsha, typename, size = self._git.get_object_header(partial_hexsha)
			return hex_to_bin(hexsha)
		except (GitPyCommandError, ValueError):
			raise BadObject(partial_hexsha)
Example #28
def fu(repo, text):
    for header in Diff.re_header.finditer(text):
        a_path, b_path, similarity_index, rename_from, rename_to, \
        old_mode, new_mode, new_file_mode, deleted_file_mode, \
        a_blob_id, b_blob_id, b_mode = header.groups()
        # new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode)

        # Our only means to find the actual text is to see what has not been matched by our regex,
        # and then retroactively assign it to our index
        # if previous_header is not None:
        #     index[-1].diff = text[previous_header.end():header.start()]
        # end assign actual diff

        # Make sure the mode is set if the path is set. Otherwise the resulting blob is invalid
        # We just use the one mode we should have parsed
        a_mode = old_mode or deleted_file_mode or (a_path and (b_mode or new_mode or new_file_mode))
        b_mode = b_mode or new_mode or new_file_mode or (b_path and a_mode)
        ablob = Blob(repo, hex_to_bin(a_blob_id), mode=a_mode, path=a_path)
        bblob = Blob(repo, hex_to_bin(b_blob_id), mode=b_mode, path=b_path)
        return ablob, bblob
Example #29
    def sha_iter(self):
        # find all files which look like an object, extract sha from there
        for root, dirs, files in os.walk(self.root_path()):
            root_base = basename(root)
            if len(root_base) != 2:
                continue

            for f in files:
                if len(f) != 38:
                    continue
                yield hex_to_bin(root_base + f)
Example #30
    def _deserialize(self, stream):
        """:param from_rev_list: if true, the stream format is coming from the rev-list command
		Otherwise it is assumed to be a plain data stream from our object"""
        readline = stream.readline
        self.tree = Tree(self.repo, hex_to_bin(readline().split()[1]),
                         Tree.tree_id << 12, '')

        self.parents = list()
        next_line = None
        while True:
            parent_line = readline()
            if not parent_line.startswith('parent'):
                next_line = parent_line
                break
            # END abort reading parents
            self.parents.append(
                type(self)(self.repo, hex_to_bin(parent_line.split()[-1])))
        # END for each parent line
        self.parents = tuple(self.parents)

        self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(
            next_line)
        self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(
            readline())

        # now we can have the encoding line, or an empty line followed by the optional
        # message.
        self.encoding = self.default_encoding
        # read encoding or empty line to separate message
        enc = readline()
        enc = enc.strip()
        if enc:
            self.encoding = enc[enc.find(' ') + 1:]
            # now comes the message separator
            readline()
        # END handle encoding

        # a stream from our data simply gives us the plain message
        # The end of our message stream is marked with a newline that we strip
        self.message = stream.read()
        return self
Example #31
    def test_base_object(self):
        # test interface of base object classes
        types = (Blob, Tree, Commit, TagObject)
        assert len(types) == len(self.type_tuples)

        s = set()
        num_objs = 0
        num_index_objs = 0
        for obj_type, (typename, hexsha, path) in zip(types, self.type_tuples):
            binsha = hex_to_bin(hexsha)
            item = None
            if path is None:
                item = obj_type(self.rorepo, binsha)
            else:
                item = obj_type(self.rorepo, binsha, 0, path)
            # END handle index objects
            num_objs += 1
            assert item.hexsha == hexsha
            assert item.type == typename
            assert item.size
            assert item == item
            assert not item != item
            assert str(item) == item.hexsha
            assert repr(item)
            s.add(item)

            if isinstance(item, base.IndexObject):
                num_index_objs += 1
                if hasattr(item, 'path'):  # never runs here
                    assert not item.path.startswith("/")  # must be relative
                    assert isinstance(item.mode, int)
            # END index object check

            # read from stream
            data_stream = item.data_stream
            data = data_stream.read()
            assert data

            tmpfilename = tempfile.mktemp(suffix='test-stream')
            tmpfile = open(tmpfilename, 'wb+')
            assert item == item.stream_data(tmpfile)
            tmpfile.seek(0)
            assert tmpfile.read() == data
            tmpfile.close()
            os.remove(tmpfilename)
            # END stream to file directly
        # END for each object type to create

        # each has a unique sha
        assert len(s) == num_objs
        assert len(s | s) == num_objs
        assert num_index_objs == 2
Example #32
    def test_base_object(self):
        # test interface of base object classes
        types = (Blob, Tree, Commit, TagObject)
        assert len(types) == len(self.type_tuples)

        s = set()
        num_objs = 0
        num_index_objs = 0
        for obj_type, (typename, hexsha, path) in zip(types, self.type_tuples):
            binsha = hex_to_bin(hexsha)
            item = None
            if path is None:
                item = obj_type(self.rorepo, binsha)
            else:
                item = obj_type(self.rorepo, binsha, 0, path)
            # END handle index objects
            num_objs += 1
            assert item.hexsha == hexsha
            assert item.type == typename
            assert item.size
            assert item == item
            assert not item != item
            assert str(item) == item.hexsha
            assert repr(item)
            s.add(item)

            if isinstance(item, base.IndexObject):
                num_index_objs += 1
                if hasattr(item, 'path'):                        # never runs here
                    assert not item.path.startswith("/")        # must be relative
                    assert isinstance(item.mode, int)
            # END index object check

            # read from stream
            data_stream = item.data_stream
            data = data_stream.read()
            assert data

            tmpfilename = tempfile.mktemp(suffix='test-stream')
            tmpfile = open(tmpfilename, 'wb+')
            assert item == item.stream_data(tmpfile)
            tmpfile.seek(0)
            assert tmpfile.read() == data
            tmpfile.close()
            os.remove(tmpfilename)
            # END stream to file directly
        # END for each object type to create

        # each has a unique sha
        assert len(s) == num_objs
        assert len(s | s) == num_objs
        assert num_index_objs == 2
Example #33
	def _get_commit(self):
		"""
		:return:
			Commit object we point to, works for detached and non-detached 
			SymbolicReferences"""
		# we partially reimplement it to prevent unnecessary file access
		hexsha, target_ref_path = self._get_ref_info()
		
		# it is a detached reference
		if hexsha:
			return Commit(self.repo, hex_to_bin(hexsha))
		
		return self.from_path(self.repo, target_ref_path).commit
Example #34
    def __init__(self, repo, a_rawpath, b_rawpath, a_blob_id, b_blob_id, a_mode,
                 b_mode, new_file, deleted_file, raw_rename_from,
                 raw_rename_to, diff, change_type):

        self.a_mode = a_mode
        self.b_mode = b_mode

        assert a_rawpath is None or isinstance(a_rawpath, binary_type)
        assert b_rawpath is None or isinstance(b_rawpath, binary_type)
        self.a_rawpath = a_rawpath
        self.b_rawpath = b_rawpath

        if self.a_mode:
            self.a_mode = mode_str_to_int(self.a_mode)
        if self.b_mode:
            self.b_mode = mode_str_to_int(self.b_mode)

        if a_blob_id is None or a_blob_id == self.NULL_HEX_SHA:
            self.a_blob = None
        else:
            self.a_blob = Blob(repo, hex_to_bin(a_blob_id), mode=self.a_mode, path=self.a_path)

        if b_blob_id is None or b_blob_id == self.NULL_HEX_SHA:
            self.b_blob = None
        else:
            self.b_blob = Blob(repo, hex_to_bin(b_blob_id), mode=self.b_mode, path=self.b_path)

        self.new_file = new_file
        self.deleted_file = deleted_file

        # be clear and use None instead of empty strings
        assert raw_rename_from is None or isinstance(raw_rename_from, binary_type)
        assert raw_rename_to is None or isinstance(raw_rename_to, binary_type)
        self.raw_rename_from = raw_rename_from or None
        self.raw_rename_to = raw_rename_to or None

        self.diff = diff
        self.change_type = change_type
Example #35
    def partial_to_complete_sha_hex(self, partial_hexsha):
        """
		:return: 20 byte binary sha1 from the given less-than-40 byte hexsha
		:param partial_hexsha: hexsha with fewer than 40 hex digits
		:raise AmbiguousObjectName: """
        databases = list()
        _databases_recursive(self, databases)

        len_partial_hexsha = len(partial_hexsha)
        if len_partial_hexsha % 2 != 0:
            partial_binsha = hex_to_bin(partial_hexsha + "0")
        else:
            partial_binsha = hex_to_bin(partial_hexsha)
        # END assure successful binary conversion

        candidate = None
        for db in databases:
            full_bin_sha = None
            try:
                if hasattr(db, 'partial_to_complete_sha_hex'):
                    full_bin_sha = db.partial_to_complete_sha_hex(
                        partial_hexsha)
                else:
                    full_bin_sha = db.partial_to_complete_sha(
                        partial_binsha, len_partial_hexsha)
                # END handle database type
            except BadObject:
                continue
            # END ignore bad objects
            if full_bin_sha:
                if candidate and candidate != full_bin_sha:
                    raise AmbiguousObjectName(partial_hexsha)
                candidate = full_bin_sha
            # END handle candidate
        # END for each db
        if not candidate:
            raise BadObject(partial_binsha)
        return candidate
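
The "+ '0'" padding above exists because hex_to_bin requires an even number of hex digits; the extra nibble is harmless since prefix matching is driven by len_partial_hexsha rather than by the padded value. A sketch:

    from gitdb.util import hex_to_bin

    partial = "155b6"                                        # 5 digits, as in the tests above
    padded = partial + "0" if len(partial) % 2 else partial
    partial_binsha = hex_to_bin(padded)                      # 3 bytes for a 5-digit prefix
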
Example #36
    def test_decompress_reader_special_case(self):
        odb = LooseObjectDB(fixture_path('objects'))
        mdb = MemoryDB()
        for sha in (b'888401851f15db0eed60eb1bc29dec5ddcace911',
                    b'7bb839852ed5e3a069966281bb08d50012fb309b',):
            ostream = odb.stream(hex_to_bin(sha))

            # if there is a bug, we will be missing one byte exactly !
            data = ostream.read()
            assert len(data) == ostream.size

            # Putting it back in should yield nothing new - after all, we have
            dump = mdb.store(IStream(ostream.type, ostream.size, BytesIO(data)))
            assert dump.hexsha == sha
Example #37
    def test_decompress_reader_special_case(self):
        odb = LooseObjectDB(fixture_path('objects'))
        mdb = MemoryDB()
        for sha in (b'888401851f15db0eed60eb1bc29dec5ddcace911',
                    b'7bb839852ed5e3a069966281bb08d50012fb309b',):
            ostream = odb.stream(hex_to_bin(sha))

            # if there is a bug, we will be missing one byte exactly !
            data = ostream.read()
            assert len(data) == ostream.size

            # Putting it back in should yield nothing new - after all, we have
            dump = mdb.store(IStream(ostream.type, ostream.size, BytesIO(data)))
            assert dump.hexsha == sha
Example #38
	def partial_to_complete_sha_hex(self, partial_hexsha):
		"""
		:return: 20 byte binary sha1 from the given less-than-40 byte hexsha
		:param partial_hexsha: hexsha with fewer than 40 hex digits
		:raise AmbiguousObjectName: """
		databases = list()
		_databases_recursive(self, databases)
		
		len_partial_hexsha = len(partial_hexsha)
		if len_partial_hexsha % 2 != 0:
			partial_binsha = hex_to_bin(partial_hexsha + "0")
		else:
			partial_binsha = hex_to_bin(partial_hexsha)
		# END assure successful binary conversion 
		
		candidate = None
		for db in databases:
			full_bin_sha = None
			try:
				if hasattr(db, 'partial_to_complete_sha_hex'):
					full_bin_sha = db.partial_to_complete_sha_hex(partial_hexsha)
				else:
					full_bin_sha = db.partial_to_complete_sha(partial_binsha, len_partial_hexsha)
				# END handle database type
			except BadObject:
				continue
			# END ignore bad objects
			if full_bin_sha:
				if candidate and candidate != full_bin_sha:
					raise AmbiguousObjectName(partial_hexsha)
				candidate = full_bin_sha
			# END handle candidate
		# END for each db
		if not candidate:
			raise BadObject(partial_binsha)
		return candidate
Example #39
def name_to_object(repo, name, return_ref=False):
    """
	:return: object specified by the given name, hexshas ( short and long )
		as well as references are supported
	:param return_ref: if name specifies a reference, we will return the reference
		instead of the object. Otherwise it will raise BadObject
	"""
    hexsha = None

    # is it a hexsha? Try the most common lengths, which are 7 to 40
    if repo.re_hexsha_shortened.match(name):
        if len(name) != 40:
            # find long sha for short sha
            hexsha = short_to_long(repo.odb, name)
        else:
            hexsha = name
        # END handle short shas
    #END find sha if it matches

    # if we couldn't find an object for what seemed to be a short hexsha
    # try to find it as reference anyway, it could be named 'aaa' for instance
    if hexsha is None:
        for base in ('%s', 'refs/%s', 'refs/tags/%s', 'refs/heads/%s',
                     'refs/remotes/%s', 'refs/remotes/%s/HEAD'):
            try:
                hexsha = SymbolicReference.dereference_recursive(
                    repo, base % name)
                if return_ref:
                    return SymbolicReference(repo, base % name)
                #END handle symbolic ref
                break
            except ValueError:
                pass
        # END for each base
    # END handle hexsha

    # didn't find any ref, this is an error
    if return_ref:
        raise BadObject("Couldn't find reference named %r" % name)
    #END handle return ref

    # tried everything ? fail
    if hexsha is None:
        raise BadObject(name)
    # END assert hexsha was found

    return Object.new_from_sha(repo, hex_to_bin(hexsha))
Example #40
def name_to_object(repo, name, return_ref=False):
    """
    :return: object specified by the given name, hexshas ( short and long )
        as well as references are supported
    :param return_ref: if name specifies a reference, we will return the reference
        instead of the object. Otherwise it will raise BadObject
    """
    hexsha = None

    # is it a hexsha? Try the most common lengths, which are 7 to 40
    if repo.re_hexsha_shortened.match(name):
        if len(name) != 40:
            # find long sha for short sha
            hexsha = short_to_long(repo.odb, name)
        else:
            hexsha = name
        # END handle short shas
        # END find sha if it matches

    # if we couldn't find an object for what seemed to be a short hexsha
    # try to find it as reference anyway, it could be named 'aaa' for instance
    if hexsha is None:
        for base in ('%s', 'refs/%s', 'refs/tags/%s', 'refs/heads/%s', 'refs/remotes/%s',
                     'refs/remotes/%s/HEAD'):
            try:
                hexsha = SymbolicReference.dereference_recursive(repo, base % name)
                if return_ref:
                    return SymbolicReference(repo, base % name)
                #END handle symbolic ref
                break
            except ValueError:
                pass
            # END for each base
    # END handle hexsha

    # didn't find any ref, this is an error
    if return_ref:
        raise BadObject("Couldn't find reference named %r" % name)
    #END handle return ref

    # tried everything ? fail
    if hexsha is None:
        raise BadObject(name)
    # END assert hexsha was found

    return Object.new_from_sha(repo, hex_to_bin(hexsha))
Example #41
def reset(test_viewer: TestViewer) -> Experiment:
    """
    Restore the files in the working directory to a given commit.
        The git workflow for restoring the snapshot:
            git branch experiment
            git add . & git commit -m ... // make sure the files are up to date to avoid conflict errors; Experiment() takes care of this step
            git checkout <commit-id> // restore the files to <commit-id>
            git checkout -b reset // attach the current state to a new temporary branch named reset
            git branch experiment // switch back to the experiment branch
            git add . & git commit -m ... // commit the current state again as the latest one
                // at this point the latest commit on experiment is the restored <commit-id>
            git branch -D reset  // delete the temporary branch
            git branch master // finally return to the original branch, keeping the git state intact apart from the file changes
    :param test_viewer: TestViewer
    :return:
    """
    repo = test_viewer.repo
    commit = Commit(repo, hex_to_bin(test_viewer.json_info['commit_hash']))

    old_path = os.getcwd()
    os.chdir(commit.tree.abspath)
    exp = Experiment('Reset')

    repo = commit.repo  # type:Repo
    with branch(commit.repo, _GITKEY.thexp_branch) as new_branch:
        repo.git.checkout(commit.hexsha)
        repo.git.checkout('-b', 'reset')
        repo.head.reference = new_branch
        repo.git.add('.')
        ncommit = repo.index.commit("Reset from {}".format(commit.hexsha))
        repo.git.branch('-d', 'reset')
    exp.regist_plugin(
        'reset',
        {
            'test_name': test_viewer.test_name,  # the state we restore from
            'from': exp.commit.hexsha,  # snapshot taken when reset ran
            'where': commit.hexsha,  # the commit we restore to, i.e. the state saved before the reset
            'to': ncommit.hexsha,  # the restored state committed again; 'from' and 'to' should describe exactly the same state
        })

    exp.end()
    os.chdir(old_path)
    return exp
Example #42
def getCategoryObjects(workingDir):
    repo = Repo(workingDir)
    co = CategoryObjects()

    allShaWithName = repo.git.execute(
        'git rev-list --objects --all --indexed-objects', shell=True)
    allSha = [
        item.split(' ')[0] for item in allShaWithName.split('\n') if item
    ]
    for shaStr in allSha:
        shaBin = hex_to_bin(shaStr)
        info = repo.odb.info(shaBin)
        type = info.type.decode('utf-8')
        if type == 'tree':
            co.trees.append(createTree(repo, shaBin, shaStr))
        elif type == 'commit':
            co.commits.append(createCommit(repo, shaBin, shaStr))
        elif type == 'blob':
            co.blobs.append(createBlob(repo, shaBin, shaStr))
        else:
            pass
    return co
Example #43
    def __setstate__(self, state: dict):
        from gitdb.util import hex_to_bin
        from git import Commit
        self._start_time = state['_start_time']
        self._time_fmt = state['_time_fmt']

        self._exp_name = state['_exp_name']
        self._exp_dir = state['_exp_dir']
        self._test_dir = state['_test_dir']
        self._project_dir = state['_project_dir']

        self._hold_dirs = state['_hold_dirs']
        self._plugins = state['_plugins']
        self._tags = state['_tags']

        self._exc_dict = state.get('_exc_dict', None)
        self._end_state = state['_end_state']

        self._in_main = state['_in_main']

        self._repo = None
        self._commit = Commit(self.repo, hex_to_bin(state['_commit']))
        self._config = globs
Example #44
    def _iter_from_process_or_stream(cls, repo, proc_or_stream):
        """Parse out commit information into a list of Commit objects
		We expect one line per commit, and parse the actual commit information directly
		from our lightning-fast object database

		:param proc_or_stream: git-rev-list process instance or readline-capable stream - one sha per line
		:return: iterator returning Commit objects"""
        stream = proc_or_stream
        if not hasattr(stream, 'readline'):
            stream = proc_or_stream.stdout

        readline = stream.readline
        while True:
            line = readline()
            if not line:
                break
            hexsha = line.strip()
            if len(hexsha) > 40:
                # split additional information, as returned by bisect for instance
                hexsha, rest = line.split(None, 1)
            # END handle extra info

            assert len(hexsha) == 40, "Invalid line: %s" % hexsha
            yield Commit(repo, hex_to_bin(hexsha))
Example #45
	def _iter_from_process_or_stream(cls, repo, proc_or_stream):
		"""Parse out commit information into a list of Commit objects
		We expect one line per commit, and parse the actual commit information directly
		from our lightning-fast object database

		:param proc_or_stream: pygit-rev-list process instance or readline-capable stream - one sha per line
		:return: iterator returning Commit objects"""
		stream = proc_or_stream
		if not hasattr(stream,'readline'):
			stream = proc_or_stream.stdout
			
		readline = stream.readline
		while True:
			line = readline()
			if not line:
				break
			hexsha = line.strip()
			if len(hexsha) > 40:
				# split additional information, as returned by bisect for instance
				hexsha, rest = line.split(None, 1)
			# END handle extra info
			
			assert len(hexsha) == 40, "Invalid line: %s" % hexsha
			yield Commit(repo, hex_to_bin(hexsha))
Example #46
def rev_parse(repo, rev):
    """
    :return: Object at the given revision, either Commit, Tag, Tree or Blob
    :param rev: git-rev-parse compatible revision specification, please see
        http://www.kernel.org/pub/software/scm/git/docs/git-rev-parse.html
        for details
    :note: Currently there is no access to the rev-log, rev-specs may only contain
        topological tokens such as ~ and ^.
    :raise BadObject: if the given revision could not be found
    :raise ValueError: If rev couldn't be parsed
    :raise IndexError: If invalid reflog index is specified"""

    # colon search mode ?
    if rev.startswith(':/'):
        # colon search mode
        raise NotImplementedError("commit by message search ( regex )")
    # END handle search

    obj = None
    ref = None
    output_type = "commit"
    start = 0
    parsed_to = 0
    lr = len(rev)
    while start < lr:
        if rev[start] not in "^~:@":
            start += 1
            continue
        # END handle start

        token = rev[start]

        if obj is None:
            # token is a rev name
            if start == 0:
                ref = repo.head.ref
            else:
                if token == '@':
                    ref = name_to_object(repo, rev[:start], return_ref=True)
                else:
                    obj = name_to_object(repo, rev[:start])
                    # END handle token
                    # END handle refname

            if ref is not None:
                obj = ref.commit
            #END handle ref
        # END initialize obj on first token


        start += 1

        # try to parse {type}
        if start < lr and rev[start] == '{':
            end = rev.find('}', start)
            if end == -1:
                raise ValueError("Missing closing brace to define type in %s" % rev)
            output_type = rev[start + 1:end]  # exclude brace

            # handle type
            if output_type == 'commit':
                pass  # default
            elif output_type == 'tree':
                try:
                    obj = to_commit(obj).tree
                except (AttributeError, ValueError):
                    pass  # error raised later
                # END exception handling
            elif output_type in ('', 'blob'):
                if obj.type == 'tag':
                    obj = deref_tag(obj)
                else:
                    # cannot do anything for non-tags
                    pass
                # END handle tag
            elif token == '@':
                # try single int
                assert ref is not None, "Require Reference to access reflog"
                revlog_index = None
                try:
                    # transform reversed index into the format of our revlog
                    revlog_index = -(int(output_type) + 1)
                except ValueError:
                    # TODO: Try to parse the other date options, using parse_date
                    # maybe
                    raise NotImplementedError("Support for additional @{...} modes not implemented")
                    # END handle revlog index

                try:
                    entry = ref.log_entry(revlog_index)
                except IndexError:
                    raise IndexError("Invalid revlog index: %i" % revlog_index)
                #END handle index out of bound

                obj = Object.new_from_sha(repo, hex_to_bin(entry.newhexsha))

                # make it pass the following checks
                output_type = None
            else:
                raise ValueError("Invalid output type: %s ( in %s )" % (output_type, rev))
            # END handle output type

            # empty output types don't require any specific type, its just about dereferencing tags
            if output_type and obj.type != output_type:
                raise ValueError("Could not accomodate requested object type %r, got %s" % (
                output_type, obj.type))
            # END verify ouput type

            start = end + 1  # skip brace
            parsed_to = start
            continue
        # END parse type

        # try to parse a number
        num = 0
        if token != ":":
            found_digit = False
            while start < lr:
                if rev[start] in digits:
                    num = num * 10 + int(rev[start])
                    start += 1
                    found_digit = True
                else:
                    break
                # END handle number
            # END number parse loop

            # no explicit number given, 1 is the default
            # (an explicit 0 is valid as well)
            if not found_digit:
                num = 1
            # END set default num
        # END number parsing only if non-blob mode


        parsed_to = start
        # handle hierarchy walk
        try:
            if token == "~":
                obj = to_commit(obj)
                for item in xrange(num):
                    obj = obj.parents[0]
                # END for each history item to walk
            elif token == "^":
                obj = to_commit(obj)
                # must be n'th parent
                if num:
                    obj = obj.parents[num - 1]
            elif token == ":":
                if obj.type != "tree":
                    obj = obj.tree
                # END get tree type
                obj = obj[rev[start:]]
                parsed_to = lr
            else:
                raise ValueError("Invalid token: %r" % token)
            # END end handle tag
        except (IndexError, AttributeError):
            raise BadObject("Invalid Revision in %s" % rev)
        # END exception handling
    # END parse loop

    # still no obj? It's probably a simple name
    if obj is None:
        obj = name_to_object(repo, rev)
        parsed_to = lr
    # END handle simple name

    if obj is None:
        raise ValueError("Revision specifier could not be parsed: %s" % rev)

    if parsed_to != lr:
        raise ValueError(
            "Didn't consume complete rev spec %s, consumed part: %s" % (rev, rev[:parsed_to]))

    return obj
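
A hedged usage sketch of the parser above, assuming a GitPython-style Repo in which each spec resolves (HEAD^2 needs a merge commit, HEAD:README a file of that name); each call exercises one of the token handlers:

from git import Repo

repo = Repo(".")
head_commit = rev_parse(repo, "HEAD")        # plain name, resolved via name_to_object
grandparent = rev_parse(repo, "HEAD~2")      # '~' walks two first-parents back
second_parent = rev_parse(repo, "HEAD^2")    # '^' selects the n'th parent (merges only)
tree = rev_parse(repo, "HEAD^{tree}")        # '{type}' coerces the output type
readme = rev_parse(repo, "HEAD:README")      # ':' descends into the commit's tree
prev_head = rev_parse(repo, "HEAD@{1}")      # '@{n}' indexes the reflog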
Ejemplo n.º 47
0
    def blame(self, rev, file):
        """The blame information for the given file at the given revision.

        :param rev: revision specifier, see git-rev-parse for viable options.
        :return:
            list: [git.Commit, list: [<line>]]
            A list of tuples associating a Commit object with a list of lines that
            changed within the given commit. The Commit objects will be given in order
            of appearance."""
        data = self.git.blame(rev, '--', file, p=True, stdout_as_string=False)
        commits = dict()
        blames = list()
        info = None

        keepends = True
        for line in data.splitlines(keepends):
            try:
                line = line.rstrip().decode(defenc)
            except UnicodeDecodeError:
                firstpart = ''
                is_binary = True
            else:
                # We can't tell where the binary data ends, as it may contain multiple newlines
                # along the way. So we rely on being able to decode the line to tell us what it is.
                # This can absolutely fail even on text files, but if it does, we should be fine
                # treating the content as binary instead
                parts = self.re_whitespace.split(line, 1)
                firstpart = parts[0]
                is_binary = False
            # end handle decode of line

            if self.re_hexsha_only.search(firstpart):
                # handles
                # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7        - indicates blame-data start
                # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2          - indicates
                # another line of blame with the same data
                digits = parts[-1].split(" ")
                if len(digits) == 3:
                    info = {'id': firstpart}
                    blames.append([None, []])
                elif info['id'] != firstpart:
                    info = {'id': firstpart}
                    blames.append([commits.get(firstpart), []])
                # END blame data initialization
            else:
                m = self.re_author_committer_start.search(firstpart)
                if m:
                    # handles:
                    # author Tom Preston-Werner
                    # author-mail <*****@*****.**>
                    # author-time 1192271832
                    # author-tz -0700
                    # committer Tom Preston-Werner
                    # committer-mail <*****@*****.**>
                    # committer-time 1192271832
                    # committer-tz -0700  - IGNORED BY US
                    role = m.group(0)
                    if firstpart.endswith('-mail'):
                        info["%s_email" % role] = parts[-1]
                    elif firstpart.endswith('-time'):
                        info["%s_date" % role] = int(parts[-1])
                    elif role == firstpart:
                        info[role] = parts[-1]
                    # END distinguish mail,time,name
                else:
                    # handle
                    # filename lib/grit.rb
                    # summary add Blob
                    # <and rest>
                    if firstpart.startswith('filename'):
                        info['filename'] = parts[-1]
                    elif firstpart.startswith('summary'):
                        info['summary'] = parts[-1]
                    elif firstpart == '':
                        if info:
                            sha = info['id']
                            c = commits.get(sha)
                            if c is None:
                                c = Commit(
                                    self,
                                    hex_to_bin(sha),
                                    author=Actor._from_string(
                                        info['author'] + ' ' +
                                        info['author_email']),
                                    authored_date=info['author_date'],
                                    committer=Actor._from_string(
                                        info['committer'] + ' ' +
                                        info['committer_email']),
                                    committed_date=info['committer_date'],
                                    message=info['summary'])
                                commits[sha] = c
                            # END if commit objects needs initial creation
                            if not is_binary:
                                if line and line[0] == '\t':
                                    line = line[1:]
                            else:
                                # NOTE: We are actually parsing lines out of binary data, which can lead to the
                                # binary being split up along the newline separator. We will append this to the blame
                                # we are currently looking at, even though it should be concatenated with the last line
                                # we have seen.
                                pass
                            # end handle line contents
                            blames[-1][0] = c
                            blames[-1][1].append(line)
                            info = {'id': sha}
                        # END if we collected commit info
                    # END distinguish filename,summary,rest
                # END distinguish author|committer vs filename,summary,rest
            # END distinguish hexsha vs other information
        return blames
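
For reference, a sketch of the git blame -p (porcelain) records this loop consumes; the sample is stitched together from the header examples in the comments above and is illustrative, not captured output:

sample = (
    "634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7\n"   # blame-data start
    "author Tom Preston-Werner\n"
    "author-time 1192271832\n"
    "filename lib/grit.rb\n"
    "\tdef initialize(repo)\n"                           # content line, TAB-prefixed
)
for line in sample.splitlines():
    # a TAB prefix is what makes re_whitespace.split() yield firstpart == ''
    kind = "content" if line.startswith("\t") else "header"
    print(kind, line.lstrip("\t"))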
Ejemplo n.º 48
0
Archivo: db.py Proyecto: daleha/git-kit
	def stream(self, sha):
		"""For now, all lookup is done by pygit itself"""
		hexsha, typename, size, stream = self._git.stream_object_data(bin_to_hex(sha))
		return OStream(hex_to_bin(hexsha), typename, size, stream)
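
A hedged usage sketch; db stands in for an instance of the class above and binsha for a known 20-byte binary sha, with attribute names as in gitdb's OStream:

ostream = db.stream(binsha)           # hypothetical instance and sha
print(ostream.type, ostream.size)     # object type name and uncompressed size
payload = ostream.read()              # the object's raw data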
Ejemplo n.º 49
0
    def _deserialize(self, stream):
        """:param from_rev_list: if true, the stream format is coming from the rev-list command
        Otherwise it is assumed to be a plain data stream from our object"""
        readline = stream.readline
        self.tree = Tree(self.repo, hex_to_bin(readline().split()[1]), Tree.tree_id << 12, '')

        self.parents = list()
        next_line = None
        while True:
            parent_line = readline()
            if not parent_line.startswith(b'parent'):
                next_line = parent_line
                break
            # END abort reading parents
            self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1].decode('ascii'))))
        # END for each parent line
        self.parents = tuple(self.parents)

        # we don't know actual author encoding before we have parsed it, so keep the lines around
        author_line = next_line
        committer_line = readline()

        # we might run into one or more mergetag blocks, skip those for now
        next_line = readline()
        while next_line.startswith(b'mergetag '):
            next_line = readline()
            while next_line.startswith(b' '):
                next_line = readline()
        # end skip mergetags

        # now we can have the encoding line, or an empty line followed by the optional
        # message.
        self.encoding = self.default_encoding

        # read headers
        enc = next_line
        buf = enc.strip()
        while buf:
            if buf.startswith(b"encoding "):
                self.encoding = buf[buf.find(b' ') + 1:].decode('ascii')
            elif buf.startswith(b"gpgsig "):
                sig = buf[buf.find(b' ') + 1:] + b"\n"
                is_next_header = False
                while True:
                    sigbuf = readline()
                    if not sigbuf:
                        break
                    if sigbuf[0:1] != b" ":
                        buf = sigbuf.strip()
                        is_next_header = True
                        break
                    sig += sigbuf[1:]
                # end read all signature
                self.gpgsig = sig.rstrip(b"\n").decode('ascii')
                if is_next_header:
                    continue
            buf = readline().strip()
        # decode the authors name

        try:
            self.author, self.authored_date, self.author_tz_offset = \
                parse_actor_and_date(author_line.decode(self.encoding))
        except UnicodeDecodeError:
            log.error("Failed to decode author line '%s' using encoding %s", author_line, self.encoding,
                      exc_info=True)

        try:
            self.committer, self.committed_date, self.committer_tz_offset = \
                parse_actor_and_date(committer_line.decode(self.encoding))
        except UnicodeDecodeError:
            log.error("Failed to decode committer line '%s' using encoding %s", committer_line, self.encoding,
                      exc_info=True)
        # END handle author's encoding

        # a stream from our data simply gives us the plain message
        # The end of our message stream is marked with a newline that we strip
        self.message = stream.read()
        try:
            self.message = self.message.decode(self.encoding)
        except UnicodeDecodeError:
            log.error("Failed to decode message '%s' using encoding %s", self.message, self.encoding, exc_info=True)
        # END exception handling

        return self
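
To make the header loop above concrete, here is a sketch of the serialized commit layout _deserialize walks; the shas and actor strings are placeholders:

from io import BytesIO

raw_commit = BytesIO(
    b"tree " + b"a" * 40 + b"\n" +
    b"parent " + b"b" * 40 + b"\n" +
    b"author A U Thor <author@example.com> 1192271832 -0700\n" +
    b"committer C O Mitter <committer@example.com> 1192271832 -0700\n" +
    b"encoding ISO-8859-1\n" +
    b"\n" +
    b"initial import\n"
)
# a Commit instance's _deserialize(raw_commit) would fill tree, parents,
# both actors with dates and tz offsets, encoding and message from this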
Ejemplo n.º 50
0
    def _deserialize(self, stream):
        """:param from_rev_list: if true, the stream format is coming from the rev-list command
        Otherwise it is assumed to be a plain data stream from our object"""
        readline = stream.readline
        self.tree = Tree(self.repo, hex_to_bin(readline().split()[1]), Tree.tree_id << 12, '')

        self.parents = list()
        next_line = None
        while True:
            parent_line = readline()
            if not parent_line.startswith('parent'):
                next_line = parent_line
                break
            # END abort reading parents
            self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1])))
        # END for each parent line
        self.parents = tuple(self.parents)

        self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(next_line)
        self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(readline())

        # we might run into one or more mergetag blocks, skip those for now
        next_line = readline()
        while next_line.startswith('mergetag '):
            next_line = readline()
            while next_line.startswith(' '):
                next_line = readline()

        # now we can have the encoding line, or an empty line followed by the optional
        # message.
        self.encoding = self.default_encoding

        # read headers
        enc = next_line
        buf = enc.strip()
        while buf != "":
            if buf.startswith("encoding "):
                self.encoding = buf[buf.find(' ') + 1:]
            elif buf.startswith("gpgsig "):
                sig = buf[buf.find(' ') + 1:] + "\n"
                is_next_header = False
                while True:
                    sigbuf = readline()
                    if sigbuf == "": break
                    if sigbuf[0:1] != " ":
                        buf = sigbuf.strip()
                        is_next_header = True
                        break
                    sig += sigbuf[1:]
                self.gpgsig = sig.rstrip("\n")
                if is_next_header:
                    continue
            buf = readline().strip()

        # decode the authors name
        try:
            self.author.name = self.author.name.decode(self.encoding)
        except UnicodeDecodeError:
            print >> sys.stderr, "Failed to decode author name '%s' using encoding %s" % (self.author.name, self.encoding)
        # END handle author's encoding

        # decode committer name
        try:
            self.committer.name = self.committer.name.decode(self.encoding)
        except UnicodeDecodeError:
            print >> sys.stderr, "Failed to decode committer name '%s' using encoding %s" % (self.committer.name, self.encoding)
        # END handle committer's encoding

        # a stream from our data simply gives us the plain message
        # The end of our message stream is marked with a newline that we strip
        self.message = stream.read()
        try:
            self.message = self.message.decode(self.encoding)
        except UnicodeDecodeError:
            print >> sys.stderr, "Failed to decode message '%s' using encoding %s" % (self.message, self.encoding)
        # END exception handling
        return self
Ejemplo n.º 51
0
    def test_base(self):
        rlp_head = fixture_path('reflog_HEAD')
        rlp_master = fixture_path('reflog_master')
        tdir = tempfile.mktemp(suffix="test_reflogs")
        os.mkdir(tdir)

        rlp_master_ro = RefLog.path(self.rorepo.head)
        assert os.path.isfile(rlp_master_ro)

        # simple read
        reflog = RefLog.from_file(rlp_master_ro)
        assert reflog._path is not None
        assert isinstance(reflog, RefLog)
        assert len(reflog)

        # iter_entries works with path and with stream
        assert len(list(RefLog.iter_entries(open(rlp_master, 'rb'))))
        assert len(list(RefLog.iter_entries(rlp_master)))

        # raise on invalid revlog
        # TODO: Try multiple corrupted ones !
        pp = 'reflog_invalid_'
        for suffix in ('oldsha', 'newsha', 'email', 'date', 'sep'):
            self.failUnlessRaises(ValueError, RefLog.from_file, fixture_path(pp + suffix))
        # END for each invalid file

        # cannot write an uninitialized reflog
        self.failUnlessRaises(ValueError, RefLog().write)

        # test serialize and deserialize - results must match exactly
        binsha = hex_to_bin(('f' * 40).encode('ascii'))
        msg = "my reflog message"
        cr = self.rorepo.config_reader()
        for rlp in (rlp_head, rlp_master):
            reflog = RefLog.from_file(rlp)
            tfile = os.path.join(tdir, os.path.basename(rlp))
            reflog.to_file(tfile)
            assert reflog.write() is reflog

            # parsed result must match ...
            treflog = RefLog.from_file(tfile)
            assert treflog == reflog

            # ... as well as each byte of the written stream
            assert open(tfile).read() == open(rlp).read()

            # append an entry
            entry = RefLog.append_entry(cr, tfile, IndexObject.NULL_BIN_SHA, binsha, msg)
            assert entry.oldhexsha == IndexObject.NULL_HEX_SHA
            assert entry.newhexsha == 'f' * 40
            assert entry.message == msg
            assert RefLog.from_file(tfile)[-1] == entry

            # index entry
            # raises on invalid index
            self.failUnlessRaises(IndexError, RefLog.entry_at, rlp, 10000)

            # indices can be positive ...
            assert isinstance(RefLog.entry_at(rlp, 0), RefLogEntry)
            RefLog.entry_at(rlp, 23)

            # ... and negative
            for idx in (-1, -24):
                RefLog.entry_at(rlp, idx)
            # END for each index to read
        # END for each reflog

        # finally remove our temporary data
        shutil.rmtree(tdir)
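
For orientation, a sketch of the single-line entry format RefLog parses and append_entry writes; the actor string is a placeholder, the message comes from the test above:

old, new = "0" * 40, "f" * 40          # old and new hexsha, space separated
actor = "A U Thor <author@example.com> 1192271832 -0700"
entry_line = "%s %s %s\t%s\n" % (old, new, actor, "my reflog message")
# oldhexsha, newhexsha and the actor/time block come before the TAB,
# the message after it -- matching the fields asserted in the test above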
Ejemplo n.º 52
0
	def blame(self, rev, file):
		"""The blame information for the given file at the given revision.

		:param rev: revision specifier, see git-rev-parse for viable options.
		:return:
			list: [git.Commit, list: [<line>]]
			A list of tuples associating a Commit object with a list of lines that 
			changed within the given commit. The Commit objects will be given in order
			of appearance."""
		data = self.git.blame(rev, '--', file, p=True)
		commits = dict()
		blames = list()
		info = None

		for line in data.splitlines(False):
			parts = self.re_whitespace.split(line, 1)
			firstpart = parts[0]
			if self.re_hexsha_only.search(firstpart):
				# handles 
				# 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7		- indicates blame-data start
				# 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2
				digits = parts[-1].split(" ")
				if len(digits) == 3:
					info = {'id': firstpart}
					blames.append([None, []])
				# END blame data initialization
			else:
				m = self.re_author_committer_start.search(firstpart)
				if m:
					# handles: 
					# author Tom Preston-Werner
					# author-mail <*****@*****.**>
					# author-time 1192271832
					# author-tz -0700
					# committer Tom Preston-Werner
					# committer-mail <*****@*****.**>
					# committer-time 1192271832
					# committer-tz -0700  - IGNORED BY US
					role = m.group(0)
					if firstpart.endswith('-mail'):
						info["%s_email" % role] = parts[-1]
					elif firstpart.endswith('-time'):
						info["%s_date" % role] = int(parts[-1])
					elif role == firstpart:
						info[role] = parts[-1]
					# END distinguish mail,time,name
				else:
					# handle
					# filename lib/grit.rb
					# summary add Blob
					# <and rest>
					if firstpart.startswith('filename'):
						info['filename'] = parts[-1]
					elif firstpart.startswith('summary'):
						info['summary'] = parts[-1]
					elif firstpart == '':
						if info:
							sha = info['id']
							c = commits.get(sha)
							if c is None:
								c = Commit(	 self, hex_to_bin(sha),
											 author=Actor._from_string(info['author'] + ' ' + info['author_email']),
											 authored_date=info['author_date'],
											 committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']),
											 committed_date=info['committer_date'],
											 message=info['summary'])
								commits[sha] = c
							# END if commit objects needs initial creation
							m = self.re_tab_full_line.search(line)
							text,  = m.groups()
							blames[-1][0] = c
							blames[-1][1].append( text )
							info = None
						# END if we collected commit info
					# END distinguish filename,summary,rest
				# END distinguish author|committer vs filename,summary,rest
			# END distinguish hexsha vs other information
		return blames
Ejemplo n.º 53
0
    def blame(self, rev, file):
        """The blame information for the given file at the given revision.

        :param rev: revision specifier, see git-rev-parse for viable options.
        :return:
            list: [git.Commit, list: [<line>]]
            A list of tuples associating a Commit object with a list of lines that
            changed within the given commit. The Commit objects will be given in order
            of appearance."""
        data = self.git.blame(rev, '--', file, p=True, stdout_as_string=False)
        commits = dict()
        blames = list()
        info = None

        keepends = True
        for line in data.splitlines(keepends):
            try:
                line = line.rstrip().decode(defenc)
            except UnicodeDecodeError:
                firstpart = ''
                is_binary = True
            else:
                # We can't tell where the binary data ends, as it may contain multiple newlines
                # along the way. So we rely on being able to decode the line to tell us what it is.
                # This can absolutely fail even on text files, but if it does, we should be fine
                # treating the content as binary instead
                parts = self.re_whitespace.split(line, 1)
                firstpart = parts[0]
                is_binary = False
            # end handle decode of line

            if self.re_hexsha_only.search(firstpart):
                # handles
                # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7        - indicates blame-data start
                # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2          - indicates
                # another line of blame with the same data
                digits = parts[-1].split(" ")
                if len(digits) == 3:
                    info = {'id': firstpart}
                    blames.append([None, []])
                elif info['id'] != firstpart:
                    info = {'id': firstpart}
                    blames.append([commits.get(firstpart), []])
                # END blame data initialization
            else:
                m = self.re_author_committer_start.search(firstpart)
                if m:
                    # handles:
                    # author Tom Preston-Werner
                    # author-mail <*****@*****.**>
                    # author-time 1192271832
                    # author-tz -0700
                    # committer Tom Preston-Werner
                    # committer-mail <*****@*****.**>
                    # committer-time 1192271832
                    # committer-tz -0700  - IGNORED BY US
                    role = m.group(0)
                    if firstpart.endswith('-mail'):
                        info["%s_email" % role] = parts[-1]
                    elif firstpart.endswith('-time'):
                        info["%s_date" % role] = int(parts[-1])
                    elif role == firstpart:
                        info[role] = parts[-1]
                    # END distinguish mail,time,name
                else:
                    # handle
                    # filename lib/grit.rb
                    # summary add Blob
                    # <and rest>
                    if firstpart.startswith('filename'):
                        info['filename'] = parts[-1]
                    elif firstpart.startswith('summary'):
                        info['summary'] = parts[-1]
                    elif firstpart == '':
                        if info:
                            sha = info['id']
                            c = commits.get(sha)
                            if c is None:
                                c = Commit(self, hex_to_bin(sha),
                                           author=Actor._from_string(info['author'] + ' ' + info['author_email']),
                                           authored_date=info['author_date'],
                                           committer=Actor._from_string(
                                               info['committer'] + ' ' + info['committer_email']),
                                           committed_date=info['committer_date'],
                                           message=info['summary'])
                                commits[sha] = c
                            # END if commit objects needs initial creation
                            if not is_binary:
                                if line and line[0] == '\t':
                                    line = line[1:]
                            else:
                                # NOTE: We are actually parsing lines out of binary data, which can lead to the
                                # binary being split up along the newline separator. We will append this to the blame
                                # we are currently looking at, even though it should be concatenated with the last line
                                # we have seen.
                                pass
                            # end handle line contents
                            blames[-1][0] = c
                            blames[-1][1].append(line)
                            info = {'id': sha}
                        # END if we collected commit info
                    # END distinguish filename,summary,rest
                # END distinguish author|committer vs filename,summary,rest
            # END distinguish hexsha vs other information
        return blames
Ejemplo n.º 54
0
    def blame_incremental(self, rev, file, **kwargs):
        """Iterator for blame information for the given file at the given revision.

        Unlike .blame(), this does not return the actual file's contents, only
        a stream of BlameEntry tuples.

        :parm rev: revision specifier, see git-rev-parse for viable options.
        :return: lazy iterator of BlameEntry tuples, where the commit
                 indicates the commit to blame for the line, and range
                 indicates a span of line numbers in the resulting file.

        If you combine all line number ranges outputted by this command, you
        should get a continuous range spanning all line numbers in the file.
        """
        data = self.git.blame(rev,
                              '--',
                              file,
                              p=True,
                              incremental=True,
                              stdout_as_string=False,
                              **kwargs)
        commits = dict()

        stream = (line for line in data.split(b'\n') if line)
        while True:
            # when exhausted, next() raises StopIteration, terminating this generator
            line = next(stream)
            hexsha, orig_lineno, lineno, num_lines = line.split()
            lineno = int(lineno)
            num_lines = int(num_lines)
            orig_lineno = int(orig_lineno)
            if hexsha not in commits:
                # Now read the next few lines and build up a dict of properties
                # for this commit
                props = dict()
                while True:
                    line = next(stream)
                    if line == b'boundary':
                        # "boundary" indicates a root commit and occurs
                        # instead of the "previous" tag
                        continue

                    tag, value = line.split(b' ', 1)
                    props[tag] = value
                    if tag == b'filename':
                        # "filename" formally terminates the entry for --incremental
                        orig_filename = value
                        break

                c = Commit(
                    self,
                    hex_to_bin(hexsha),
                    author=Actor(
                        safe_decode(props[b'author']),
                        safe_decode(
                            props[b'author-mail'].lstrip(b'<').rstrip(b'>'))),
                    authored_date=int(props[b'author-time']),
                    committer=Actor(
                        safe_decode(props[b'committer']),
                        safe_decode(props[b'committer-mail'].lstrip(
                            b'<').rstrip(b'>'))),
                    committed_date=int(props[b'committer-time']),
                    message=safe_decode(props[b'summary']))
                commits[hexsha] = c
            else:
                # Discard the next line (it's a filename end tag)
                line = next(stream)
                tag, value = line.split(b' ', 1)
                assert tag == b'filename', 'Unexpected git blame output'
                orig_filename = value

            yield BlameEntry(commits[hexsha], range(lineno,
                                                    lineno + num_lines),
                             safe_decode(orig_filename),
                             range(orig_lineno, orig_lineno + num_lines))
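
A hedged usage sketch; the tuple order follows the BlameEntry yielded above, and 'README.md' is just a placeholder path:

from git import Repo

repo = Repo(".")
for commit, linenos, orig_path, orig_linenos in repo.blame_incremental("HEAD", "README.md"):
    # linenos/orig_linenos are range objects spanning num_lines entries each
    print(commit.hexsha[:8], "%d-%d" % (linenos.start, linenos.stop - 1), orig_path)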
Ejemplo n.º 55
0
Archivo: db.py Proyecto: daleha/git-kit
	def info(self, sha):
		hexsha, typename, size = self._git.get_object_header(bin_to_hex(sha))
		return OInfo(hex_to_bin(hexsha), typename, size)
Ejemplo n.º 56
0
    def store(self, istream):
        """note: The sha we produce will be hex by nature"""
        tmp_path = None
        writer = self.ostream()
        if writer is None:
            # open a tmp file to write the data to
            fd, tmp_path = tempfile.mkstemp(prefix="obj", dir=self._root_path)

            if istream.binsha is None:
                writer = FDCompressedSha1Writer(fd)
            else:
                writer = FDStream(fd)
            # END handle direct stream copies
        # END handle custom writer

        try:
            try:
                if istream.binsha is not None:
                    # copy as much as possible, the actual uncompressed item size might
                    # be smaller than the compressed version
                    stream_copy(istream.read, writer.write, MAXSIZE, self.stream_chunk_size)
                else:
                    # write object with header, we have to make a new one
                    write_object(
                        istream.type, istream.size, istream.read, writer.write, chunk_size=self.stream_chunk_size
                    )
                # END handle direct stream copies
            finally:
                if tmp_path:
                    writer.close()
            # END assure target stream is closed
        except:
            if tmp_path:
                os.remove(tmp_path)
            raise
        # END assure tmpfile removal on error

        hexsha = None
        if istream.binsha:
            hexsha = istream.hexsha
        else:
            hexsha = writer.sha(as_hex=True)
        # END handle sha

        if tmp_path:
            obj_path = self.db_path(self.object_path(hexsha))
            obj_dir = dirname(obj_path)
            if not isdir(obj_dir):
                mkdir(obj_dir)
            # END handle destination directory
            # rename onto existing doesn't work on windows
            if os.name == "nt":
                if isfile(obj_path):
                    remove(tmp_path)
                else:
                    rename(tmp_path, obj_path)
                # end rename only if needed
            else:
                rename(tmp_path, obj_path)
            # END handle win32

            # make sure it's readable for all! It started out as a rw------- tmp file,
            # but needs to be rw-r--r--
            chmod(obj_path, self.new_objects_mode)
        # END handle tmp file

        istream.binsha = hex_to_bin(hexsha)
        return istream
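
For context, a sketch of the fan-out layout the rename above targets; loose_object_path is a hypothetical stand-in for the object_path/db_path pair used by store():

import os

def loose_object_path(hexsha):
    # git's loose-object store fans out on the first two hex digits:
    # objects/ab/cdef... for sha "abcdef..."
    return os.path.join(hexsha[:2], hexsha[2:])

print(loose_object_path("f" * 40))   # -> 'ff/ffff...ff' (38 f's after the slash)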