Example #1
0
    def __init__(self, repo: 'Repo', a_rawpath: Optional[bytes],
                 b_rawpath: Optional[bytes], a_blob_id: Union[str, bytes,
                                                              None],
                 b_blob_id: Union[str, bytes, None], a_mode: Union[bytes, str,
                                                                   None],
                 b_mode: Union[bytes, str,
                               None], new_file: bool, deleted_file: bool,
                 copied_file: bool, raw_rename_from: Optional[bytes],
                 raw_rename_to: Optional[bytes], diff: Union[str, bytes, None],
                 change_type: Optional[Lit_change_type],
                 score: Optional[int]) -> None:

        assert a_rawpath is None or isinstance(a_rawpath, bytes)
        assert b_rawpath is None or isinstance(b_rawpath, bytes)
        self.a_rawpath = a_rawpath
        self.b_rawpath = b_rawpath

        self.a_mode = mode_str_to_int(a_mode) if a_mode else None
        self.b_mode = mode_str_to_int(b_mode) if b_mode else None

        # Determine whether this diff references a submodule. If it does, we
        # need to point "repo" at the corresponding submodule's repo instead
        if repo and a_rawpath:
            for submodule in repo.submodules:
                if submodule.path == a_rawpath.decode(defenc, 'replace'):
                    if submodule.module_exists():
                        repo = submodule.module()
                    break

        self.a_blob: Union['IndexObject', None]
        if a_blob_id is None or a_blob_id == self.NULL_HEX_SHA:
            self.a_blob = None
        else:
            self.a_blob = Blob(repo,
                               hex_to_bin(a_blob_id),
                               mode=self.a_mode,
                               path=self.a_path)

        self.b_blob: Union['IndexObject', None]
        if b_blob_id is None or b_blob_id == self.NULL_HEX_SHA:
            self.b_blob = None
        else:
            self.b_blob = Blob(repo,
                               hex_to_bin(b_blob_id),
                               mode=self.b_mode,
                               path=self.b_path)

        self.new_file: bool = new_file
        self.deleted_file: bool = deleted_file
        self.copied_file: bool = copied_file

        # be clear and use None instead of empty strings
        assert raw_rename_from is None or isinstance(raw_rename_from, bytes)
        assert raw_rename_to is None or isinstance(raw_rename_to, bytes)
        self.raw_rename_from = raw_rename_from or None
        self.raw_rename_to = raw_rename_to or None

        self.diff = diff
        self.change_type: Union[Lit_change_type, None] = change_type
        self.score = score
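
A minimal, self-contained sketch of the null-sha guard used above, assuming hex_to_bin behaves like binascii.a2b_hex: a blob id equal to the all-zero hexsha means "no object on this side of the diff", so it maps to None rather than to a Blob.

import binascii

NULL_HEX_SHA = "0" * 40

def blob_id_to_binsha(blob_id):
    """Return the 20-byte binary sha for blob_id, or None for absent blobs."""
    if blob_id is None or blob_id == NULL_HEX_SHA:
        return None
    return binascii.a2b_hex(blob_id)

assert blob_id_to_binsha(None) is None
assert blob_id_to_binsha(NULL_HEX_SHA) is None
assert len(blob_id_to_binsha("5aebcd5cb3340fb31776941d7e4d518a712a8655")) == 20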
Example #2
0
    def partial_to_complete_sha_hex(self, partial_hexsha):
        len_partial_hexsha = len(partial_hexsha)
        if len_partial_hexsha % 2 != 0:
            partial_binsha = hex_to_bin(partial_hexsha + "0")
        else:
            partial_binsha = hex_to_bin(partial_hexsha)
        # END assure successful binary conversion

        candidate = None
        for db in self._dbs:
            full_bin_sha = None
            try:
                if hasattr(db, 'partial_to_complete_sha_hex'):
                    full_bin_sha = db.partial_to_complete_sha_hex(partial_hexsha)
                else:
                    full_bin_sha = db.partial_to_complete_sha(partial_binsha, len_partial_hexsha)
                # END handle database type
            except BadObject:
                continue
            # END ignore bad objects
            if full_bin_sha:
                if candidate and candidate != full_bin_sha:
                    raise AmbiguousObjectName(partial_hexsha)
                candidate = full_bin_sha
            # END handle candidate
        # END for each db
        if not candidate:
            raise BadObject(partial_binsha)
        return candidate
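
A small illustration of the odd-length padding above, again assuming hex_to_bin behaves like binascii.a2b_hex: an odd number of hex digits cannot form whole bytes, so the prefix is padded with a trailing "0" nibble before conversion.

import binascii

def partial_hex_to_bin(partial_hexsha):
    # pad odd-length prefixes so the hex-to-binary conversion succeeds
    if len(partial_hexsha) % 2 != 0:
        partial_hexsha += "0"
    return binascii.a2b_hex(partial_hexsha)

assert partial_hex_to_bin("5aebcd") == b"\x5a\xeb\xcd"
assert partial_hex_to_bin("5aebc") == b"\x5a\xeb\xc0"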
Example #3
0
 def partial_to_complete_sha_hex(self, partial_hexsha):
     len_partial_hexsha = len(partial_hexsha)
     if len_partial_hexsha % 2 != 0:
         partial_binsha = hex_to_bin(partial_hexsha + "0")
     else:
         partial_binsha = hex_to_bin(partial_hexsha)
     # END assure successful binary conversion 
     
     candidate = None
     for db in self._dbs:
         full_bin_sha = None
         try:
             if hasattr(db, 'partial_to_complete_sha_hex'):
                 full_bin_sha = db.partial_to_complete_sha_hex(partial_hexsha)
             else:
                 full_bin_sha = db.partial_to_complete_sha(partial_binsha, len_partial_hexsha)
             # END handle database type
         except BadObject:
             continue
         # END ignore bad objects
         if full_bin_sha:
             if candidate and candidate != full_bin_sha:
                 raise AmbiguousObjectName(partial_hexsha)
             candidate = full_bin_sha
         # END handle candidate
     # END for each db
     if not candidate:
         raise BadObject(partial_binsha)
     return candidate
Example #4
0
    def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode,
                 b_mode, new_file, deleted_file, rename_from, rename_to, diff):

        self.a_mode = a_mode
        self.b_mode = b_mode

        if self.a_mode:
            self.a_mode = mode_str_to_int(self.a_mode)
        if self.b_mode:
            self.b_mode = mode_str_to_int(self.b_mode)

        if a_blob_id is None:
            self.a_blob = None
        else:
            self.a_blob = Blob(repo,
                               hex_to_bin(a_blob_id),
                               mode=self.a_mode,
                               path=a_path)
        if b_blob_id is None:
            self.b_blob = None
        else:
            self.b_blob = Blob(repo,
                               hex_to_bin(b_blob_id),
                               mode=self.b_mode,
                               path=b_path)

        self.new_file = new_file
        self.deleted_file = deleted_file

        # be clear and use None instead of empty strings
        self.rename_from = rename_from or None
        self.rename_to = rename_to or None

        self.diff = diff
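
A simplified stand-in for the mode_str_to_int helper used above (the real implementation may differ): git mode strings such as "100644" are octal, so a base-8 parse yields the integer file mode.

def mode_str_to_int_sketch(modestr):
    # interpret the git mode string as an octal number
    return int(modestr, 8)

assert mode_str_to_int_sketch("100644") == 0o100644  # regular file
assert mode_str_to_int_sketch("040000") == 0o40000   # directory (tree)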
Example #5
0
	def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode,
				 b_mode, new_file, deleted_file, rename_from,
				 rename_to, diff):
	
		self.a_mode = a_mode
		self.b_mode = b_mode
		
		if self.a_mode:
			self.a_mode = mode_str_to_int(self.a_mode)
		if self.b_mode:
			self.b_mode = mode_str_to_int(self.b_mode)
		
		if a_blob_id is None:
			self.a_blob = None
		else:
			self.a_blob = Blob(repo, hex_to_bin(a_blob_id), mode=self.a_mode, path=a_path)
		if b_blob_id is None:
			self.b_blob = None
		else:
			self.b_blob = Blob(repo, hex_to_bin(b_blob_id), mode=self.b_mode, path=b_path)
		
		self.new_file = new_file
		self.deleted_file = deleted_file
		
		# be clear and use None instead of empty strings
		self.rename_from = rename_from or None
		self.rename_to = rename_to or None
		
		self.diff = diff
Example #6
0
    def test_reading(self):
        gdb = PureGitODB(os.path.join(rorepo_dir(), 'objects'))

        # we have packs and loose objects, alternates doesn't necessarily exist
        assert 1 < len(gdb.databases()) < 4

        # access should be possible
        git_sha = hex_to_bin("5aebcd5cb3340fb31776941d7e4d518a712a8655")
        assert isinstance(gdb.info(git_sha), OInfo)
        assert isinstance(gdb.stream(git_sha), OStream)
        assert gdb.size() > 200
        sha_list = list(gdb.sha_iter())
        assert len(sha_list) == gdb.size()

        # This is actually a test for compound functionality, but it doesn't
        # have a separate test module
        # test partial shas
        # this one is uneven and quite short
        assert gdb.partial_to_complete_sha_hex('5aebcd') == hex_to_bin(
            "5aebcd5cb3340fb31776941d7e4d518a712a8655")

        # mix even/uneven hexshas
        for i, binsha in enumerate(sha_list[:50]):
            assert gdb.partial_to_complete_sha_hex(
                bin_to_hex(binsha)[:8 - (i % 2)]) == binsha
        # END for each sha

        self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex,
                              "0000")
Example #7
0
 def test_reading(self):
     gdb = PureGitODB(os.path.join(rorepo_dir(), 'objects'))
     
     # we have packs and loose objects, alternates doesn't necessarily exist
     assert 1 < len(gdb.databases()) < 4
     
     # access should be possible
     git_sha = hex_to_bin("5aebcd5cb3340fb31776941d7e4d518a712a8655")
     assert isinstance(gdb.info(git_sha), OInfo)
     assert isinstance(gdb.stream(git_sha), OStream)
     assert gdb.size() > 200
     sha_list = list(gdb.sha_iter())
     assert len(sha_list) == gdb.size()
     
     
     # This is actually a test for compound functionality, but it doesn't 
     # have a separate test module
     # test partial shas
     # this one is uneven and quite short
     assert gdb.partial_to_complete_sha_hex('5aebcd') == hex_to_bin("5aebcd5cb3340fb31776941d7e4d518a712a8655")
     
     # mix even/uneven hexshas
     for i, binsha in enumerate(sha_list[:50]):
         assert gdb.partial_to_complete_sha_hex(bin_to_hex(binsha)[:8-(i%2)]) == binsha
     # END for each sha
     
     self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex, "0000")
Example #8
0
 def partial_to_complete_sha_hex(self, partial_hexsha):
     len_partial_hexsha = len(partial_hexsha)
     if len_partial_hexsha % 2 != 0:
         partial_binsha = hex_to_bin(partial_hexsha + "0")
     else:
         partial_binsha = hex_to_bin(partial_hexsha)
     # END assure successful binary conversion
     return self.partial_to_complete_sha(partial_binsha, len(partial_hexsha))
Example #9
0
 def partial_to_complete_sha_hex(self, partial_hexsha):
     len_partial_hexsha = len(partial_hexsha)
     if len_partial_hexsha % 2 != 0:
         partial_binsha = hex_to_bin(partial_hexsha + "0")
     else:
         partial_binsha = hex_to_bin(partial_hexsha)
     # END assure successful binary conversion
     return self.partial_to_complete_sha(partial_binsha, len(partial_hexsha))
Example #10
0
    def _deserialize(self, stream):
        """:param from_rev_list: if true, the stream format is coming from the rev-list command
        Otherwise it is assumed to be a plain data stream from our object"""
        readline = stream.readline
        self.tree = Tree(self.odb, hex_to_bin(readline().split()[1]), Tree.tree_id << 12, '')

        self.parents = list()
        next_line = None
        while True:
            parent_line = readline()
            if not parent_line.startswith('parent'):
                next_line = parent_line
                break
            # END abort reading parents
            self.parents.append(type(self)(self.odb, hex_to_bin(parent_line.split()[-1])))
        # END for each parent line
        self.parents = tuple(self.parents)

        self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(next_line)
        self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(readline())

        # now we can have the encoding line, or an empty line followed by the optional
        # message.
        self.encoding = self.default_encoding
        # read encoding or empty line to separate message
        enc = readline()
        enc = enc.strip()
        if enc:
            self.encoding = enc[enc.find(' ') + 1:]
            # now comes the message separator
            readline()
        # END handle encoding

        # decode the authors name
        try:
            self.author.name = self.author.name.decode(self.encoding)
        except UnicodeDecodeError:
            print >> sys.stderr, "Failed to decode author name '%s' using encoding %s" % (
                self.author.name, self.encoding)
        # END handle author's encoding

        # decode committer name
        try:
            self.committer.name = self.committer.name.decode(self.encoding)
        except UnicodeDecodeError:
            print >> sys.stderr, "Failed to decode committer name '%s' using encoding %s" % (
                self.committer.name, self.encoding)
        # END handle author's encoding

        # a stream from our data simply gives us the plain message
        # The end of our message stream is marked with a newline that we strip
        self.message = stream.read()
        try:
            self.message = self.message.decode(self.encoding)
        except UnicodeDecodeError:
            print >> sys.stderr, "Failed to decode message '%s' using encoding %s" % (self.message, self.encoding)
        # END exception handling
        return self
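
A self-contained illustration of the header layout the parser above walks through: one tree line, zero or more parent lines, then the author line (the shas and names here are illustrative).

from io import StringIO

stream = StringIO(
    "tree 5aebcd5cb3340fb31776941d7e4d518a712a8655\n"
    "parent 4c39f9da792792d4e73fc3a5effde66576ae128c\n"
    "parent 7bb839852ed5e3a069966281bb08d50012fb309b\n"
    "author A U Thor <au@example.com> 1192271832 -0700\n"
)
tree_sha = stream.readline().split()[1]
parents = []
while True:
    line = stream.readline()
    if not line.startswith("parent"):
        break  # the first non-parent line is the author line
    parents.append(line.split()[-1])
assert tree_sha.startswith("5aebcd") and len(parents) == 2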
Example #11
0
	def _deserialize(self, stream):
		""":param from_rev_list: if true, the stream format is coming from the rev-list command
		Otherwise it is assumed to be a plain data stream from our object"""
		readline = stream.readline
		self.tree = Tree(self.odb, hex_to_bin(readline().split()[1]), Tree.tree_id<<12, '')

		self.parents = list()
		next_line = None
		while True:
			parent_line = readline()
			if not parent_line.startswith('parent'):
				next_line = parent_line
				break
			# END abort reading parents
			self.parents.append(type(self)(self.odb, hex_to_bin(parent_line.split()[-1])))
		# END for each parent line
		self.parents = tuple(self.parents)
		
		self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(next_line)
		self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(readline())
		
		
		# now we can have the encoding line, or an empty line followed by the optional
		# message.
		self.encoding = self.default_encoding
		# read encoding or empty line to separate message
		enc = readline()
		enc = enc.strip()
		if enc:
			self.encoding = enc[enc.find(' ')+1:]
			# now comes the message separator 
			readline()
		# END handle encoding
		
		# decode the authors name
		try:
			self.author.name = self.author.name.decode(self.encoding) 
		except UnicodeDecodeError:
			print >> sys.stderr, "Failed to decode author name '%s' using encoding %s" % (self.author.name, self.encoding)
		# END handle author's encoding
		
		# decode committer name
		try:
			self.committer.name = self.committer.name.decode(self.encoding) 
		except UnicodeDecodeError:
			print >> sys.stderr, "Failed to decode committer name '%s' using encoding %s" % (self.committer.name, self.encoding)
		# END handle author's encoding
		
		# a stream from our data simply gives us the plain message
		# The end of our message stream is marked with a newline that we strip
		self.message = stream.read()
		try:
			self.message = self.message.decode(self.encoding)
		except UnicodeDecodeError:
			print >> sys.stderr, "Failed to decode message '%s' using encoding %s" % (self.message, self.encoding)
		# END exception handling 
		return self
Example #12
0
    def __init__(self, repo, a_rawpath, b_rawpath, a_blob_id, b_blob_id,
                 a_mode, b_mode, new_file, deleted_file, copied_file,
                 raw_rename_from, raw_rename_to, diff, change_type, score):

        self.a_mode = a_mode
        self.b_mode = b_mode

        assert a_rawpath is None or isinstance(a_rawpath, binary_type)
        assert b_rawpath is None or isinstance(b_rawpath, binary_type)
        self.a_rawpath = a_rawpath
        self.b_rawpath = b_rawpath

        if self.a_mode:
            self.a_mode = mode_str_to_int(self.a_mode)
        if self.b_mode:
            self.b_mode = mode_str_to_int(self.b_mode)

        # Determine whether this diff references a submodule. If it does, we
        # need to point "repo" at the corresponding submodule's repo instead
        if repo and a_rawpath:
            for submodule in repo.submodules:
                if submodule.path == a_rawpath.decode("utf-8"):
                    if submodule.module_exists():
                        repo = submodule.module()
                    break

        if a_blob_id is None or a_blob_id == self.NULL_HEX_SHA:
            self.a_blob = None
        else:
            self.a_blob = Blob(repo,
                               hex_to_bin(a_blob_id),
                               mode=self.a_mode,
                               path=self.a_path)

        if b_blob_id is None or b_blob_id == self.NULL_HEX_SHA:
            self.b_blob = None
        else:
            self.b_blob = Blob(repo,
                               hex_to_bin(b_blob_id),
                               mode=self.b_mode,
                               path=self.b_path)

        self.new_file = new_file
        self.deleted_file = deleted_file
        self.copied_file = copied_file

        # be clear and use None instead of empty strings
        assert raw_rename_from is None or isinstance(raw_rename_from,
                                                     binary_type)
        assert raw_rename_to is None or isinstance(raw_rename_to, binary_type)
        self.raw_rename_from = raw_rename_from or None
        self.raw_rename_to = raw_rename_to or None

        self.diff = diff
        self.change_type = change_type
        self.score = score
Example #13
0
	def test_decompress_reader_special_case(self):
		odb = PureLooseObjectODB(fixture_path('objects'))
		ostream = odb.stream(hex_to_bin('7bb839852ed5e3a069966281bb08d50012fb309b'))
		
		# if there is a bug, we will be missing one byte exactly !
		data = ostream.read()
		assert len(data) == ostream.size
Example #14
0
    def _set_cache_(self, attr):
        """Cache all our attributes at once"""
        if attr in TagObject.__slots__:
            ostream = self.odb.stream(self.binsha)
            lines = ostream.read().splitlines()

            obj, hexsha = lines[0].split(" ")       # object <hexsha>
            type_token, type_name = lines[1].split(" ")  # type <type_name>
            self.object = get_object_type_by_name(type_name)(self.odb, hex_to_bin(hexsha))

            self.tag = lines[2][4:]  # tag <tag name>

            tagger_info = lines[3][7:]  # tagger <actor> <date>
            self.tagger, self.tagged_date, self.tagger_tz_offset = parse_actor_and_date(tagger_info)

            # line 4 is empty - it could mark the beginning of the next header.
            # In case there really is no message, it would not exist. Otherwise
            # a newline separates the header from the message
            if len(lines) > 5:
                self.message = "\n".join(lines[5:])
            else:
                self.message = ''
        # END check our attributes
        else:
            super(TagObject, self)._set_cache_(attr)
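
An illustrative tag payload matching the fixed offsets used above (lines[2][4:] skips "tag ", lines[3][7:] skips "tagger "); the sha and names are made up for this example.

lines = (
    "object 5aebcd5cb3340fb31776941d7e4d518a712a8655\n"
    "type commit\n"
    "tag v0.1.0\n"
    "tagger A U Thor <au@example.com> 1192271832 -0700\n"
    "\n"
    "release notes\n"
).splitlines()
assert lines[2][4:] == "v0.1.0"
assert lines[3][7:] == "A U Thor <au@example.com> 1192271832 -0700"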
Example #15
0
    def _iter_from_process_or_stream(cls, odb, proc_or_stream):
        """Parse out commit information into a list of Commit objects
        We expect one-line per commit, and parse the actual commit information directly
        from our lighting fast object database

        :param proc: git-rev-list process instance - one sha per line
        :return: iterator returning Commit objects"""
        stream = proc_or_stream
        close_std_err = False
        if not hasattr(stream, 'readline'):
            stream = proc_or_stream.stdout
            close_std_err = True

        readline = stream.readline
        try:
            while True:
                line = readline()
                if not line:
                    break
                hexsha = line.strip()
                if len(hexsha) > 40:
                    # split additional information, as returned by bisect for instance
                    hexsha, rest = line.split(None, 1)
                # END handle extra info

                assert len(hexsha) == 40, "Invalid line: %s" % hexsha
                yield cls(odb, hex_to_bin(hexsha))
            # END for each line in stream
        finally:
            stream.close()
            if close_std_err:
                proc_or_stream.stderr.close()
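
A short check of the extra-info handling above: bisect output can append text after the 40-character hexsha, which split(None, 1) strips off again.

line = "5aebcd5cb3340fb31776941d7e4d518a712a8655 extra bisect info\n"
hexsha = line.strip()
if len(hexsha) > 40:
    hexsha, rest = line.split(None, 1)
assert len(hexsha) == 40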
Example #16
0
	def _set_cache_(self, attr):
		"""Cache all our attributes at once"""
		if attr in TagObject.__slots__:
			ostream = self.odb.stream(self.binsha)
			lines = ostream.read().splitlines()
			
			obj, hexsha = lines[0].split(" ")		# object <hexsha>
			type_token, type_name = lines[1].split(" ") # type <type_name>
			self.object = get_object_type_by_name(type_name)(self.odb, hex_to_bin(hexsha))
			
			self.tag = lines[2][4:]	 # tag <tag name>
			
			tagger_info = lines[3][7:]  # tagger <actor> <date>
			self.tagger, self.tagged_date, self.tagger_tz_offset = parse_actor_and_date(tagger_info)
			
			# line 4 is empty - it could mark the beginning of the next header.
			# In case there really is no message, it would not exist. Otherwise
			# a newline separates the header from the message
			if len(lines) > 5:
				self.message = "\n".join(lines[5:])
			else:
				self.message = ''
		# END check our attributes
		else:
			super(TagObject, self)._set_cache_(attr)
Example #17
0
 def store(self, istream):
     # TODO: remove this check once the required functionality was merged in pygit2
     if hasattr(self._py2_repo, 'write'):
         istream.binsha = hex_to_bin(self._py2_repo.write(type_to_type_id_map[istream.type], istream.read()))
         return istream
     else:
         return super(Pygit2GitODB, self).store(istream)
Example #18
0
	def store(self, istream):
		# TODO: remove this check once the required functionality was merged in pygit2
		if hasattr(self._py2_repo, 'write'):
			istream.binsha = hex_to_bin(self._py2_repo.write(type_to_type_id_map[istream.type], istream.read()))
			return istream
		else:
			return super(Pygit2GitODB, self).store(istream)
Example #19
0
    def _iter_from_process_or_stream(cls, repo, proc_or_stream):
        """Parse out commit information into a list of Commit objects
        We expect one-line per commit, and parse the actual commit information directly
        from our lighting fast object database

        :param proc: git-rev-list process instance - one sha per line
        :return: iterator returning Commit objects"""
        stream = proc_or_stream
        if not hasattr(stream, 'readline'):
            stream = proc_or_stream.stdout

        readline = stream.readline
        while True:
            line = readline()
            if not line:
                break
            hexsha = line.strip()
            if len(hexsha) > 40:
                # split additional information, as returned by bisect for instance
                hexsha, _ = line.split(None, 1)
            # END handle extra info

            assert len(hexsha) == 40, "Invalid line: %s" % hexsha
            yield Commit(repo, hex_to_bin(hexsha))
        # END for each line in stream
        # TODO: Review this - it seems process handling got a bit out of control
        # due to many developers trying to fix the open file handles issue
        if hasattr(proc_or_stream, 'wait'):
            finalize_process(proc_or_stream)
Example #20
0
    def test_index_merge_tree(self, rw_repo):
        # A bit out of place, but we need a different repo for this:
        self.assertNotEqual(self.rorepo, rw_repo)
        self.assertEqual(len(set((self.rorepo, self.rorepo, rw_repo, rw_repo))), 2)

        # SINGLE TREE MERGE
        # current index is at the (virtual) cur_commit
        next_commit = "4c39f9da792792d4e73fc3a5effde66576ae128c"
        parent_commit = rw_repo.head.commit.parents[0]
        manifest_key = IndexFile.entry_key('MANIFEST.in', 0)
        manifest_entry = rw_repo.index.entries[manifest_key]
        rw_repo.index.merge_tree(next_commit)
        # only one change should be recorded
        assert manifest_entry.binsha != rw_repo.index.entries[manifest_key].binsha

        rw_repo.index.reset(rw_repo.head)
        self.assertEqual(rw_repo.index.entries[manifest_key].binsha, manifest_entry.binsha)

        # FAKE MERGE
        #############
        # Add a change with a NULL sha that should conflict with next_commit. We
        # pretend there was a change, but we do not even bother adding a proper
        # sha for it ( which makes things faster of course )
        manifest_fake_entry = BaseIndexEntry((manifest_entry[0], b"\0" * 20, 0, manifest_entry[3]))
        # try write flag
        self._assert_entries(rw_repo.index.add([manifest_fake_entry], write=False))
        # add actually resolves the null-hex-sha for us as a feature, but we can
        # edit the index manually
        assert rw_repo.index.entries[manifest_key].binsha != Object.NULL_BIN_SHA
        # must operate on the same index for this! It's a bit problematic as
        # it might confuse people
        index = rw_repo.index
        index.entries[manifest_key] = IndexEntry.from_base(manifest_fake_entry)
        index.write()
        self.assertEqual(rw_repo.index.entries[manifest_key].hexsha, Diff.NULL_HEX_SHA)

        # write an unchanged index ( just for the fun of it )
        rw_repo.index.write()

        # a three-way merge would result in a conflict and fail, as the command will
        # not overwrite any entries in our index and hence leave them unmerged. This is
        # mainly a protection feature as the current index is not yet in a tree
        self.failUnlessRaises(GitCommandError, index.merge_tree, next_commit, base=parent_commit)

        # the only way to get the merged entries is to save the current index away into a tree,
        # which is like a temporary commit for us. This fails as well, as the NULL sha does not
        # have a corresponding object
        # NOTE: missing_ok is not a kwarg anymore, missing_ok is always true
        # self.failUnlessRaises(GitCommandError, index.write_tree)

        # if missing objects are okay, this would work though ( they are always okay now )
        # As we can't read back the tree with NULL_SHA, we rather set it to something else
        index.entries[manifest_key] = IndexEntry(manifest_entry[:1] + (hex_to_bin('f' * 40),) + manifest_entry[2:])
        tree = index.write_tree()

        # now make a proper three way merge with unmerged entries
        unmerged_tree = IndexFile.from_tree(rw_repo, parent_commit, tree, next_commit)
        unmerged_blobs = unmerged_tree.unmerged_blobs()
        self.assertEqual(len(unmerged_blobs), 1)
        self.assertEqual(list(unmerged_blobs.keys())[0], manifest_key[0])
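
A quick sanity check relating the two null-sha constants used in this test: the 20-byte binary NULL_BIN_SHA and the 40-character hexadecimal NULL_HEX_SHA are the same value in different encodings.

import binascii

NULL_BIN_SHA = b"\0" * 20
NULL_HEX_SHA = "0" * 40
assert binascii.b2a_hex(NULL_BIN_SHA).decode("ascii") == NULL_HEX_SHA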
Example #21
0
 def test_writing(self, path):
     NULL_BIN_SHA = '\0' * 20
     
     alt_path = os.path.join(path, 'alternates')
     rdb = PureReferenceDB(alt_path)
     assert len(rdb.databases()) == 0
     assert rdb.size() == 0
     assert len(list(rdb.sha_iter())) == 0
     
     # try empty, non-existing
     assert not rdb.has_object(NULL_BIN_SHA)
     
     
     # setup alternate file
     # add two, one is invalid
     own_repo_path = fixture_path('../../../.git/objects')       # use own repo
     self.make_alt_file(alt_path, [own_repo_path, "invalid/path"])
     rdb.update_cache()
     assert len(rdb.databases()) == 1
     
     # we should now find a default revision of ours
     git_sha = hex_to_bin("5aebcd5cb3340fb31776941d7e4d518a712a8655")
     assert rdb.has_object(git_sha)
     
     # remove valid
     self.make_alt_file(alt_path, ["just/one/invalid/path"])
     rdb.update_cache()
     assert len(rdb.databases()) == 0
     
     # add valid
     self.make_alt_file(alt_path, [own_repo_path])
     rdb.update_cache()
     assert len(rdb.databases()) == 1
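
A hypothetical stand-in for the make_alt_file helper used above (its real implementation is not shown here): git's alternates mechanism is just a text file listing one object-directory path per line.

def make_alt_file_sketch(alt_file_path, alt_db_paths):
    # write one object-directory path per line, as git's alternates file expects
    with open(alt_file_path, "w") as fp:
        fp.write("\n".join(alt_db_paths) + "\n")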
Example #22
0
    def test_index_merge_tree(self, rw_repo):
        # A bit out of place, but we need a different repo for this:
        self.assertNotEqual(self.rorepo, rw_repo)
        self.assertEqual(len(set((self.rorepo, self.rorepo, rw_repo, rw_repo))), 2)

        # SINGLE TREE MERGE
        # current index is at the (virtual) cur_commit
        next_commit = "4c39f9da792792d4e73fc3a5effde66576ae128c"
        parent_commit = rw_repo.head.commit.parents[0]
        manifest_key = IndexFile.entry_key('MANIFEST.in', 0)
        manifest_entry = rw_repo.index.entries[manifest_key]
        rw_repo.index.merge_tree(next_commit)
        # only one change should be recorded
        assert manifest_entry.binsha != rw_repo.index.entries[manifest_key].binsha

        rw_repo.index.reset(rw_repo.head)
        self.assertEqual(rw_repo.index.entries[manifest_key].binsha, manifest_entry.binsha)

        # FAKE MERGE
        #############
        # Add a change with a NULL sha that should conflict with next_commit. We
        # pretend there was a change, but we do not even bother adding a proper
        # sha for it ( which makes things faster of course )
        manifest_fake_entry = BaseIndexEntry((manifest_entry[0], b"\0" * 20, 0, manifest_entry[3]))
        # try write flag
        self._assert_entries(rw_repo.index.add([manifest_fake_entry], write=False))
        # add actually resolves the null-hex-sha for us as a feature, but we can
        # edit the index manually
        assert rw_repo.index.entries[manifest_key].binsha != Object.NULL_BIN_SHA
        # must operate on the same index for this! It's a bit problematic as
        # it might confuse people
        index = rw_repo.index
        index.entries[manifest_key] = IndexEntry.from_base(manifest_fake_entry)
        index.write()
        self.assertEqual(rw_repo.index.entries[manifest_key].hexsha, Diff.NULL_HEX_SHA)

        # write an unchanged index ( just for the fun of it )
        rw_repo.index.write()

        # a three-way merge would result in a conflict and fail, as the command will
        # not overwrite any entries in our index and hence leave them unmerged. This is
        # mainly a protection feature as the current index is not yet in a tree
        self.failUnlessRaises(GitCommandError, index.merge_tree, next_commit, base=parent_commit)

        # the only way to get the merged entries is to save the current index away into a tree,
        # which is like a temporary commit for us. This fails as well, as the NULL sha does not
        # have a corresponding object
        # NOTE: missing_ok is not a kwarg anymore, missing_ok is always true
        # self.failUnlessRaises(GitCommandError, index.write_tree)

        # if missing objects are okay, this would work though ( they are always okay now )
        # As we can't read back the tree with NULL_SHA, we rather set it to something else
        index.entries[manifest_key] = IndexEntry(manifest_entry[:1] + (hex_to_bin('f' * 40),) + manifest_entry[2:])
        tree = index.write_tree()

        # now make a proper three way merge with unmerged entries
        unmerged_tree = IndexFile.from_tree(rw_repo, parent_commit, tree, next_commit)
        unmerged_blobs = unmerged_tree.unmerged_blobs()
        self.assertEqual(len(unmerged_blobs), 1)
        self.assertEqual(list(unmerged_blobs.keys())[0], manifest_key[0])
Example #23
0
    def _iter_from_process_or_stream(cls, odb, proc_or_stream):
        """Parse out commit information into a list of Commit objects
        We expect one-line per commit, and parse the actual commit information directly
        from our lighting fast object database

        :param proc: git-rev-list process instance - one sha per line
        :return: iterator returning Commit objects"""
        stream = proc_or_stream
        close_std_err = False
        if not hasattr(stream, 'readline'):
            stream = proc_or_stream.stdout
            close_std_err = True

        readline = stream.readline
        try:
            while True:
                line = readline()
                if not line:
                    break
                hexsha = line.strip()
                if len(hexsha) > 40:
                    # split additional information, as returned by bisect for instance
                    hexsha, rest = line.split(None, 1)
                # END handle extra info

                assert len(hexsha) == 40, "Invalid line: %s" % hexsha
                yield cls(odb, hex_to_bin(hexsha))
            # END for each line in stream
        finally:
            stream.close()
            if close_std_err:
                proc_or_stream.stderr.close()
Example #24
0
    def _iter_from_process_or_stream(cls, repo, proc_or_stream):
        """Parse out commit information into a list of Commit objects
        We expect one-line per commit, and parse the actual commit information directly
        from our lighting fast object database

        :param proc: git-rev-list process instance - one sha per line
        :return: iterator returning Commit objects"""
        stream = proc_or_stream
        if not hasattr(stream, 'readline'):
            stream = proc_or_stream.stdout

        readline = stream.readline
        while True:
            line = readline()
            if not line:
                break
            hexsha = line.strip()
            if len(hexsha) > 40:
                # split additional information, as returned by bisect for instance
                hexsha, _ = line.split(None, 1)
            # END handle extra info

            assert len(hexsha) == 40, "Invalid line: %s" % hexsha
            yield Commit(repo, hex_to_bin(hexsha))
        # END for each line in stream
        # TODO: Review this - it seems process handling got a bit out of control
        # due to many developers trying to fix the open file handles issue
        if hasattr(proc_or_stream, 'wait'):
            finalize_process(proc_or_stream)
Example #25
0
 def _get_object(self):
     """
     :return:
         The object our ref currently refers to. Refs can be cached, they will
         always point to the actual object as it gets re-created on each query"""
     # have to be dynamic here as we may be a tag which can point to anything
     # Our path will be resolved to the hexsha which will be used accordingly
     return Object.new_from_sha(self.repo, hex_to_bin(self.dereference_recursive(self.repo, self.path)))
Example #26
0
    def test_decompress_reader_special_case(self):
        odb = PureLooseObjectODB(fixture_path('objects'))
        ostream = odb.stream(
            hex_to_bin('7bb839852ed5e3a069966281bb08d50012fb309b'))

        # if there is a bug, we will be missing one byte exactly !
        data = ostream.read()
        assert len(data) == ostream.size
Example #27
0
 def _get_object(self):
     """
     :return:
         The object our ref currently refers to. Refs can be cached, they will
         always point to the actual object as it gets re-created on each query"""
     # have to be dynamic here as we may be a tag which can point to anything
     # Our path will be resolved to the hexsha which will be used accordingly
     return Object.new_from_sha(self.repo, hex_to_bin(self.dereference_recursive(self.repo, self.path)))
Example #28
0
    def __init__(self, repo, a_rawpath, b_rawpath, a_blob_id, b_blob_id,
                 a_mode, b_mode, new_file, deleted_file, raw_rename_from,
                 raw_rename_to, diff, change_type, score):

        self.a_mode = a_mode
        self.b_mode = b_mode

        assert a_rawpath is None or isinstance(a_rawpath, binary_type)
        assert b_rawpath is None or isinstance(b_rawpath, binary_type)
        self.a_rawpath = a_rawpath
        self.b_rawpath = b_rawpath

        if self.a_mode:
            self.a_mode = mode_str_to_int(self.a_mode)
        if self.b_mode:
            self.b_mode = mode_str_to_int(self.b_mode)

        if a_blob_id is None or a_blob_id == self.NULL_HEX_SHA:
            self.a_blob = None
        else:
            self.a_blob = Blob(repo,
                               hex_to_bin(a_blob_id),
                               mode=self.a_mode,
                               path=self.a_path)

        if b_blob_id is None or b_blob_id == self.NULL_HEX_SHA:
            self.b_blob = None
        else:
            self.b_blob = Blob(repo,
                               hex_to_bin(b_blob_id),
                               mode=self.b_mode,
                               path=self.b_path)

        self.new_file = new_file
        self.deleted_file = deleted_file

        # be clear and use None instead of empty strings
        assert raw_rename_from is None or isinstance(raw_rename_from,
                                                     binary_type)
        assert raw_rename_to is None or isinstance(raw_rename_to, binary_type)
        self.raw_rename_from = raw_rename_from or None
        self.raw_rename_to = raw_rename_to or None

        self.diff = diff
        self.change_type = change_type
        self.score = score
Example #29
0
    def stream(self, sha):
        """For now, all lookup is done by git itself
        :note: As we don't know when the stream is actually read (and if it is 
            stored for later use) we read the data right away and cache it.
            This has HUGE performance implications, both for memory and for
            reading/deserializing objects, but we have no other choice in order
            to make the database behaviour consistent with other implementations!"""

        hexsha, typename, size, data = self._git.get_object_data(bin_to_hex(sha))
        return OStream(hex_to_bin(hexsha), typename, size, StringIO(data))
Example #30
0
 def stream(self, sha):
     """For now, all lookup is done by git itself
     :note: As we don't know when the stream is actually read (and if it is 
         stored for later use) we read the data right away and cache it.
         This has HUGE performance implications, both for memory and for
         reading/deserializing objects, but we have no other choice in order
         to make the database behaviour consistent with other implementations!"""
     
     hexsha, typename, size, data = self._git.get_object_data(bin_to_hex(sha))
     return OStream(hex_to_bin(hexsha), typename, size, StringIO(data))
Example #31
0
 def partial_to_complete_sha_hex(self, partial_hexsha):
     """:return: Full binary 20 byte sha from the given partial hexsha
     :raise AmbiguousObjectName:
     :raise BadObject:
     :note: currently we only raise BadObject as git does not communicate 
         AmbiguousObjects separately"""
     try:
         hexsha, typename, size = self._git.get_object_header(partial_hexsha)
         return hex_to_bin(hexsha)
     except (GitCommandError, ValueError):
         raise BadObject(partial_hexsha)
Example #32
0
 def partial_to_complete_sha_hex(self, partial_hexsha: str) -> bytes:
     """:return: Full binary 20 byte sha from the given partial hexsha
     :raise AmbiguousObjectName:
     :raise BadObject:
     :note: currently we only raise BadObject as git does not communicate
         AmbiguousObjects separately"""
     try:
         hexsha, _typename, _size = self._git.get_object_header(partial_hexsha)
         return hex_to_bin(hexsha)
     except (GitCommandError, ValueError) as e:
         raise BadObject(partial_hexsha) from e
Example #33
0
    def sha_iter(self):
        # find all files which look like an object, extract sha from there
        for root, dirs, files in os.walk(self.root_path()):
            root_base = basename(root)
            if len(root_base) != 2:
                continue

            for f in files:
                if len(f) != 38:
                    continue
                yield hex_to_bin(root_base + f)
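
The directory fan-out being reassembled above: a loose object lives at objects/<first 2 hex chars>/<remaining 38 hex chars>, so the directory name plus the file name give back the full 40-character hexsha.

hexsha = "5aebcd5cb3340fb31776941d7e4d518a712a8655"
root_base, fname = hexsha[:2], hexsha[2:]
assert len(root_base) == 2 and len(fname) == 38
assert root_base + fname == hexsha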
Example #34
0
    def sha_iter(self):
        # find all files which look like an object, extract sha from there
        for root, dirs, files in os.walk(self.root_path()):
            root_base = basename(root)
            if len(root_base) != 2:
                continue

            for f in files:
                if len(f) != 38:
                    continue
                yield hex_to_bin(root_base + f)
Example #35
0
def name_to_object(
    repo: 'Repo',
    name: str,
    return_ref: bool = False
) -> Union[SymbolicReference, 'Commit', 'TagObject', 'Blob', 'Tree']:
    """
    :return: object specified by the given name, hexshas ( short and long )
        as well as references are supported
    :param return_ref: if name specifies a reference, we will return the reference
        instead of the object. Otherwise it will raise BadObject or BadName
    """
    hexsha: Union[None, str, bytes] = None

    # is it a hexsha? Try the most common lengths, which are 7 to 40
    if repo.re_hexsha_shortened.match(name):
        if len(name) != 40:
            # find long sha for short sha
            hexsha = short_to_long(repo.odb, name)
        else:
            hexsha = name
        # END handle short shas
    # END find sha if it matches

    # if we couldn't find an object for what seemed to be a short hexsha,
    # try to find it as a reference anyway; it could be named 'aaa' for instance
    if hexsha is None:
        for base in ('%s', 'refs/%s', 'refs/tags/%s', 'refs/heads/%s',
                     'refs/remotes/%s', 'refs/remotes/%s/HEAD'):
            try:
                hexsha = SymbolicReference.dereference_recursive(
                    repo, base % name)
                if return_ref:
                    return SymbolicReference(repo, base % name)
                # END handle symbolic ref
                break
            except ValueError:
                pass
        # END for each base
    # END handle hexsha

    # didn't find any ref, this is an error
    if return_ref:
        raise BadObject("Couldn't find reference named %r" % name)
    # END handle return ref

    # tried everything ? fail
    if hexsha is None:
        raise BadName(name)
    # END assert hexsha was found

    return Object.new_from_sha(repo, hex_to_bin(hexsha))
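
A quick look at how the base patterns above expand a bare name into candidate ref paths:

name = "v1.0"
candidates = [base % name for base in
              ('%s', 'refs/%s', 'refs/tags/%s', 'refs/heads/%s',
               'refs/remotes/%s', 'refs/remotes/%s/HEAD')]
assert candidates[2] == 'refs/tags/v1.0'
assert candidates[5] == 'refs/remotes/v1.0/HEAD'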
Example #36
0
    def __init__(self, repo, a_rawpath, b_rawpath, a_blob_id, b_blob_id, a_mode,
                 b_mode, new_file, deleted_file, raw_rename_from,
                 raw_rename_to, diff, change_type):

        self.a_mode = a_mode
        self.b_mode = b_mode

        assert a_rawpath is None or isinstance(a_rawpath, binary_type)
        assert b_rawpath is None or isinstance(b_rawpath, binary_type)
        self.a_rawpath = a_rawpath
        self.b_rawpath = b_rawpath

        if self.a_mode:
            self.a_mode = mode_str_to_int(self.a_mode)
        if self.b_mode:
            self.b_mode = mode_str_to_int(self.b_mode)

        if a_blob_id is None or a_blob_id == self.NULL_HEX_SHA:
            self.a_blob = None
        else:
            self.a_blob = Blob(repo, hex_to_bin(a_blob_id), mode=self.a_mode, path=self.a_path)

        if b_blob_id is None or b_blob_id == self.NULL_HEX_SHA:
            self.b_blob = None
        else:
            self.b_blob = Blob(repo, hex_to_bin(b_blob_id), mode=self.b_mode, path=self.b_path)

        self.new_file = new_file
        self.deleted_file = deleted_file

        # be clear and use None instead of empty strings
        assert raw_rename_from is None or isinstance(raw_rename_from, binary_type)
        assert raw_rename_to is None or isinstance(raw_rename_to, binary_type)
        self.raw_rename_from = raw_rename_from or None
        self.raw_rename_to = raw_rename_to or None

        self.diff = diff
        self.change_type = change_type
Example #37
0
    def test_base_object(self):
        # test interface of base object classes
        types = (Blob, Tree, Commit, TagObject)
        self.assertEqual(len(types), len(self.type_tuples))

        s = set()
        num_objs = 0
        num_index_objs = 0
        for obj_type, (typename, hexsha, path) in zip(types, self.type_tuples):
            binsha = hex_to_bin(hexsha)
            item = None
            if path is None:
                item = obj_type(self.rorepo, binsha)
            else:
                item = obj_type(self.rorepo, binsha, 0, path)
            # END handle index objects
            num_objs += 1
            self.assertEqual(item.hexsha, hexsha)
            self.assertEqual(item.type, typename)
            assert item.size
            self.assertEqual(item, item)
            self.assertNotEqual(not item, item)
            self.assertEqual(str(item), item.hexsha)
            assert repr(item)
            s.add(item)

            if isinstance(item, base.IndexObject):
                num_index_objs += 1
                if hasattr(item, 'path'):                        # never runs here
                    assert not item.path.startswith("/")        # must be relative
                    assert isinstance(item.mode, int)
            # END index object check

            # read from stream
            data_stream = item.data_stream
            data = data_stream.read()
            assert data

            tmpfilename = tempfile.mktemp(suffix='test-stream')
            with open(tmpfilename, 'wb+') as tmpfile:
                self.assertEqual(item, item.stream_data(tmpfile))
                tmpfile.seek(0)
                self.assertEqual(tmpfile.read(), data)
            os.remove(tmpfilename)
        # END for each object type to create

        # each has a unique sha
        self.assertEqual(len(s), num_objs)
        self.assertEqual(len(s | s), num_objs)
        self.assertEqual(num_index_objs, 2)
Example #38
0
    def test_base_object(self):
        # test interface of base object classes
        types = (Blob, Tree, Commit, TagObject)
        self.assertEqual(len(types), len(self.type_tuples))

        s = set()
        num_objs = 0
        num_index_objs = 0
        for obj_type, (typename, hexsha, path) in zip(types, self.type_tuples):
            binsha = hex_to_bin(hexsha)
            item = None
            if path is None:
                item = obj_type(self.rorepo, binsha)
            else:
                item = obj_type(self.rorepo, binsha, 0, path)
            # END handle index objects
            num_objs += 1
            self.assertEqual(item.hexsha, hexsha)
            self.assertEqual(item.type, typename)
            assert item.size
            self.assertEqual(item, item)
            self.assertNotEqual(not item, item)
            self.assertEqual(str(item), item.hexsha)
            assert repr(item)
            s.add(item)

            if isinstance(item, base.IndexObject):
                num_index_objs += 1
                if hasattr(item, 'path'):  # never runs here
                    assert not item.path.startswith("/")  # must be relative
                    assert isinstance(item.mode, int)
            # END index object check

            # read from stream
            data_stream = item.data_stream
            data = data_stream.read()
            assert data

            tmpfilename = tempfile.mktemp(suffix='test-stream')
            with open(tmpfilename, 'wb+') as tmpfile:
                self.assertEqual(item, item.stream_data(tmpfile))
                tmpfile.seek(0)
                self.assertEqual(tmpfile.read(), data)
            os.remove(tmpfilename)
        # END for each object type to create

        # each has a unique sha
        self.assertEqual(len(s), num_objs)
        self.assertEqual(len(s | s), num_objs)
        self.assertEqual(num_index_objs, 2)
Example #39
0
    def test_base_object(self):
        # test interface of base object classes
        types = (Blob, Tree, Commit, TagObject)
        assert len(types) == len(self.type_tuples)

        s = set()
        num_objs = 0
        num_index_objs = 0
        for obj_type, (typename, hexsha, path) in zip(types, self.type_tuples):
            binsha = hex_to_bin(hexsha)
            item = None
            if path is None:
                item = obj_type(self.rorepo, binsha)
            else:
                item = obj_type(self.rorepo, binsha, 0, path)
            # END handle index objects
            num_objs += 1
            assert item.hexsha == hexsha
            assert item.type == typename
            assert item.size
            assert item == item
            assert not item != item
            assert str(item) == item.hexsha
            assert repr(item)
            s.add(item)

            if isinstance(item, base.IndexObject):
                num_index_objs += 1
                if hasattr(item, 'path'):  # never runs here
                    assert not item.path.startswith("/")  # must be relative
                    assert isinstance(item.mode, int)
            # END index object check

            # read from stream
            data_stream = item.data_stream
            data = data_stream.read()
            assert data

            tmpfile = os.tmpfile()
            assert item == item.stream_data(tmpfile)
            tmpfile.seek(0)
            assert tmpfile.read() == data
            # END stream to file directly
        # END for each object type to create

        # each has a unique sha
        assert len(s) == num_objs
        assert len(s | s) == num_objs
        assert num_index_objs == 2
Example #40
0
    def test_base_object(self):
        # test interface of base object classes
        types = (Blob, Tree, Commit, TagObject)
        assert len(types) == len(self.type_tuples)

        s = set()
        num_objs = 0
        num_index_objs = 0
        for obj_type, (typename, hexsha, path) in zip(types, self.type_tuples):
            binsha = hex_to_bin(hexsha)
            item = None
            if path is None:
                item = obj_type(self.rorepo, binsha)
            else:
                item = obj_type(self.rorepo, binsha, 0, path)
            # END handle index objects
            num_objs += 1
            assert item.hexsha == hexsha
            assert item.type == typename
            assert item.size
            assert item == item
            assert not item != item
            assert str(item) == item.hexsha
            assert repr(item)
            s.add(item)

            if isinstance(item, base.IndexObject):
                num_index_objs += 1
                if hasattr(item, 'path'):                        # never runs here
                    assert not item.path.startswith("/")        # must be relative
                    assert isinstance(item.mode, int)
            # END index object check

            # read from stream
            data_stream = item.data_stream
            data = data_stream.read()
            assert data

            tmpfile = os.tmpfile()
            assert item == item.stream_data(tmpfile)
            tmpfile.seek(0)
            assert tmpfile.read() == data
            # END stream to file directly
        # END for each object type to create

        # each has a unique sha
        assert len(s) == num_objs
        assert len(s | s) == num_objs
        assert num_index_objs == 2
Example #41
0
def extract_options(repo, blob_id):
    import git  # assumed to be imported at module level in the original; shown here for completeness
    from git.util import hex_to_bin

    blob = git.Blob(repo, hex_to_bin(blob_id))
    content = blob.data_stream.stream
    options = set()

    for line in iter(content.readline, b''):
        line = line.decode('utf-8').strip()

        if re_option.match(line):  # re_option: a compiled regex, assumed defined at module level
            line = content.readline().decode('utf-8').strip()
            options.add(line.split(';')[0])

    return options
Example #42
0
def name_to_object(repo, name, return_ref=False):
    """
    :return: object specified by the given name, hexshas ( short and long )
        as well as references are supported
    :param return_ref: if name specifies a reference, we will return the reference
        instead of the object. Otherwise it will raise BadObject
    """
    hexsha = None
    
    # is it a hexsha? Try the most common lengths, which are 7 to 40
    if repo.re_hexsha_shortened.match(name):
        if len(name) != 40:
            # find long sha for short sha
            hexsha = short_to_long(repo.odb, name)
        else:
            hexsha = name
        # END handle short shas
    #END find sha if it matches
    
    # if we couldn't find an object for what seemed to be a short hexsha,
    # try to find it as a reference anyway; it could be named 'aaa' for instance
    if hexsha is None:
        for base in ('%s', 'refs/%s', 'refs/tags/%s', 'refs/heads/%s', 'refs/remotes/%s', 'refs/remotes/%s/HEAD'):
            try:
                hexsha = SymbolicReference.dereference_recursive(repo, base % name)
                if return_ref:
                    return SymbolicReference(repo, base % name)
                #END handle symbolic ref
                break
            except ValueError:
                pass
        # END for each base
    # END handle hexsha

    # didn't find any ref, this is an error
    if return_ref:
        raise BadObject("Couldn't find reference named %r" % name)
    #END handle return ref

    # tried everything ? fail
    if hexsha is None:
        raise BadObject(name)
    # END assert hexsha was found
    
    return Object.new_from_sha(repo, hex_to_bin(hexsha))
Example #43
0
    def _deserialize(self, stream):
        """:param from_rev_list: if true, the stream format is coming from the rev-list command
        Otherwise it is assumed to be a plain data stream from our object"""
        readline = stream.readline
        self.tree = Tree(self.repo, hex_to_bin(readline().split()[1]),
                         Tree.tree_id << 12, '')

        self.parents = []
        next_line = None
        while True:
            parent_line = readline()
            if not parent_line.startswith(b'parent'):
                next_line = parent_line
                break
            # END abort reading parents
            self.parents.append(
                type(self)(self.repo,
                           hex_to_bin(
                               parent_line.split()[-1].decode('ascii'))))
        # END for each parent line
        self.parents = tuple(self.parents)

        # we don't know the actual author encoding before we have parsed it, so keep the lines around
        author_line = next_line
        committer_line = readline()

        # we might run into one or more mergetag blocks, skip those for now
        next_line = readline()
        while next_line.startswith(b'mergetag '):
            next_line = readline()
            while next_line.startswith(b' '):
                next_line = readline()
        # end skip mergetags

        # now we can have the encoding line, or an empty line followed by the optional
        # message.
        self.encoding = self.default_encoding
        self.gpgsig = None

        # read headers
        enc = next_line
        buf = enc.strip()
        while buf:
            if buf[0:9] == b"encoding ":  # len(b"encoding ") == 9
                self.encoding = buf[buf.find(b' ') + 1:].decode(
                    self.encoding, 'ignore')
            elif buf[0:7] == b"gpgsig ":
                sig = buf[buf.find(b' ') + 1:] + b"\n"
                is_next_header = False
                while True:
                    sigbuf = readline()
                    if not sigbuf:
                        break
                    if sigbuf[0:1] != b" ":
                        buf = sigbuf.strip()
                        is_next_header = True
                        break
                    sig += sigbuf[1:]
                # end read all signature
                self.gpgsig = sig.rstrip(b"\n").decode(self.encoding, 'ignore')
                if is_next_header:
                    continue
            buf = readline().strip()
        # decode the authors name

        try:
            self.author, self.authored_date, self.author_tz_offset = \
                parse_actor_and_date(author_line.decode(self.encoding, 'replace'))
        except UnicodeDecodeError:
            log.error("Failed to decode author line '%s' using encoding %s",
                      author_line,
                      self.encoding,
                      exc_info=True)

        try:
            self.committer, self.committed_date, self.committer_tz_offset = \
                parse_actor_and_date(committer_line.decode(self.encoding, 'replace'))
        except UnicodeDecodeError:
            log.error("Failed to decode committer line '%s' using encoding %s",
                      committer_line,
                      self.encoding,
                      exc_info=True)
        # END handle author's encoding

        # a stream from our data simply gives us the plain message
        # The end of our message stream is marked with a newline that we strip
        self.message = stream.read()
        try:
            self.message = self.message.decode(self.encoding, 'replace')
        except UnicodeDecodeError:
            log.error("Failed to decode message '%s' using encoding %s",
                      self.message,
                      self.encoding,
                      exc_info=True)
        # END exception handling

        return self
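
An illustrative gpgsig header block matching the continuation-line handling above: lines belonging to a multi-line header start with a single space, which the parser strips while accumulating the signature (the signature text here is a placeholder).

raw_lines = [
    b"gpgsig -----BEGIN PGP SIGNATURE-----",
    b" placeholder-signature-line",
    b" -----END PGP SIGNATURE-----",
]
sig = raw_lines[0][len(b"gpgsig "):] + b"\n"
for cont in raw_lines[1:]:
    assert cont.startswith(b" ")  # continuation lines carry a leading space
    sig += cont[1:] + b"\n"
assert sig.startswith(b"-----BEGIN PGP SIGNATURE-----")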
Example #44
0
    def blame(self, rev, file, incremental=False, **kwargs):
        """The blame information for the given file at the given revision.

        :param rev: revision specifier, see git-rev-parse for viable options.
        :return:
            list: [git.Commit, list: [<line>]]
            A list of tuples associating a Commit object with a list of lines that
            changed within the given commit. The Commit objects will be given in order
            of appearance."""
        if incremental:
            return self.blame_incremental(rev, file, **kwargs)

        data = self.git.blame(rev,
                              '--',
                              file,
                              p=True,
                              stdout_as_string=False,
                              **kwargs)
        commits = dict()
        blames = list()
        info = None

        keepends = True
        for line in data.splitlines(keepends):
            try:
                line = line.rstrip().decode(defenc)
            except UnicodeDecodeError:
                firstpart = ''
                is_binary = True
            else:
                # We don't know where the binary data ends, as it may contain multiple newlines
                # along the way. So we rely on being able to decode the line to tell us what it is.
                # This can absolutely fail even on text files, but even if it does, we should be
                # fine treating it as binary instead.
                parts = self.re_whitespace.split(line, 1)
                firstpart = parts[0]
                is_binary = False
            # end handle decode of line

            if self.re_hexsha_only.search(firstpart):
                # handles
                # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7        - indicates blame-data start
                # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2          - indicates
                # another line of blame with the same data
                digits = parts[-1].split(" ")
                if len(digits) == 3:
                    info = {'id': firstpart}
                    blames.append([None, []])
                elif info['id'] != firstpart:
                    info = {'id': firstpart}
                    blames.append([commits.get(firstpart), []])
                # END blame data initialization
            else:
                m = self.re_author_committer_start.search(firstpart)
                if m:
                    # handles:
                    # author Tom Preston-Werner
                    # author-mail <*****@*****.**>
                    # author-time 1192271832
                    # author-tz -0700
                    # committer Tom Preston-Werner
                    # committer-mail <*****@*****.**>
                    # committer-time 1192271832
                    # committer-tz -0700  - IGNORED BY US
                    role = m.group(0)
                    if firstpart.endswith('-mail'):
                        info["%s_email" % role] = parts[-1]
                    elif firstpart.endswith('-time'):
                        info["%s_date" % role] = int(parts[-1])
                    elif role == firstpart:
                        info[role] = parts[-1]
                    # END distinguish mail,time,name
                else:
                    # handle
                    # filename lib/grit.rb
                    # summary add Blob
                    # <and rest>
                    if firstpart.startswith('filename'):
                        info['filename'] = parts[-1]
                    elif firstpart.startswith('summary'):
                        info['summary'] = parts[-1]
                    elif firstpart == '':
                        if info:
                            sha = info['id']
                            c = commits.get(sha)
                            if c is None:
                                c = Commit(
                                    self,
                                    hex_to_bin(sha),
                                    author=Actor._from_string(
                                        info['author'] + ' ' +
                                        info['author_email']),
                                    authored_date=info['author_date'],
                                    committer=Actor._from_string(
                                        info['committer'] + ' ' +
                                        info['committer_email']),
                                    committed_date=info['committer_date'])
                                commits[sha] = c
                            # END if commit objects needs initial creation
                            if not is_binary:
                                if line and line[0] == '\t':
                                    line = line[1:]
                            else:
                                # NOTE: We are actually parsing lines out of binary data, which can lead to the
                                # binary being split up along the newline separator. We will append this to the blame
                                # we are currently looking at, even though it should be concatenated with the last line
                                # we have seen.
                                pass
                            # end handle line contents
                            blames[-1][0] = c
                            blames[-1][1].append(line)
                            info = {'id': sha}
                        # END if we collected commit info
                    # END distinguish filename,summary,rest
                # END distinguish author|committer vs filename,summary,rest
            # END distinguish hexsha vs other information
        return blames
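A minimal usage sketch for the method above; the repository path, revision, and file name are placeholders:

from git import Repo

repo = Repo('/path/to/repo')  # hypothetical repository path
for commit, lines in repo.blame('HEAD', 'README.md'):
    # each entry pairs a Commit with the lines that commit introduced
    print(commit.hexsha[:8], len(lines), 'lines')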
Example #45
0
    def blame_incremental(self, rev, file, **kwargs):
        """Iterator for blame information for the given file at the given revision.

        Unlike .blame(), this does not return the actual file's contents, only
        a stream of BlameEntry tuples.

        :param rev: revision specifier, see git-rev-parse for viable options.
        :return: lazy iterator of BlameEntry tuples, where the commit
                 indicates the commit to blame for the line, and range
                 indicates a span of line numbers in the resulting file.

        If you combine all line number ranges outputted by this command, you
        should get a continuous range spanning all line numbers in the file.
        """
        data = self.git.blame(rev,
                              '--',
                              file,
                              p=True,
                              incremental=True,
                              stdout_as_string=False,
                              **kwargs)
        commits = dict()

        stream = (line for line in data.split(b'\n') if line)
        while True:
            try:
                line = next(stream)
            except StopIteration:
                return  # the stream is exhausted; terminate this generator
            hexsha, orig_lineno, lineno, num_lines = line.split()
            lineno = int(lineno)
            num_lines = int(num_lines)
            orig_lineno = int(orig_lineno)
            if hexsha not in commits:
                # Now read the next few lines and build up a dict of properties
                # for this commit
                props = dict()
                while True:
                    line = next(stream)
                    if line == b'boundary':
                        # "boundary" indicates a root commit and occurs
                        # instead of the "previous" tag
                        continue

                    tag, value = line.split(b' ', 1)
                    props[tag] = value
                    if tag == b'filename':
                        # "filename" formally terminates the entry for --incremental
                        orig_filename = value
                        break

                c = Commit(
                    self,
                    hex_to_bin(hexsha),
                    author=Actor(
                        safe_decode(props[b'author']),
                        safe_decode(
                            props[b'author-mail'].lstrip(b'<').rstrip(b'>'))),
                    authored_date=int(props[b'author-time']),
                    committer=Actor(
                        safe_decode(props[b'committer']),
                        safe_decode(props[b'committer-mail'].lstrip(
                            b'<').rstrip(b'>'))),
                    committed_date=int(props[b'committer-time']))
                commits[hexsha] = c
            else:
                # Discard all lines until we find "filename" which is
                # guaranteed to be the last line
                while True:
                    line = next(
                        stream)  # will fail if we reach the EOF unexpectedly
                    tag, value = line.split(b' ', 1)
                    if tag == b'filename':
                        orig_filename = value
                        break

            yield BlameEntry(commits[hexsha], range(lineno,
                                                    lineno + num_lines),
                             safe_decode(orig_filename),
                             range(orig_lineno, orig_lineno + num_lines))
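A matching usage sketch, assuming BlameEntry is a namedtuple with fields commit, linenos, orig_path, and orig_linenos corresponding to the four values yielded above; the path and rev are placeholders:

from git import Repo

repo = Repo('/path/to/repo')  # hypothetical repository path
for entry in repo.blame_incremental('HEAD', 'README.md'):
    # entry.linenos is a range spanning the blamed lines in the current file
    print(entry.commit.hexsha[:8], list(entry.linenos), entry.orig_path)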
Example #46
0
    def test_base(self):
        rlp_head = fixture_path('reflog_HEAD')
        rlp_master = fixture_path('reflog_master')
        tdir = tempfile.mktemp(suffix="test_reflogs")
        os.mkdir(tdir)

        rlp_master_ro = RefLog.path(self.rorepo.head)
        assert osp.isfile(rlp_master_ro)

        # simple read
        reflog = RefLog.from_file(rlp_master_ro)
        assert reflog._path is not None
        assert isinstance(reflog, RefLog)
        assert len(reflog)

        # iter_entries works with path and with stream
        assert len(list(RefLog.iter_entries(open(rlp_master, 'rb'))))
        assert len(list(RefLog.iter_entries(rlp_master)))

        # raise on invalid reflog
        # TODO: Try multiple corrupted ones !
        pp = 'reflog_invalid_'
        for suffix in ('oldsha', 'newsha', 'email', 'date', 'sep'):
            self.assertRaises(ValueError, RefLog.from_file, fixture_path(pp + suffix))
        # END for each invalid file

        # cannot write an uninitialized reflog
        self.assertRaises(ValueError, RefLog().write)

        # test serialize and deserialize - results must match exactly
        binsha = hex_to_bin(('f' * 40).encode('ascii'))
        msg = "my reflog message"
        cr = self.rorepo.config_reader()
        for rlp in (rlp_head, rlp_master):
            reflog = RefLog.from_file(rlp)
            tfile = osp.join(tdir, osp.basename(rlp))
            reflog.to_file(tfile)
            assert reflog.write() is reflog

            # parsed result must match ...
            treflog = RefLog.from_file(tfile)
            assert treflog == reflog

            # ... as well as each byte of the written stream
            assert open(tfile).read() == open(rlp).read()

            # append an entry
            entry = RefLog.append_entry(cr, tfile, IndexObject.NULL_BIN_SHA, binsha, msg)
            assert entry.oldhexsha == IndexObject.NULL_HEX_SHA
            assert entry.newhexsha == 'f' * 40
            assert entry.message == msg
            assert RefLog.from_file(tfile)[-1] == entry

            # index entry
            # raises on invalid index
            self.assertRaises(IndexError, RefLog.entry_at, rlp, 10000)

            # indices can be positive ...
            assert isinstance(RefLog.entry_at(rlp, 0), RefLogEntry)
            RefLog.entry_at(rlp, 23)

            # ... and negative
            for idx in (-1, -24):
                RefLog.entry_at(rlp, idx)
            # END for each index to read
        # END for each reflog

        # finally remove our temporary data
        rmtree(tdir)
Example #47
0
    def _get_object_sha(self):
        """
        :return:
            The binary sha of the object our ref currently refers to. Refs can be cached; they will
            always point to the actual object as it gets re-created on each query"""
        return hex_to_bin(self.dereference_recursive(self.repo, self.path))
Example #48
0
def rev_parse(repo, rev):
    """
    :return: Object at the given revision, either Commit, Tag, Tree or Blob
    :param rev: git-rev-parse compatible revision specification as string, please see
        http://www.kernel.org/pub/software/scm/git/docs/git-rev-parse.html
        for details
    :raise BadObject: if the given revision could not be found
    :raise ValueError: If rev couldn't be parsed
    :raise IndexError: If invalid reflog index is specified"""

    # colon search mode ?
    if rev.startswith(':/'):
        # colon search mode
        raise NotImplementedError("commit by message search ( regex )")
    # END handle search

    obj = None
    ref = None
    output_type = "commit"
    start = 0
    parsed_to = 0
    lr = len(rev)
    while start < lr:
        if rev[start] not in "^~:@":
            start += 1
            continue
        # END handle start

        token = rev[start]

        if obj is None:
            # token is a rev name
            if start == 0:
                ref = repo.head.ref
            else:
                if token == '@':
                    ref = name_to_object(repo, rev[:start], return_ref=True)
                else:
                    obj = name_to_object(repo, rev[:start])
                # END handle token
            # END handle refname

            if ref is not None:
                obj = ref.commit
            # END handle ref
        # END initialize obj on first token

        start += 1

        # try to parse {type}
        if start < lr and rev[start] == '{':
            end = rev.find('}', start)
            if end == -1:
                raise ValueError("Missing closing brace to define type in %s" %
                                 rev)
            output_type = rev[start + 1:end]  # exclude brace

            # handle type
            if output_type == 'commit':
                pass  # default
            elif output_type == 'tree':
                try:
                    obj = to_commit(obj).tree
                except (AttributeError, ValueError):
                    pass  # error raised later
                # END exception handling
            elif output_type in ('', 'blob'):
                if obj.type == 'tag':
                    obj = deref_tag(obj)
                else:
                    # cannot do anything for non-tags
                    pass
                # END handle tag
            elif token == '@':
                # try single int
                assert ref is not None, "Require Reference to access reflog"
                revlog_index = None
                try:
                    # transform reversed index into the format of our revlog
                    revlog_index = -(int(output_type) + 1)
                except ValueError:
                    # TODO: Try to parse the other date options, using parse_date
                    # maybe
                    raise NotImplementedError(
                        "Support for additional @{...} modes not implemented")
                # END handle revlog index

                try:
                    entry = ref.log_entry(revlog_index)
                except IndexError:
                    raise IndexError("Invalid revlog index: %i" % revlog_index)
                # END handle index out of bound

                obj = Object.new_from_sha(repo, hex_to_bin(entry.newhexsha))

                # make it pass the following checks
                output_type = None
            else:
                raise ValueError("Invalid output type: %s ( in %s )" %
                                 (output_type, rev))
            # END handle output type

            # empty output types don't require any specific type, it's just about dereferencing tags
            if output_type and obj.type != output_type:
                raise ValueError(
                    "Could not accommodate requested object type %r, got %s" %
                    (output_type, obj.type))
            # END verify output type

            start = end + 1  # skip brace
            parsed_to = start
            continue
        # END parse type

        # try to parse a number
        num = 0
        if token != ":":
            found_digit = False
            while start < lr:
                if rev[start] in digits:
                    num = num * 10 + int(rev[start])
                    start += 1
                    found_digit = True
                else:
                    break
                # END handle number
            # END number parse loop

            # no explicit number given, 1 is the default
            # It could be 0 though
            if not found_digit:
                num = 1
            # END set default num
        # END number parsing only if non-blob mode

        parsed_to = start
        # handle hierarchy walk
        try:
            if token == "~":
                obj = to_commit(obj)
                for _ in xrange(num):
                    obj = obj.parents[0]
                # END for each history item to walk
            elif token == "^":
                obj = to_commit(obj)
                # must be n'th parent
                if num:
                    obj = obj.parents[num - 1]
            elif token == ":":
                if obj.type != "tree":
                    obj = obj.tree
                # END get tree type
                obj = obj[rev[start:]]
                parsed_to = lr
            else:
                raise ValueError("Invalid token: %r" % token)
            # END end handle tag
        except (IndexError, AttributeError):
            raise BadName(
                "Invalid revision spec '%s' - not enough parent commits to reach '%s%i'"
                % (rev, token, num))
        # END exception handling
    # END parse loop

    # still no obj? It's probably a simple name
    if obj is None:
        obj = name_to_object(repo, rev)
        parsed_to = lr
    # END handle simple name

    if obj is None:
        raise ValueError("Revision specifier could not be parsed: %s" % rev)

    if parsed_to != lr:
        raise ValueError(
            "Didn't consume complete rev spec %s, consumed part: %s" %
            (rev, rev[:parsed_to]))

    return obj
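A few revision specifiers exercising the token grammar handled above, assuming the function is importable as git.repo.fun.rev_parse (as in GitPython); the repository path and revs are placeholders:

from git import Repo
from git.repo.fun import rev_parse

repo = Repo('/path/to/repo')       # hypothetical repository path
rev_parse(repo, 'HEAD~2')          # '~' walks two first-parent steps back
rev_parse(repo, 'master^{tree}')   # '{type}' coerces the result to a Tree
rev_parse(repo, 'HEAD@{0}')        # '@{n}' indexes into the reflog
rev_parse(repo, 'HEAD:README.md')  # ':' looks the path up in the commit's tree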
Example #49
0
 def _get_object_sha(self):
     """
     :return:
         The binary sha of the object our ref currently refers to. Refs can be cached; they will
         always point to the actual object as it gets re-created on each query"""
     return hex_to_bin(self.dereference_recursive(self.repo, self.path))
Example #50
0
    def test_base(self):
        rlp_head = fixture_path('reflog_HEAD')
        rlp_master = fixture_path('reflog_master')
        tdir = tempfile.mktemp(suffix="test_reflogs")
        os.mkdir(tdir)

        rlp_master_ro = RefLog.path(self.rorepo.head)
        assert osp.isfile(rlp_master_ro)

        # simple read
        reflog = RefLog.from_file(rlp_master_ro)
        assert reflog._path is not None
        assert isinstance(reflog, RefLog)
        assert len(reflog)

        # iter_entries works with path and with stream
        assert len(list(RefLog.iter_entries(open(rlp_master, 'rb'))))
        assert len(list(RefLog.iter_entries(rlp_master)))

        # raise on invalid reflog
        # TODO: Try multiple corrupted ones !
        pp = 'reflog_invalid_'
        for suffix in ('oldsha', 'newsha', 'email', 'date', 'sep'):
            self.assertRaises(ValueError, RefLog.from_file, fixture_path(pp + suffix))
        # END for each invalid file

        # cannot write an uninitialized reflog
        self.assertRaises(ValueError, RefLog().write)

        # test serialize and deserialize - results must match exactly
        binsha = hex_to_bin(('f' * 40).encode('ascii'))
        msg = "my reflog message"
        cr = self.rorepo.config_reader()
        for rlp in (rlp_head, rlp_master):
            reflog = RefLog.from_file(rlp)
            tfile = osp.join(tdir, osp.basename(rlp))
            reflog.to_file(tfile)
            assert reflog.write() is reflog

            # parsed result must match ...
            treflog = RefLog.from_file(tfile)
            assert treflog == reflog

            # ... as well as each byte of the written stream
            assert open(tfile).read() == open(rlp).read()

            # append an entry
            entry = RefLog.append_entry(cr, tfile, IndexObject.NULL_BIN_SHA, binsha, msg)
            assert entry.oldhexsha == IndexObject.NULL_HEX_SHA
            assert entry.newhexsha == 'f' * 40
            assert entry.message == msg
            assert RefLog.from_file(tfile)[-1] == entry

            # index entry
            # raises on invalid index
            self.assertRaises(IndexError, RefLog.entry_at, rlp, 10000)

            # indices can be positive ...
            assert isinstance(RefLog.entry_at(rlp, 0), RefLogEntry)
            RefLog.entry_at(rlp, 23)

            # ... and negative
            for idx in (-1, -24):
                RefLog.entry_at(rlp, idx)
            # END for each index to read
        # END for each reflog

        # finally remove our temporary data
        rmtree(tdir)
Example #51
0
    def blame_incremental(self, rev, file, **kwargs):
        """Iterator for blame information for the given file at the given revision.

        Unlike .blame(), this does not return the actual file's contents, only
        a stream of BlameEntry tuples.

        :param rev: revision specifier, see git-rev-parse for viable options.
        :return: lazy iterator of BlameEntry tuples, where the commit
                 indicates the commit to blame for the line, and range
                 indicates a span of line numbers in the resulting file.

        If you combine all line number ranges outputted by this command, you
        should get a continuous range spanning all line numbers in the file.
        """
        data = self.git.blame(rev, '--', file, p=True, incremental=True, stdout_as_string=False, **kwargs)
        commits = dict()

        stream = (line for line in data.split(b'\n') if line)
        while True:
            try:
                line = next(stream)
            except StopIteration:
                return  # the stream is exhausted; terminate this generator
            hexsha, orig_lineno, lineno, num_lines = line.split()
            lineno = int(lineno)
            num_lines = int(num_lines)
            orig_lineno = int(orig_lineno)
            if hexsha not in commits:
                # Now read the next few lines and build up a dict of properties
                # for this commit
                props = dict()
                while True:
                    line = next(stream)
                    if line == b'boundary':
                        # "boundary" indicates a root commit and occurs
                        # instead of the "previous" tag
                        continue

                    tag, value = line.split(b' ', 1)
                    props[tag] = value
                    if tag == b'filename':
                        # "filename" formally terminates the entry for --incremental
                        orig_filename = value
                        break

                c = Commit(self, hex_to_bin(hexsha),
                           author=Actor(safe_decode(props[b'author']),
                                        safe_decode(props[b'author-mail'].lstrip(b'<').rstrip(b'>'))),
                           authored_date=int(props[b'author-time']),
                           committer=Actor(safe_decode(props[b'committer']),
                                           safe_decode(props[b'committer-mail'].lstrip(b'<').rstrip(b'>'))),
                           committed_date=int(props[b'committer-time']))
                commits[hexsha] = c
            else:
                # Discard the next line (it's a filename end tag)
                line = next(stream)
                tag, value = line.split(b' ', 1)
                assert tag == b'filename', 'Unexpected git blame output'
                orig_filename = value

            yield BlameEntry(commits[hexsha],
                             range(lineno, lineno + num_lines),
                             safe_decode(orig_filename),
                             range(orig_lineno, orig_lineno + num_lines))
Example #52
0
def blame(repo, start_commit, end_commit, filename):
    data = repo.git.blame('%s^..%s' % (start_commit, end_commit), '--', filename, p=True)
    commits = dict()
    blames = list()
    info = None

    for line in data.splitlines(False):
        parts = repo.re_whitespace.split(line, 1)
        firstpart = parts[0]
        if repo.re_hexsha_only.search(firstpart):
            # handles
            # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7        - indicates blame-data start
            # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2          - another line of blame with the same data
            digits = parts[-1].split(" ")
            if len(digits) == 3:
                info = {'id': firstpart}
                blames.append([None, []])
            elif info['id'] != firstpart:
                info = {'id': firstpart}
                blames.append([commits.get(firstpart), []])
            # END blame data initialization
        else:
            m = repo.re_author_committer_start.search(firstpart)
            if m:
                # handles:
                # author Tom Preston-Werner
                # author-mail <*****@*****.**>
                # author-time 1192271832
                # author-tz -0700
                # committer Tom Preston-Werner
                # committer-mail <*****@*****.**>
                # committer-time 1192271832
                # committer-tz -0700  - IGNORED BY US
                role = m.group(0)
                if firstpart.endswith('-mail'):
                    info["%s_email" % role] = parts[-1]
                elif firstpart.endswith('-time'):
                    info["%s_date" % role] = int(parts[-1])
                elif role == firstpart:
                    info[role] = parts[-1]
                # END distinguish mail,time,name
            else:
                # handle
                # filename lib/grit.rb
                # summary add Blob
                # <and rest>
                if firstpart.startswith('filename'):
                    info['filename'] = parts[-1]
                elif firstpart.startswith('summary'):
                    info['summary'] = parts[-1]
                elif firstpart.startswith('boundary'):
                    info['boundary'] = True
                elif firstpart == '':
                    if info:
                        sha = info['id']
                        c = commits.get(sha)
                        if c is None:
                            if info.get('boundary'):
                                commits[sha] = False
                            else:
                                c = repo.CommitCls(
                                    repo,
                                    hex_to_bin(sha),
                                    author=Actor._from_string(info['author'] + ' ' + info['author_email']),
                                    authored_date=info['author_date'],
                                    committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']),
                                    committed_date=info['committer_date'],
                                    message=info['summary']
                                )
                                commits[sha] = c
                        if c is not False:
                            # END if commit objects needs initial creation
                            m = repo.re_tab_full_line.search(line)
                            (text,) = m.groups()
                            blames[-1][0] = c
                            blames[-1][1].append(text)
                        info = {'id': sha}
                    # END if we collected commit info
                # END distinguish filename,summary,rest
            # END distinguish author|committer vs filename,summary,rest
        # END distinguish hexsha vs other information

    for commit, lines in blames:
        if commit is not None:
            yield commit, lines
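A hypothetical invocation of the generator above; `repo` must be a wrapper exposing the attributes used here (git, re_whitespace, re_hexsha_only, re_author_committer_start, re_tab_full_line, CommitCls), and the revisions and file name are placeholders:

for commit, lines in blame(repo, 'v1.0', 'HEAD', 'lib/grit.rb'):
    # boundary commits are filtered out; each yielded commit carries its blamed lines
    print(commit.hexsha[:8], len(lines))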
Example #53
0
    def blame(self, rev, file, incremental=False, **kwargs):
        """The blame information for the given file at the given revision.

        :param rev: revision specifier, see git-rev-parse for viable options.
        :return:
            list: [git.Commit, list: [<line>]]
            A list of lists associating a Commit object with a list of lines that
            changed within the given commit. The Commit objects will be given in order
            of appearance."""
        if incremental:
            return self.blame_incremental(rev, file, **kwargs)

        data = self.git.blame(rev, '--', file, p=True, stdout_as_string=False, **kwargs)
        commits = dict()
        blames = list()
        info = None

        keepends = True
        for line in data.splitlines(keepends):
            try:
                line = line.rstrip().decode(defenc)
            except UnicodeDecodeError:
                firstpart = ''
                is_binary = True
            else:
                # We don't know where the binary data ends, as it may contain multiple newlines
                # along the way. So we rely on being able to decode the line to tell us what it is.
                # This can absolutely fail even on text files, but even if it does, we should be
                # fine treating it as binary instead.
                parts = self.re_whitespace.split(line, 1)
                firstpart = parts[0]
                is_binary = False
            # end handle decode of line

            if self.re_hexsha_only.search(firstpart):
                # handles
                # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7        - indicates blame-data start
                # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2          - indicates
                # another line of blame with the same data
                digits = parts[-1].split(" ")
                if len(digits) == 3:
                    info = {'id': firstpart}
                    blames.append([None, []])
                elif info['id'] != firstpart:
                    info = {'id': firstpart}
                    blames.append([commits.get(firstpart), []])
                # END blame data initialization
            else:
                m = self.re_author_committer_start.search(firstpart)
                if m:
                    # handles:
                    # author Tom Preston-Werner
                    # author-mail <*****@*****.**>
                    # author-time 1192271832
                    # author-tz -0700
                    # committer Tom Preston-Werner
                    # committer-mail <*****@*****.**>
                    # committer-time 1192271832
                    # committer-tz -0700  - IGNORED BY US
                    role = m.group(0)
                    if firstpart.endswith('-mail'):
                        info["%s_email" % role] = parts[-1]
                    elif firstpart.endswith('-time'):
                        info["%s_date" % role] = int(parts[-1])
                    elif role == firstpart:
                        info[role] = parts[-1]
                    # END distinguish mail,time,name
                else:
                    # handle
                    # filename lib/grit.rb
                    # summary add Blob
                    # <and rest>
                    if firstpart.startswith('filename'):
                        info['filename'] = parts[-1]
                    elif firstpart.startswith('summary'):
                        info['summary'] = parts[-1]
                    elif firstpart == '':
                        if info:
                            sha = info['id']
                            c = commits.get(sha)
                            if c is None:
                                c = Commit(self, hex_to_bin(sha),
                                           author=Actor._from_string(info['author'] + ' ' + info['author_email']),
                                           authored_date=info['author_date'],
                                           committer=Actor._from_string(
                                               info['committer'] + ' ' + info['committer_email']),
                                           committed_date=info['committer_date'])
                                commits[sha] = c
                            # END if commit objects needs initial creation
                            if not is_binary:
                                if line and line[0] == '\t':
                                    line = line[1:]
                            else:
                                # NOTE: We are actually parsing lines out of binary data, which can lead to the
                                # binary being split up along the newline separator. We will append this to the blame
                                # we are currently looking at, even though it should be concatenated with the last line
                                # we have seen.
                                pass
                            # end handle line contents
                            blames[-1][0] = c
                            blames[-1][1].append(line)
                            info = {'id': sha}
                        # END if we collected commit info
                    # END distinguish filename,summary,rest
                # END distinguish author|committer vs filename,summary,rest
            # END distinguish hexsha vs other information
        return blames
Example #54
0
    def _deserialize(self, stream):
        """:param stream: a plain data stream containing our serialized commit object"""
        readline = stream.readline
        self.tree = Tree(self.repo, hex_to_bin(readline().split()[1]), Tree.tree_id << 12, '')

        self.parents = list()
        next_line = None
        while True:
            parent_line = readline()
            if not parent_line.startswith(b'parent'):
                next_line = parent_line
                break
            # END abort reading parents
            self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1].decode('ascii'))))
        # END for each parent line
        self.parents = tuple(self.parents)

        # we don't know the actual author encoding before we have parsed it, so keep the lines around
        author_line = next_line
        committer_line = readline()

        # we might run into one or more mergetag blocks, skip those for now
        next_line = readline()
        while next_line.startswith(b'mergetag '):
            next_line = readline()
            while next_line.startswith(b' '):
                next_line = readline()
        # end skip mergetags

        # now we can have the encoding line, or an empty line followed by the optional
        # message.
        self.encoding = self.default_encoding
        self.gpgsig = None

        # read headers
        enc = next_line
        buf = enc.strip()
        while buf:
            if buf[0:9] == b"encoding ":
                self.encoding = buf[buf.find(b' ') + 1:].decode('ascii')
            elif buf[0:7] == b"gpgsig ":
                sig = buf[buf.find(b' ') + 1:] + b"\n"
                is_next_header = False
                while True:
                    sigbuf = readline()
                    if not sigbuf:
                        break
                    if sigbuf[0:1] != b" ":
                        buf = sigbuf.strip()
                        is_next_header = True
                        break
                    sig += sigbuf[1:]
                # end read all signature
                self.gpgsig = sig.rstrip(b"\n").decode('ascii')
                if is_next_header:
                    continue
            buf = readline().strip()
        # decode the author's name

        try:
            self.author, self.authored_date, self.author_tz_offset = \
                parse_actor_and_date(author_line.decode(self.encoding, 'replace'))
        except UnicodeDecodeError:
            log.error("Failed to decode author line '%s' using encoding %s", author_line, self.encoding,
                      exc_info=True)

        try:
            self.committer, self.committed_date, self.committer_tz_offset = \
                parse_actor_and_date(committer_line.decode(self.encoding, 'replace'))
        except UnicodeDecodeError:
            log.error("Failed to decode committer line '%s' using encoding %s", committer_line, self.encoding,
                      exc_info=True)
        # END handle author's encoding

        # a stream from our data simply gives us the plain message
        # The end of our message stream is marked with a newline that we strip
        self.message = stream.read()
        try:
            self.message = self.message.decode(self.encoding, 'replace')
        except UnicodeDecodeError:
            log.error("Failed to decode message '%s' using encoding %s", self.message, self.encoding, exc_info=True)
        # END exception handling

        return self
Example #55
0
 def stream(self, sha):
     """For now, all lookup is done by git itself"""
     hexsha, typename, size, stream = self._git.stream_object_data(bin_to_hex(sha))
     return OStream(hex_to_bin(hexsha), typename, size, stream)
Example #56
0
def _h2b(prefix):
    return hex_to_bin(_pad(prefix))
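_pad is not shown in this example; a plausible sketch, assuming it merely extends a partial hex sha with zeros to the full 40-character form that hex_to_bin expects:

def _pad(partial_hexsha, full_len=40):
    # hypothetical helper: right-pad a partial hex sha to full length
    return partial_hexsha + "0" * (full_len - len(partial_hexsha))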
Example #57
0
	def test_list(self):
		item = Commit.list_items(self.rorepo, '0.1.5', max_count=5)[
			hex_to_bin('5117c9c8a4d3af19a9958677e45cda9269de1541')]
		assert isinstance(item, Commit)
Example #58
0
 def info(self, sha):
     hexsha, typename, size = self._git.get_object_header(bin_to_hex(sha))
     return OInfo(hex_to_bin(hexsha), typename, size)
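A minimal sketch of how the two adapters above (Examples #55 and #58) pair up, assuming a gitdb-style database object `db` exposing them and a placeholder sha:

binsha = hex_to_bin('5117c9c8a4d3af19a9958677e45cda9269de1541')  # placeholder sha
info = db.info(binsha)       # OInfo carries (binsha, typename, size)
ostream = db.stream(binsha)  # OStream adds a readable stream of the raw data
data = ostream.read()        # consume the object's payload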
Example #59
0
def rev_parse(repo, rev):
    """
    :return: Object at the given revision, either Commit, Tag, Tree or Blob
    :param rev: git-rev-parse compatible revision specification, please see
        http://www.kernel.org/pub/software/scm/git/docs/git-rev-parse.html
        for details
    :note: Currently there is no access to the rev-log; rev-specs may only contain
        topological tokens such as ~ and ^.
    :raise BadObject: if the given revision could not be found
    :raise ValueError: If rev couldn't be parsed
    :raise IndexError: If invalid reflog index is specified"""
    
    # colon search mode ?
    if rev.startswith(':/'):
        # colon search mode
        raise NotImplementedError("commit by message search ( regex )")
    # END handle search
    
    obj = None
    ref = None
    output_type = "commit"
    start = 0
    parsed_to = 0
    lr = len(rev)
    while start < lr:
        if rev[start] not in "^~:@":
            start += 1
            continue
        # END handle start
        
        token = rev[start]
        
        if obj is None:
            # token is a rev name
            if start == 0:
                ref = repo.head.ref
            else:
                if token == '@':
                    ref = name_to_object(repo, rev[:start], return_ref=True)
                else:
                    obj = name_to_object(repo, rev[:start])
                #END handle token
            #END handle refname
            
            if ref is not None:
                obj = ref.commit
            #END handle ref
        # END initialize obj on first token
        
        
        start += 1
        
        # try to parse {type}
        if start < lr and rev[start] == '{':
            end = rev.find('}', start)
            if end == -1:
                raise ValueError("Missing closing brace to define type in %s" % rev)
            output_type = rev[start+1:end]  # exclude brace
            
            # handle type 
            if output_type == 'commit':
                pass # default
            elif output_type == 'tree':
                try:
                    obj = to_commit(obj).tree
                except (AttributeError, ValueError):
                    pass    # error raised later
                # END exception handling
            elif output_type in ('', 'blob'):
                if obj.type == 'tag':
                    obj = deref_tag(obj)
                else:
                    # cannot do anything for non-tags
                    pass
                # END handle tag
            elif token == '@':
                # try single int
                assert ref is not None, "Require Reference to access reflog"
                revlog_index = None
                try:
                    # transform reversed index into the format of our revlog
                    revlog_index = -(int(output_type)+1)
                except ValueError:
                    # TODO: Try to parse the other date options, using parse_date
                    # maybe
                    raise NotImplementedError("Support for additional @{...} modes not implemented")
                #END handle revlog index
                
                try:
                    entry = ref.log_entry(revlog_index)
                except IndexError:
                    raise IndexError("Invalid revlog index: %i" % revlog_index)
                #END handle index out of bound
                
                obj = Object.new_from_sha(repo, hex_to_bin(entry.newhexsha))
                
                # make it pass the following checks
                output_type = None
            else:
                raise ValueError("Invalid output type: %s ( in %s )"  % (output_type, rev))
            # END handle output type
            
            # empty output types don't require any specific type, it's just about dereferencing tags
            if output_type and obj.type != output_type:
                raise ValueError("Could not accomodate requested object type %r, got %s" % (output_type, obj.type))
            # END verify ouput type
            
            start = end+1                   # skip brace
            parsed_to = start
            continue
        # END parse type
        
        # try to parse a number
        num = 0
        if token != ":":
            found_digit = False
            while start < lr:
                if rev[start] in digits:
                    num = num * 10 + int(rev[start])
                    start += 1
                    found_digit = True
                else:
                    break
                # END handle number
            # END number parse loop
            
            # no explicit number given, 1 is the default
            # It could be 0 though 
            if not found_digit:
                num = 1
            # END set default num
        # END number parsing only if non-blob mode
        
        
        parsed_to = start
        # handle hierarchy walk
        try:
            if token == "~":
                obj = to_commit(obj)
                for _ in xrange(num):
                    obj = obj.parents[0]
                # END for each history item to walk
            elif token == "^":
                obj = to_commit(obj)
                # must be n'th parent
                if num:
                    obj = obj.parents[num-1]
            elif token == ":":
                if obj.type != "tree":
                    obj = obj.tree
                # END get tree type
                obj = obj[rev[start:]]
                parsed_to = lr
            else:
                raise ValueError("Invalid token: %r" % token)
            # END end handle tag
        except (IndexError, AttributeError):
            raise BadObject("Invalid Revision in %s" % rev)
        # END exception handling
    # END parse loop
    
    # still no obj? It's probably a simple name
    if obj is None:
        obj = name_to_object(repo, rev)
        parsed_to = lr
    # END handle simple name
    
    if obj is None:
        raise ValueError("Revision specifier could not be parsed: %s" % rev)

    if parsed_to != lr:
        raise ValueError("Didn't consume complete rev spec %s, consumed part: %s" % (rev, rev[:parsed_to]))
    
    return obj