def tree_entries_from_data(data): """Reads the binary representation of a tree and returns tuples of Tree items :param data: data block with tree data (as bytes) :return: list(tuple(binsha, mode, tree_relative_path), ...)""" ord_zero = ord('0') space_ord = ord(' ') len_data = len(data) i = 0 out = list() while i < len_data: mode = 0 # read mode # Some git versions truncate the leading 0, some don't # The type will be extracted from the mode later while byte_ord(data[i]) != space_ord: # move existing mode integer up one level being 3 bits # and add the actual ordinal value of the character mode = (mode << 3) + (byte_ord(data[i]) - ord_zero) i += 1 # END while reading mode # byte is space now, skip it i += 1 # parse name, it is NULL separated ns = i while byte_ord(data[i]) != 0: i += 1 # END while not reached NULL # default encoding for strings in git is utf8 # Only use the respective unicode object if the byte stream was encoded name = data[ns:i] try: name = name.decode(defenc) except UnicodeDecodeError: pass # END handle encoding # byte is NULL, get next 20 i += 1 sha = data[i:i + 20] i = i + 20 out.append((sha, mode, name)) # END for each byte in data stream return out
def tree_to_stream(entries, write): """Write the give list of entries into a stream using its write method :param entries: **sorted** list of tuples with (binsha, mode, name) :param write: write method which takes a data string""" ord_zero = ord('0') bit_mask = 7 # 3 bits set for binsha, mode, name in entries: mode_str = b'' for i in xrange(6): mode_str = bchr(((mode >> (i * 3)) & bit_mask) + ord_zero) + mode_str # END for each 8 octal value # git slices away the first octal if its zero if byte_ord(mode_str[0]) == ord_zero: mode_str = mode_str[1:] # END save a byte # here it comes: if the name is actually unicode, the replacement below # will not work as the binsha is not part of the ascii unicode encoding - # hence we must convert to an utf8 string for it to work properly. # According to my tests, this is exactly what git does, that is it just # takes the input literally, which appears to be utf8 on linux. if isinstance(name, text_type): name = name.encode(defenc) write(b''.join((mode_str, b' ', name, b'\0', binsha)))
def tree_to_stream(entries, write): """Write the give list of entries into a stream using its write method :param entries: **sorted** list of tuples with (binsha, mode, name) :param write: write method which takes a data string""" ord_zero = ord('0') bit_mask = 7 # 3 bits set for binsha, mode, name in entries: mode_str = b'' for i in xrange(6): mode_str = bchr(( (mode >> (i * 3)) & bit_mask) + ord_zero) + mode_str # END for each 8 octal value # git slices away the first octal if its zero if byte_ord(mode_str[0]) == ord_zero: mode_str = mode_str[1:] # END save a byte # here it comes: if the name is actually unicode, the replacement below # will not work as the binsha is not part of the ascii unicode encoding - # hence we must convert to an utf8 string for it to work properly. # According to my tests, this is exactly what git does, that is it just # takes the input literally, which appears to be utf8 on linux. if isinstance(name, text_type): name = name.encode(defenc) write(b''.join((mode_str, b' ', name, b'\0', binsha)))