Esempio n. 1
    def test_commit_serialization(self):
        """Check commit serialization and time the creation of ``nc`` commits.

        The head commit of the read-write repository serves as a template:
        every iteration builds a commit with the same tree, people, dates and
        parents but a unique message, serializes it into an in-memory stream
        and hands that stream to ``make_object``.  The measured throughput is
        printed to stderr.
        """
        assert_commit_serialization(self.gitrwrepo, self.gitrwrepo.head, True)

        rwrepo = self.gitrwrepo
        # NOTE(review): the right-hand side was missing in this snippet. The
        # object database's ``store`` is assumed because the callable receives
        # an IStream and its result exposes ``binsha`` - confirm.
        make_object = rwrepo.odb.store
        # direct serialization - deserialization can be tested afterwards
        # serialization is probably limited on IO
        hc = rwrepo.commit(rwrepo.head)

        nc = 5000
        st = time()
        for i in xrange(nc):
            # NOTE(review): ``hc.author`` restored as the first argument of the
            # second line to mirror the committer triple below - confirm.
            cm = Commit(rwrepo, Commit.NULL_BIN_SHA, hc.tree,
                        hc.author, hc.authored_date, hc.author_tz_offset,
                        hc.committer, hc.committed_date, hc.committer_tz_offset,
                        str(i), parents=hc.parents, encoding=hc.encoding)

            stream = BytesIO()
            # Without writing the commit, the stream stays empty and ``slen``
            # is always 0, so nothing would actually be serialized.
            # NOTE(review): ``_serialize`` is presumed to be the commit's
            # stream-writing method - confirm.
            cm._serialize(stream)
            slen = stream.tell()
            stream.seek(0)  # rewind so the consumer reads from the beginning

            cm.binsha = make_object(IStream(Commit.type, slen, stream)).binsha
        # END commit creation
        elapsed = time() - st

        print("Serialized %i commits to loose objects in %f s ( %f commits / s )"
              % (nc, elapsed, nc / elapsed), file=sys.stderr)
Esempio n. 2
    def entry_at(cls, filepath, index):
        """:return: RefLogEntry at the given index
        :param filepath: full path to the index file from which to read the entry
        :param index: python list compatible index, i.e. it may be negative to
            specify an entry counted from the end of the list

        :raise IndexError: If the entry didn't exist

        .. note:: This method is faster as it only parses the entry at index, skipping
            all other lines. Nonetheless, the whole file has to be read if
            the index is negative
        """
        # The context manager closes the file on every exit path, including
        # the IndexError cases below.
        with open(filepath, 'rb') as fp:
            if index < 0:
                # counting from the end requires knowing all lines
                return RefLogEntry.from_line(fp.readlines()[index].strip())
            # END handle negative index

            # read until index is reached
            for i in xrange(index + 1):
                line = fp.readline()
                if not line:
                    break
                # END abort on eof
            # END handle runup

            # Either the loop stopped early at end-of-file, or the final read
            # itself returned nothing: the requested entry does not exist.
            if i != index or not line:
                raise IndexError
            # END handle exception

            return RefLogEntry.from_line(line.strip())
        # END close file
Esempio n. 3
def tree_to_stream(entries, write):
    """Serialize tree entries, handing one encoded record per entry to ``write``.

    Each record is ``<octal mode> <name>\\0<binsha>``.

    :param entries: **sorted** list of tuples with (binsha, mode, name)
    :param write: write method which takes a data string"""
    zero_code = ord('0')
    octal_mask = 7  # lowest three bits, i.e. one octal digit

    for binsha, mode, name in entries:
        # Render six octal digits.  Digits are produced least significant
        # first and prepended, so the result reads most significant first.
        octal = b''
        for pos in xrange(6):
            digit = (mode >> (pos * 3)) & octal_mask
            octal = bchr(digit + zero_code) + octal
        # END for each octal digit

        # git drops the leading digit when it is zero
        if byte_ord(octal[0]) == zero_code:
            octal = octal[1:]
        # END save a byte

        # A unicode name cannot be joined with the binary sha, so it is
        # encoded to bytes first using the default encoding.
        if isinstance(name, text_type):
            name = name.encode(defenc)
        # END encode unicode names

        record = b''.join((octal, b' ', name, b'\0', binsha))
        write(record)
    # END for each entry
Esempio n. 4
def tree_to_stream(entries, write):
    """Write the given list of entries into a stream using its write method.

    Every entry is emitted as the octal mode, a space, the name, a NUL byte
    and finally the binary sha.

    :param entries: **sorted** list of tuples with (binsha, mode, name)
    :param write: write method which takes a data string"""
    ascii_zero = ord('0')
    three_bits = 7  # mask selecting a single octal digit

    for binsha, mode, name in entries:
        # Shift amounts 15, 12, ..., 0 pick the six octal digits from the most
        # significant one down to the least significant one.
        mode_bytes = b''.join(
            bchr(((mode >> shift) & three_bits) + ascii_zero)
            for shift in xrange(15, -1, -3))

        # git slices away the first octal digit if it is zero
        leading_is_zero = byte_ord(mode_bytes[0]) == ascii_zero
        if leading_is_zero:
            mode_bytes = mode_bytes[1:]

        # Unicode names are converted with the default encoding, otherwise
        # they could not be combined with the raw binary sha below.
        name_bytes = name.encode(defenc) if isinstance(name, text_type) else name

        write(b''.join((mode_bytes, b' ', name_bytes, b'\0', binsha)))
Esempio n. 5
    def test_commit_serialization(self):
        """Check commit serialization and time the creation of ``nc`` commits.

        The head commit of the read-write repository serves as a template:
        every iteration builds a commit with a unique message, serializes it
        into an in-memory stream and hands that stream to ``make_object``.
        The measured throughput is printed to stderr.
        """
        assert_commit_serialization(self.gitrwrepo, '58c78e6', True)

        rwrepo = self.gitrwrepo
        # NOTE(review): the right-hand side was missing in this snippet. The
        # object database's ``store`` is assumed because the callable receives
        # an IStream and its result exposes ``binsha`` - confirm.
        make_object = rwrepo.odb.store
        # direct serialization - deserialization can be tested afterwards
        # serialization is probably limited on IO
        hc = rwrepo.commit(rwrepo.head)

        nc = 5000
        st = time()
        for i in xrange(nc):
            # NOTE(review): everything after ``rwrepo`` was cut off in this
            # snippet; the arguments copy the template commit ``hc`` and use
            # the loop counter as message - confirm against the Commit
            # constructor.
            cm = Commit(
                rwrepo,
                Commit.NULL_BIN_SHA,
                hc.tree,
                hc.author,
                hc.authored_date,
                hc.author_tz_offset,
                hc.committer,
                hc.committed_date,
                hc.committer_tz_offset,
                str(i),
                parents=hc.parents,
                encoding=hc.encoding,
            )

            stream = BytesIO()
            # Without writing the commit, the stream stays empty and ``slen``
            # is always 0, so nothing would actually be serialized.
            # NOTE(review): ``_serialize`` is presumed to be the commit's
            # stream-writing method - confirm.
            cm._serialize(stream)
            slen = stream.tell()
            stream.seek(0)  # rewind so the consumer reads from the beginning

            cm.binsha = make_object(IStream(Commit.type, slen, stream)).binsha
        # END commit creation
        elapsed = time() - st

        print(
            "Serialized %i commits to loose objects in %f s ( %f commits / s )"
            % (nc, elapsed, nc / elapsed),
            file=sys.stderr,
        )
Esempio n. 6
def rev_parse(repo, rev):
    """Resolve a revision specification to the object it names.

    :return: Object at the given revision, either Commit, Tag, Tree or Blob
    :param repo: repository the specification is resolved against
    :param rev: git-rev-parse compatible revision specification as string, please see
        the git-rev-parse documentation for details
    :raise BadObject: if the given revision could not be found
    :raise BadName: if a '~', '^' or ':' walk could not be completed
    :raise ValueError: If rev couldn't be parsed
    :raise IndexError: If invalid reflog index is specified
    :raise NotImplementedError: for ':/<text>' searches and for @{...}
        contents which are not an integer"""
    # NOTE(review): several control-flow lines (try/else/continue/break) were
    # missing from this snippet and have been restored from the surrounding
    # structure - confirm against the reference implementation.

    # colon search mode ?
    if rev.startswith(':/'):
        # colon search mode
        raise NotImplementedError("commit by message search ( regex )")
    # END handle search

    obj = None
    ref = None
    output_type = "commit"
    start = 0
    parsed_to = 0
    lr = len(rev)
    while start < lr:
        # skip over plain name characters until an operator is found
        if rev[start] not in "^~:@":
            start += 1
            continue
        # END handle start

        token = rev[start]

        if obj is None:
            # token is a rev name
            if start == 0:
                # an operator without a preceding name refers to the head
                ref = repo.head.ref
            else:
                if token == '@':
                    # reflog access needs the reference itself
                    ref = name_to_object(repo, rev[:start], return_ref=True)
                else:
                    obj = name_to_object(repo, rev[:start])
                # END handle token
            # END handle refname

            if ref is not None:
                obj = ref.commit
            # END handle ref
        # END initialize obj on first token

        start += 1

        # try to parse {type}
        if start < lr and rev[start] == '{':
            end = rev.find('}', start)
            if end == -1:
                raise ValueError("Missing closing brace to define type in %s" %
                                 rev)
            output_type = rev[start + 1:end]  # exclude brace

            # handle type
            if output_type == 'commit':
                pass  # default
            elif output_type == 'tree':
                try:
                    obj = to_commit(obj).tree
                except (AttributeError, ValueError):
                    pass  # error raised later
                # END exception handling
            elif output_type in ('', 'blob'):
                if obj.type == 'tag':
                    obj = deref_tag(obj)
                else:
                    # cannot do anything for non-tags
                    pass
                # END handle tag
            elif token == '@':
                # try single int
                assert ref is not None, "Requre Reference to access reflog"
                revlog_index = None
                try:
                    # transform reversed index into the format of our revlog
                    revlog_index = -(int(output_type) + 1)
                except ValueError:
                    # TODO: Try to parse the other date options, using parse_date
                    # maybe
                    raise NotImplementedError(
                        "Support for additional @{...} modes not implemented")
                # END handle revlog index

                try:
                    entry = ref.log_entry(revlog_index)
                except IndexError:
                    raise IndexError("Invalid revlog index: %i" % revlog_index)
                # END handle index out of bound

                obj = Object.new_from_sha(repo, hex_to_bin(entry.newhexsha))

                # make it pass the following checks
                output_type = None
            else:
                raise ValueError("Invalid output type: %s ( in %s )" %
                                 (output_type, rev))
            # END handle output type

            # empty output types don't require any specific type, its just about dereferencing tags
            if output_type and obj.type != output_type:
                raise ValueError(
                    "Could not accommodate requested object type %r, got %s" %
                    (output_type, obj.type))
            # END verify output type

            start = end + 1  # skip brace
            parsed_to = start
            # the braces fully consumed this token, go on with the next one
            continue
        # END parse type

        # try to parse a number
        num = 0
        if token != ":":
            found_digit = False
            while start < lr:
                if rev[start] in digits:
                    num = num * 10 + int(rev[start])
                    start += 1
                    found_digit = True
                else:
                    break
                # END handle number
            # END number parse loop

            # no explicit number given, 1 is the default
            # It could be 0 though
            if not found_digit:
                num = 1
            # END set default num
        # END number parsing only if non-blob mode

        parsed_to = start
        # handle hierarchy walk
        try:
            if token == "~":
                obj = to_commit(obj)
                for _ in xrange(num):
                    obj = obj.parents[0]
                # END for each history item to walk
            elif token == "^":
                obj = to_commit(obj)
                # must be n'th parent
                if num:
                    obj = obj.parents[num - 1]
            elif token == ":":
                if obj.type != "tree":
                    obj = obj.tree
                # END get tree type
                # everything after the colon is looked up within the tree
                obj = obj[rev[start:]]
                parsed_to = lr
            else:
                raise ValueError("Invalid token: %r" % token)
            # END end handle tag
        except (IndexError, AttributeError):
            raise BadName(
                "Invalid revision spec '%s' - not enough parent commits to reach '%s%i'"
                % (rev, token, num))
        # END exception handling
    # END parse loop

    # still no obj ? Its probably a simple name
    if obj is None:
        obj = name_to_object(repo, rev)
        parsed_to = lr
    # END handle simple name

    if obj is None:
        raise ValueError("Revision specifier could not be parsed: %s" % rev)

    if parsed_to != lr:
        raise ValueError(
            "Didn't consume complete rev spec %s, consumed part: %s" %
            (rev, rev[:parsed_to]))

    return obj
Esempio n. 7
    def move(self, items, skip_errors=False, **kwargs):
        """Rename/move the items, whereas the last item is considered the destination of
        the move operation. If the destination is a file, the first item ( of two )
        must be a file as well. If the destination is a directory, it may be preceded
        by one or more directories or files.

        The working tree will be affected in non-bare repositories.

        :param items:
            Multiple types of items are supported, please see the 'remove' method
            for reference.
        :param skip_errors:
            If True, errors such as ones resulting from missing source files will
            be skipped.
        :param kwargs:
            Additional arguments you would like to pass to git-mv, such as dry_run
            or force.

        :return: List(tuple(source_path_string, destination_path_string), ...)
            A list of pairs, containing the source file moved as well as its
            actual destination. Relative to the repository root.

        :raise ValueError: If only one item was given
        :raise GitCommandError: If git could not handle your request"""
        # NOTE(review): the git invocations and the ``skip_errors`` body were
        # truncated in this snippet; ``self.repo.git.mv`` and the ``-k`` flag
        # are assumed from the docstring's reference to git-mv - confirm.
        args = list()
        if skip_errors:
            args.append('-k')

        paths = self._items_to_rela_paths(items)
        if len(paths) < 2:
            raise ValueError(
                "Please provide at least one source and one destination of the move operation"
            )

        # remember whether the caller asked for a dry run only
        was_dry_run = kwargs.pop('dry_run', kwargs.pop('n', None))
        kwargs['dry_run'] = True

        # first execute rename in dryrun so the command tells us what it actually does
        # ( for later output )
        out = list()
        mvlines = self.repo.git.mv(args, paths, **kwargs).splitlines()

        # parse result - first 0:n/2 lines are 'checking ', the remaining ones
        # are the 'renaming' ones which we parse
        for line in mvlines[len(mvlines) // 2:]:
            tokens = line.split(' to ')
            assert len(tokens) == 2, "Too many tokens in %s" % line

            # [0] = Renaming x
            # [1] = y
            # skip the 9 characters of the 'Renaming ' prefix
            out.append((tokens[0][9:], tokens[1]))
        # END for each line to parse

        # either prepare for the real run, or output the dry-run result
        if was_dry_run:
            return out
        # END handle dryrun

        # now apply the actual operation
        kwargs.pop('dry_run')
        self.repo.git.mv(args, paths, **kwargs)

        return out
Esempio n. 8
    def move(self, items, skip_errors=False, **kwargs):
        """Rename/move the items, whereas the last item is considered the destination of
        the move operation. If the destination is a file, the first item ( of two )
        must be a file as well. If the destination is a directory, it may be preceded
        by one or more directories or files.

        The working tree will be affected in non-bare repositories.

        :param items:
            Multiple types of items are supported, please see the 'remove' method
            for reference.
        :param skip_errors:
            If True, errors such as ones resulting from missing source files will
            be skipped.
        :param kwargs:
            Additional arguments you would like to pass to git-mv, such as dry_run
            or force.

        :return: List(tuple(source_path_string, destination_path_string), ...)
            A list of pairs, containing the source file moved as well as its
            actual destination. Relative to the repository root.

        :raise ValueError: If only one item was given
        :raise GitCommandError: If git could not handle your request"""
        # NOTE(review): the git invocations and the ``skip_errors`` body were
        # truncated in this snippet; ``self.repo.git.mv`` and the ``-k`` flag
        # are assumed from the docstring's reference to git-mv - confirm.
        args = list()
        if skip_errors:
            args.append('-k')

        paths = self._items_to_rela_paths(items)
        if len(paths) < 2:
            raise ValueError("Please provide at least one source and one destination of the move operation")

        # remember whether the caller asked for a dry run only
        was_dry_run = kwargs.pop('dry_run', kwargs.pop('n', None))
        kwargs['dry_run'] = True

        # first execute rename in dryrun so the command tells us what it actually does
        # ( for later output )
        out = list()
        mvlines = self.repo.git.mv(args, paths, **kwargs).splitlines()

        # parse result - first 0:n/2 lines are 'checking ', the remaining ones
        # are the 'renaming' ones which we parse
        renaming_lines = mvlines[len(mvlines) // 2:]
        for line in renaming_lines:
            tokens = line.split(' to ')
            assert len(tokens) == 2, "Too many tokens in %s" % line

            # [0] = Renaming x
            # [1] = y
            # skip the 9 characters of the 'Renaming ' prefix
            out.append((tokens[0][9:], tokens[1]))
        # END for each line to parse

        # either prepare for the real run, or output the dry-run result
        if was_dry_run:
            return out
        # END handle dryrun

        # now apply the actual operation
        kwargs.pop('dry_run')
        self.repo.git.mv(args, paths, **kwargs)

        return out
Esempio n. 9
def rev_parse(repo, rev):
    """Resolve a revision specification to the object it names.

    :return: Object at the given revision, either Commit, Tag, Tree or Blob
    :param repo: repository the specification is resolved against
    :param rev: git-rev-parse compatible revision specification as string, please see
        the git-rev-parse documentation for details
    :raise BadObject: if the given revision could not be found
    :raise BadName: if a '~', '^' or ':' walk could not be completed
    :raise ValueError: If rev couldn't be parsed
    :raise IndexError: If invalid reflog index is specified
    :raise NotImplementedError: for ':/<text>' searches and for @{...}
        contents which are not an integer"""
    # NOTE(review): several control-flow lines (try/else/continue/break) were
    # missing from this snippet and have been restored from the surrounding
    # structure - confirm against the reference implementation.

    # colon search mode ?
    if rev.startswith(':/'):
        # colon search mode
        raise NotImplementedError("commit by message search ( regex )")
    # END handle search

    obj = None
    ref = None
    output_type = "commit"
    start = 0
    parsed_to = 0
    lr = len(rev)
    while start < lr:
        # skip over plain name characters until an operator is found
        if rev[start] not in "^~:@":
            start += 1
            continue
        # END handle start

        token = rev[start]

        if obj is None:
            # token is a rev name
            if start == 0:
                # an operator without a preceding name refers to the head
                ref = repo.head.ref
            else:
                if token == '@':
                    # reflog access needs the reference itself
                    ref = name_to_object(repo, rev[:start], return_ref=True)
                else:
                    obj = name_to_object(repo, rev[:start])
                # END handle token
            # END handle refname

            if ref is not None:
                obj = ref.commit
            # END handle ref
        # END initialize obj on first token

        start += 1

        # try to parse {type}
        if start < lr and rev[start] == '{':
            end = rev.find('}', start)
            if end == -1:
                raise ValueError("Missing closing brace to define type in %s" % rev)
            output_type = rev[start + 1:end]  # exclude brace

            # handle type
            if output_type == 'commit':
                pass  # default
            elif output_type == 'tree':
                try:
                    obj = to_commit(obj).tree
                except (AttributeError, ValueError):
                    pass    # error raised later
                # END exception handling
            elif output_type in ('', 'blob'):
                if obj.type == 'tag':
                    obj = deref_tag(obj)
                else:
                    # cannot do anything for non-tags
                    pass
                # END handle tag
            elif token == '@':
                # try single int
                assert ref is not None, "Requre Reference to access reflog"
                revlog_index = None
                try:
                    # transform reversed index into the format of our revlog
                    revlog_index = -(int(output_type) + 1)
                except ValueError:
                    # TODO: Try to parse the other date options, using parse_date
                    # maybe
                    raise NotImplementedError("Support for additional @{...} modes not implemented")
                # END handle revlog index

                try:
                    entry = ref.log_entry(revlog_index)
                except IndexError:
                    raise IndexError("Invalid revlog index: %i" % revlog_index)
                # END handle index out of bound

                obj = Object.new_from_sha(repo, hex_to_bin(entry.newhexsha))

                # make it pass the following checks
                output_type = None
            else:
                raise ValueError("Invalid output type: %s ( in %s )" % (output_type, rev))
            # END handle output type

            # empty output types don't require any specific type, its just about dereferencing tags
            if output_type and obj.type != output_type:
                raise ValueError("Could not accomodate requested object type %r, got %s" % (output_type, obj.type))
            # END verify output type

            start = end + 1                   # skip brace
            parsed_to = start
            # the braces fully consumed this token, go on with the next one
            continue
        # END parse type

        # try to parse a number
        num = 0
        if token != ":":
            found_digit = False
            while start < lr:
                if rev[start] in digits:
                    num = num * 10 + int(rev[start])
                    start += 1
                    found_digit = True
                else:
                    break
                # END handle number
            # END number parse loop

            # no explicit number given, 1 is the default
            # It could be 0 though
            if not found_digit:
                num = 1
            # END set default num
        # END number parsing only if non-blob mode

        parsed_to = start
        # handle hierarchy walk
        try:
            if token == "~":
                obj = to_commit(obj)
                for _ in xrange(num):
                    obj = obj.parents[0]
                # END for each history item to walk
            elif token == "^":
                obj = to_commit(obj)
                # must be n'th parent
                if num:
                    obj = obj.parents[num - 1]
            elif token == ":":
                if obj.type != "tree":
                    obj = obj.tree
                # END get tree type
                # everything after the colon is looked up within the tree
                obj = obj[rev[start:]]
                parsed_to = lr
            else:
                raise ValueError("Invalid token: %r" % token)
            # END end handle tag
        except (IndexError, AttributeError):
            raise BadName("Invalid revision spec '%s' - not enough parent commits to reach '%s%i'" % (rev, token, num))
        # END exception handling
    # END parse loop

    # still no obj ? Its probably a simple name
    if obj is None:
        obj = name_to_object(repo, rev)
        parsed_to = lr
    # END handle simple name

    if obj is None:
        raise ValueError("Revision specifier could not be parsed: %s" % rev)

    if parsed_to != lr:
        raise ValueError("Didn't consume complete rev spec %s, consumed part: %s" % (rev, rev[:parsed_to]))

    return obj