Example #1
0
    def test_commit_serialization(self):
        # Two parts: first a round-trip check via assert_commit_serialization
        # starting at the repository head, then a timing run that serializes
        # a batch of commits and stores each one in the object database.
        assert_commit_serialization(self.gitrwrepo, self.gitrwrepo.head, True)

        repo = self.gitrwrepo
        store = repo.odb.store
        # Only serialization is timed here - deserialization can be tested
        # afterwards. Serialization is probably limited on IO.
        template = repo.commit(repo.head)

        total = 5000
        started = time()
        for number in xrange(total):
            # every commit copies the template's fields; only the message differs
            commit = Commit(
                repo,
                Commit.NULL_BIN_SHA,
                template.tree,
                template.author,
                template.authored_date,
                template.author_tz_offset,
                template.committer,
                template.committed_date,
                template.committer_tz_offset,
                str(number),
                parents=template.parents,
                encoding=template.encoding,
            )

            buf = BytesIO()
            commit._serialize(buf)
            # the write position after serializing is the payload size
            size = buf.tell()
            buf.seek(0)

            stored = store(IStream(Commit.type, size, buf))
            commit.binsha = stored.binsha
        # END commit creation
        elapsed = time() - started
        rate = total / elapsed

        print("Serialized %i commits to loose objects in %f s ( %f commits / s )"
              % (total, elapsed, rate), file=sys.stderr)
Example #2
0
    def entry_at(cls, filepath, index):
        """Read a single entry from a file holding one log entry per line.

        :return: RefLogEntry at the given index
        :param filepath: full path to the file from which to read the entry;
            it is opened in binary mode and each line is parsed with
            RefLogEntry.from_line
        :param index: python list compatible index, i.e. it may be negative to
            specify an entry counted from the end of the list

        :raise IndexError: If the entry didn't exist

        .. note:: This method is faster as it only parses the entry at index, skipping
            all other lines. Nonetheless, the whole file has to be read if
            the index is negative
        """
        # The context manager closes the file on every exit path, including
        # the IndexError cases below (the handle used to be leaked).
        with open(filepath, 'rb') as fp:
            if index < 0:
                # Counting from the end needs every line; an out-of-range
                # index raises IndexError from the list lookup itself.
                return RefLogEntry.from_line(fp.readlines()[index].strip())
            # END handle negative index

            # read until index is reached
            for i in xrange(index + 1):
                line = fp.readline()
                if not line:
                    # readline() returns an empty value only at end of file
                    raise IndexError(
                        "File ended after %i entries, before index %i was reached"
                        % (i, index))
                # END abort on eof
            # END handle runup

            # index >= 0 here, so the loop ran at least once and 'line'
            # holds the entry at position 'index'
            return RefLogEntry.from_line(line.strip())
        # END close file
Example #3
0
def tree_to_stream(entries, write):
    """Serialize tree entries by handing one record per entry to ``write``.

    Each record is ``<octal mode> <name>\\0<binsha>``, where the mode is
    rendered as up to six octal digits and a leading zero digit is dropped.

    :param entries: **sorted** list of tuples with (binsha, mode, name)
    :param write: write method which takes a data string"""
    zero_code = ord('0')
    octal_mask = 7          # the three low bits, i.e. one octal digit
    # bit offsets of the six octal digits, most significant digit first
    digit_shifts = (15, 12, 9, 6, 3, 0)

    for binsha, mode, name in entries:
        mode_bytes = b''.join(
            bchr(((mode >> shift) & octal_mask) + zero_code)
            for shift in digit_shifts)

        # git slices away the first octal digit if it is zero
        if byte_ord(mode_bytes[0]) == zero_code:
            mode_bytes = mode_bytes[1:]
        # END save a byte

        # A unicode name cannot be joined with the binary sha, so it is
        # encoded first. According to the original author's tests this matches
        # git, which takes the input literally (utf8 on linux).
        if isinstance(name, text_type):
            name = name.encode(defenc)
        # END encode name

        record = (mode_bytes, b' ', name, b'\0', binsha)
        write(b''.join(record))
    # END for each entry
Example #4
0
def tree_to_stream(entries, write):
    """Write the given list of entries into a stream using its write method.

    For every entry a single chunk ``mode + b' ' + name + b'\\0' + binsha`` is
    passed to ``write``; the mode is written in octal without a leading zero.

    :param entries: **sorted** list of tuples with (binsha, mode, name)
    :param write: write method which takes a data string"""
    ord_zero = ord('0')
    bit_mask = 7  # 3 bits set

    for binsha, mode, name in entries:
        # collect the six octal digits lowest-first, then flip them so the
        # most significant digit leads
        octal_digits = [
            bchr(((mode >> (pos * 3)) & bit_mask) + ord_zero)
            for pos in xrange(6)
        ]
        octal_digits.reverse()
        mode_str = b''.join(octal_digits)

        # git slices away the first octal if its zero
        has_leading_zero = byte_ord(mode_str[0]) == ord_zero
        mode_str = mode_str[1:] if has_leading_zero else mode_str

        # A text name has to become bytes before it can sit next to the
        # binary sha; the original author's tests indicate git takes the
        # input literally, which appears to be utf8 on linux.
        raw_name = name.encode(defenc) if isinstance(name, text_type) else name
        write(b''.join((mode_str, b' ', raw_name, b'\0', binsha)))
    # END for each entry
Example #5
0
    def test_commit_serialization(self):
        # Round-trip check starting at a fixed revision, followed by a timing
        # run that serializes commits and stores them in the object database.
        assert_commit_serialization(self.gitrwrepo, '58c78e6', True)

        repo = self.gitrwrepo
        store_object = repo.odb.store
        # direct serialization - deserialization can be tested afterwards
        # serialization is probably limited on IO
        head_commit = repo.commit(repo.head)

        commit_count = 5000
        start_time = time()
        for counter in xrange(commit_count):
            # clone of the head commit whose message is the loop counter
            new_commit = Commit(
                repo, Commit.NULL_BIN_SHA, head_commit.tree,
                head_commit.author, head_commit.authored_date,
                head_commit.author_tz_offset,
                head_commit.committer, head_commit.committed_date,
                head_commit.committer_tz_offset,
                str(counter),
                parents=head_commit.parents, encoding=head_commit.encoding)

            payload = BytesIO()
            new_commit._serialize(payload)
            payload_size = payload.tell()
            payload.seek(0)

            istream = IStream(Commit.type, payload_size, payload)
            new_commit.binsha = store_object(istream).binsha
        # END commit creation
        elapsed = time() - start_time

        message = "Serialized %i commits to loose objects in %f s ( %f commits / s )"
        print(message % (commit_count, elapsed, commit_count / elapsed),
              file=sys.stderr)
Example #6
0
def rev_parse(repo, rev):
    """
    Resolve a revision specification string to the object it names.

    The spec is scanned left to right for the operator characters ``^``, ``~``,
    ``:`` and ``@``. The text before the first operator is looked up as a name
    (HEAD's reference is used instead if the spec starts with an operator);
    every operator then transforms the current object.

    :return: Object at the given revision, either Commit, Tag, Tree or Blob
    :param repo: repository in which names and objects are looked up
    :param rev: git-rev-parse compatible revision specification as string, please see
        http://www.kernel.org/pub/software/scm/git/docs/git-rev-parse.html
        for details
    :raise BadObject: if the given revision could not be found
    :raise BadName: if a ``~``, ``^`` or ``:`` step fails with an IndexError
        or AttributeError
    :raise ValueError: If rev couldn't be parsed
    :raise NotImplementedError: for ``:/`` message searches and for ``@{...}``
        contents that are not an integer
    :raise IndexError: If invalid reflog index is specified"""

    # colon search mode ?
    if rev.startswith(':/'):
        # colon search mode
        raise NotImplementedError("commit by message search ( regex )")
    # END handle search

    obj = None              # object the spec resolved to so far
    ref = None              # reference the spec started from, required for @{n}
    output_type = "commit"
    start = 0               # scan position within rev
    parsed_to = 0           # how much of rev was consumed, verified at the end
    lr = len(rev)
    while start < lr:
        # skip ahead until an operator character is found
        if rev[start] not in "^~:@":
            start += 1
            continue
        # END handle start

        token = rev[start]

        if obj is None:
            # token is a rev name
            if start == 0:
                # the spec begins with an operator, nothing to look up:
                # begin at the reference HEAD points to
                ref = repo.head.ref
            else:
                if token == '@':
                    # reflog access needs the reference, not just its object
                    ref = name_to_object(repo, rev[:start], return_ref=True)
                else:
                    obj = name_to_object(repo, rev[:start])
                # END handle token
            # END handle refname

            if ref is not None:
                obj = ref.commit
            # END handle ref
        # END initialize obj on first token

        start += 1  # step over the operator character

        # try to parse {type}
        if start < lr and rev[start] == '{':
            end = rev.find('}', start)
            if end == -1:
                raise ValueError("Missing closing brace to define type in %s" %
                                 rev)
            output_type = rev[start + 1:end]  # exclude brace

            # handle type
            if output_type == 'commit':
                pass  # default
            elif output_type == 'tree':
                try:
                    obj = to_commit(obj).tree
                except (AttributeError, ValueError):
                    pass  # error raised later
                # END exception handling
            elif output_type in ('', 'blob'):
                if obj.type == 'tag':
                    obj = deref_tag(obj)
                else:
                    # cannot do anything for non-tags
                    pass
                # END handle tag
            elif token == '@':
                # try single int
                assert ref is not None, "Requre Reference to access reflog"
                revlog_index = None
                try:
                    # transform reversed index into the format of our revlog,
                    # e.g. @{0} becomes -1 and @{1} becomes -2
                    revlog_index = -(int(output_type) + 1)
                except ValueError:
                    # TODO: Try to parse the other date options, using parse_date
                    # maybe
                    raise NotImplementedError(
                        "Support for additional @{...} modes not implemented")
                # END handle revlog index

                try:
                    entry = ref.log_entry(revlog_index)
                except IndexError:
                    raise IndexError("Invalid revlog index: %i" % revlog_index)
                # END handle index out of bound

                obj = Object.new_from_sha(repo, hex_to_bin(entry.newhexsha))

                # make it pass the following checks
                output_type = None
            else:
                raise ValueError("Invalid output type: %s ( in %s )" %
                                 (output_type, rev))
            # END handle output type

            # empty output types don't require any specific type, it's just about dereferencing tags
            if output_type and obj.type != output_type:
                raise ValueError(
                    "Could not accommodate requested object type %r, got %s" %
                    (output_type, obj.type))
            # END verify output type

            start = end + 1  # skip brace
            parsed_to = start
            continue
        # END parse type

        # try to parse a number
        num = 0
        if token != ":":
            found_digit = False
            while start < lr:
                if rev[start] in digits:
                    num = num * 10 + int(rev[start])
                    start += 1
                    found_digit = True
                else:
                    break
                # END handle number
            # END number parse loop

            # no explicit number given, 1 is the default
            # It could be 0 though
            if not found_digit:
                num = 1
            # END set default num
        # END number parsing only if non-blob mode

        parsed_to = start
        # handle hierarchy walk
        try:
            if token == "~":
                # follow the first parent num times
                obj = to_commit(obj)
                for _ in xrange(num):
                    obj = obj.parents[0]
                # END for each history item to walk
            elif token == "^":
                obj = to_commit(obj)
                # must be n'th parent; a num of 0 keeps the commit itself
                if num:
                    obj = obj.parents[num - 1]
            elif token == ":":
                if obj.type != "tree":
                    obj = obj.tree
                # END get tree type
                # everything after the colon is used as the lookup key
                # within the tree, which consumes the rest of the spec
                obj = obj[rev[start:]]
                parsed_to = lr
            else:
                raise ValueError("Invalid token: %r" % token)
            # END end handle tag
        except (IndexError, AttributeError):
            # NOTE(review): the message speaks of parent commits, yet the ':'
            # branch above runs inside this same try block
            raise BadName(
                "Invalid revision spec '%s' - not enough parent commits to reach '%s%i'"
                % (rev, token, num))
        # END exception handling
    # END parse loop

    # still no obj ? It's probably a simple name
    if obj is None:
        obj = name_to_object(repo, rev)
        parsed_to = lr
    # END handle simple name

    if obj is None:
        raise ValueError("Revision specifier could not be parsed: %s" % rev)

    # anything left unconsumed means the spec had trailing content
    # that no branch above handled
    if parsed_to != lr:
        raise ValueError(
            "Didn't consume complete rev spec %s, consumed part: %s" %
            (rev, rev[:parsed_to]))

    return obj
Example #7
0
    def move(self, items, skip_errors=False, **kwargs):
        """Rename or move items; the last item is the destination of the move.

        If the destination is a file, the first item ( of two ) must be a file
        as well. If the destination is a directory, it may be preceded by one
        or more directories or files.

        The working tree will be affected in non-bare repositories.

        :param items:
            Multiple types of items are supported, please see the 'remove' method
            for reference.
        :param skip_errors:
            If True, errors such as ones resulting from missing source files will
            be skipped.
        :param kwargs:
            Additional arguments you would like to pass to git-mv, such as dry_run
            or force.

        :return: List(tuple(source_path_string, destination_path_string), ...)
            A list of pairs, containing the source file moved as well as its
            actual destination. Relative to the repository root.

        :raise ValueError: If only one item was given
        :raise GitCommandError: If git could not handle your request"""
        rela_paths = self._items_to_rela_paths(items)
        if len(rela_paths) < 2:
            raise ValueError(
                "Please provide at least one source and one destination of the move operation"
            )

        mv_args = ['-k'] if skip_errors else []

        # both spellings of the flag are removed from kwargs; 'dry_run' takes
        # precedence over 'n' when the caller supplied both
        short_flag = kwargs.pop('n', None)
        dry_run_requested = kwargs.pop('dry_run', short_flag)

        # The first invocation is always a dry run, as its output tells us
        # what the command would actually do ( used as return value ).
        kwargs['dry_run'] = True
        report = self.repo.git.mv(mv_args, rela_paths, **kwargs).splitlines()

        # The first half of the output are the 'checking ' lines, the second
        # half the 'renaming' ones, which are the ones parsed here.
        moved = []
        for line in report[len(report) // 2:]:
            parts = line.split(' to ')
            assert len(parts) == 2, "Too many tokens in %s" % line

            # parts[0] = Renaming x  ( the first 9 characters are cut off )
            # parts[1] = y
            moved.append((parts[0][9:], parts[1]))
        # END for each line to parse

        if not dry_run_requested:
            # now apply the actual operation, without the forced dry_run flag
            del kwargs['dry_run']
            self.repo.git.mv(mv_args, rela_paths, **kwargs)
        # END handle real run

        return moved
Example #8
0
    def move(self, items, skip_errors=False, **kwargs):
        """Move or rename the given items, treating the last one as destination.

        A file destination requires exactly one source which must be a file
        too; a directory destination may be preceded by one or more
        directories or files.

        The working tree will be affected in non-bare repositories.

        :param items:
            Multiple types of items are supported, please see the 'remove' method
            for reference.
        :param skip_errors:
            If True, errors such as ones resulting from missing source files will
            be skipped.
        :param kwargs:
            Additional arguments you would like to pass to git-mv, such as dry_run
            or force.

        :return: List(tuple(source_path_string, destination_path_string), ...)
            A list of pairs, containing the source file moved as well as its
            actual destination. Relative to the repository root.

        :raise ValueError: If only one item was given
        :raise GitCommandError: If git could not handle your request"""

        def parse_rename(line):
            # a rename line splits into "Renaming x" and "y" around ' to ';
            # the 9 leading characters of the first token are dropped
            tokens = line.split(' to ')
            assert len(tokens) == 2, "Too many tokens in %s" % line
            return (tokens[0][9:], tokens[1])

        args = []
        if skip_errors:
            args.append('-k')

        paths = self._items_to_rela_paths(items)
        if len(paths) < 2:
            raise ValueError("Please provide at least one source and one destination of the move operation")

        # strip both spellings of the dry-run flag from the caller's options
        was_dry_run = kwargs.pop('dry_run', kwargs.pop('n', None))

        # first execute rename in dryrun so the command tells us what it actually does
        # ( for later output )
        dry_kwargs = dict(kwargs, dry_run=True)
        mvlines = self.repo.git.mv(args, paths, **dry_kwargs).splitlines()

        # parse result - first 0:n/2 lines are 'checking ', the remaining ones
        # are the 'renaming' ones which we parse
        first_rename = len(mvlines) // 2
        out = [parse_rename(line) for line in mvlines[first_rename:]]

        # a requested dry-run ends here with the parsed preview
        if was_dry_run:
            return out
        # END handle dryrun

        # now apply the actual operation
        self.repo.git.mv(args, paths, **kwargs)

        return out
Example #9
0
def rev_parse(repo, rev):
    """
    Parse a revision specification and return the object it refers to.

    Operators (``^``, ``~``, ``:`` and ``@``) are processed from left to right.
    Whatever precedes the first operator is resolved as a name; if the spec
    starts with an operator, the reference HEAD points to is the starting
    point.

    :return: Object at the given revision, either Commit, Tag, Tree or Blob
    :param repo: repository in which names and objects are looked up
    :param rev: git-rev-parse compatible revision specification as string, please see
        http://www.kernel.org/pub/software/scm/git/docs/git-rev-parse.html
        for details
    :raise BadObject: if the given revision could not be found
    :raise BadName: if a ``~``, ``^`` or ``:`` step fails with an IndexError
        or AttributeError
    :raise ValueError: If rev couldn't be parsed
    :raise NotImplementedError: for ``:/`` message searches and for ``@{...}``
        contents that are not an integer
    :raise IndexError: If invalid reflog index is specified"""

    # colon search mode ?
    if rev.startswith(':/'):
        # colon search mode
        raise NotImplementedError("commit by message search ( regex )")
    # END handle search

    obj = None              # current result of the resolution
    ref = None              # starting reference, needed for reflog ( @{n} ) access
    output_type = "commit"
    start = 0               # current position in rev
    parsed_to = 0           # end of the consumed part of rev, checked before returning
    lr = len(rev)
    while start < lr:
        # advance to the next operator character
        if rev[start] not in "^~:@":
            start += 1
            continue
        # END handle start

        token = rev[start]

        if obj is None:
            # token is a rev name
            if start == 0:
                # no name in front of the first operator: use HEAD's reference
                ref = repo.head.ref
            else:
                if token == '@':
                    # the reference itself is required to read its log later
                    ref = name_to_object(repo, rev[:start], return_ref=True)
                else:
                    obj = name_to_object(repo, rev[:start])
                # END handle token
            # END handle refname

            if ref is not None:
                obj = ref.commit
            # END handle ref
        # END initialize obj on first token

        start += 1  # move past the operator character

        # try to parse {type}
        if start < lr and rev[start] == '{':
            end = rev.find('}', start)
            if end == -1:
                raise ValueError("Missing closing brace to define type in %s" % rev)
            output_type = rev[start + 1:end]  # exclude brace

            # handle type
            if output_type == 'commit':
                pass  # default
            elif output_type == 'tree':
                try:
                    obj = to_commit(obj).tree
                except (AttributeError, ValueError):
                    pass    # error raised later
                # END exception handling
            elif output_type in ('', 'blob'):
                if obj.type == 'tag':
                    obj = deref_tag(obj)
                else:
                    # cannot do anything for non-tags
                    pass
                # END handle tag
            elif token == '@':
                # try single int
                assert ref is not None, "Requre Reference to access reflog"
                revlog_index = None
                try:
                    # transform reversed index into the format of our revlog,
                    # e.g. @{0} becomes -1 and @{1} becomes -2
                    revlog_index = -(int(output_type) + 1)
                except ValueError:
                    # TODO: Try to parse the other date options, using parse_date
                    # maybe
                    raise NotImplementedError("Support for additional @{...} modes not implemented")
                # END handle revlog index

                try:
                    entry = ref.log_entry(revlog_index)
                except IndexError:
                    raise IndexError("Invalid revlog index: %i" % revlog_index)
                # END handle index out of bound

                obj = Object.new_from_sha(repo, hex_to_bin(entry.newhexsha))

                # make it pass the following checks
                output_type = None
            else:
                raise ValueError("Invalid output type: %s ( in %s )" % (output_type, rev))
            # END handle output type

            # empty output types don't require any specific type, it's just about dereferencing tags
            if output_type and obj.type != output_type:
                raise ValueError("Could not accomodate requested object type %r, got %s" % (output_type, obj.type))
            # END verify output type

            start = end + 1                   # skip brace
            parsed_to = start
            continue
        # END parse type

        # try to parse a number
        num = 0
        if token != ":":
            found_digit = False
            while start < lr:
                if rev[start] in digits:
                    num = num * 10 + int(rev[start])
                    start += 1
                    found_digit = True
                else:
                    break
                # END handle number
            # END number parse loop

            # no explicit number given, 1 is the default
            # It could be 0 though
            if not found_digit:
                num = 1
            # END set default num
        # END number parsing only if non-blob mode

        parsed_to = start
        # handle hierarchy walk
        try:
            if token == "~":
                # walk num generations back along the first parent
                obj = to_commit(obj)
                for item in xrange(num):
                    obj = obj.parents[0]
                # END for each history item to walk
            elif token == "^":
                obj = to_commit(obj)
                # must be n'th parent; with num == 0 the commit itself is kept
                if num:
                    obj = obj.parents[num - 1]
            elif token == ":":
                if obj.type != "tree":
                    obj = obj.tree
                # END get tree type
                # the remainder of the spec is the key looked up in the tree,
                # hence the whole spec counts as consumed
                obj = obj[rev[start:]]
                parsed_to = lr
            else:
                raise ValueError("Invalid token: %r" % token)
            # END end handle tag
        except (IndexError, AttributeError):
            # NOTE(review): this handler also covers the ':' branch although
            # its message only mentions parent commits
            raise BadName("Invalid revision spec '%s' - not enough parent commits to reach '%s%i'" % (rev, token, num))
        # END exception handling
    # END parse loop

    # still no obj ? It's probably a simple name
    if obj is None:
        obj = name_to_object(repo, rev)
        parsed_to = lr
    # END handle simple name

    if obj is None:
        raise ValueError("Revision specifier could not be parsed: %s" % rev)

    # leftover characters mean part of the spec was not handled by any branch above
    if parsed_to != lr:
        raise ValueError("Didn't consume complete rev spec %s, consumed part: %s" % (rev, rev[:parsed_to]))

    return obj