Example #1
0
    class ReaderProgress(object):
        """File-like wrapper around ``reader`` that reports read progress.

        Every ``read`` call forwards the running byte count to a ``Progress``
        instance: as an integer percentage of ``length`` when a length was
        given, as a raw byte count otherwise.

        A minimal ``tell``/``seek`` implementation is provided for GzipFile
        (see the comment above ``tell``). Only the last ``_TAIL_SIZE`` bytes
        pulled from the underlying reader are kept, so that is as far back
        as a rewind can be served.
        """

        # How many trailing bytes are kept around to serve rewinds.
        _TAIL_SIZE = 8

        def __init__(self, reader, length=None):
            self._reader = reader
            self._length = length
            # Number of bytes pulled from the underlying reader so far.
            self._read = 0
            # Position exposed through tell(); trails _read after a rewind.
            self._pos = 0
            # Last _TAIL_SIZE bytes pulled from the underlying reader.
            self._buf = ''
            self._progress = Progress(' {}%' if self._length else ' {} bytes')

        def read(self, length):
            """Return up to ``length`` bytes, replaying the kept tail first
            when the position was rewound, and report progress."""
            # See comment above tell
            behind = self._read - self._pos
            if behind > 0:
                assert behind <= len(self._buf)
                # Index from the end of the tail rather than consuming it
                # from the front: this stays correct for rewinds shorter
                # than the tail and for repeated rewinds.
                start = len(self._buf) - behind
                data = self._buf[start:start + length]
                self._pos += len(data)
                missing = length - len(data)
                if missing > 0:
                    # The request runs past what was already read: carry on
                    # with the underlying reader.
                    data += self._fetch(missing)
            else:
                assert self._read == self._pos
                data = self._fetch(length)
            self.progress()
            return data

        def _fetch(self, length):
            """Read from the underlying reader and update the kept tail."""
            data = self._reader.read(length)
            self._read += len(data)
            self._pos = self._read
            # Keep the last 8 bytes we read for GzipFile. Short (or empty)
            # reads are merged with the previous tail instead of replacing
            # it, so the tail survives a final read at EOF.
            tail = data[-self._TAIL_SIZE:]
            if len(tail) < self._TAIL_SIZE and self._buf:
                tail = (self._buf + tail)[-self._TAIL_SIZE:]
            self._buf = tail
            return data

        def progress(self):
            """Report the amount read so far to the progress display."""
            if self._length:
                # Floor division keeps the percentage an integer whatever
                # the division semantics in effect.
                count = self._read * 100 // self._length
            else:
                count = self._read
            self._progress.progress(count)

        def finish(self):
            """Tell the progress display that reading is over."""
            self._progress.finish()

        # GzipFile wants to seek to the end of the file and back, so we add
        # enough tell/seek support to make it happy. It also rewinds 8 bytes
        # for the CRC, so we also handle that.
        def tell(self):
            """Return the current (possibly rewound) position."""
            return self._pos

        def seek(self, pos, how=os.SEEK_SET):
            """Move the position without touching the underlying reader.

            ``os.SEEK_END`` is computed from the ``length`` given to the
            constructor. Any other ``how`` than the three ``os.SEEK_*``
            values raises NotImplementedError. Returns the new position.
            """
            if how == os.SEEK_END:
                self._pos = self._length + pos
            elif how == os.SEEK_SET:
                self._pos = pos
            elif how == os.SEEK_CUR:
                self._pos += pos
            else:
                raise NotImplementedError()
            return self._pos
Example #2
0
    class ReaderProgress(object):
        """Progress-reporting, file-like wrapper around ``reader``.

        Each ``read`` call reports the running total to a ``Progress``
        instance, as an integer percentage of ``length`` when one was given
        and as a byte count otherwise.

        ``tell`` and ``seek`` only track a position; ``seek`` never touches
        the underlying reader. Rewinds are served from a copy of the last
        ``_TAIL_SIZE`` bytes that were read.
        """

        # Size of the tail kept to serve rewinds.
        _TAIL_SIZE = 8

        def __init__(self, reader, length=None):
            self._reader = reader
            self._length = length
            # Bytes pulled from the underlying reader so far.
            self._read = 0
            # Position reported by tell(); lower than _read after a rewind.
            self._pos = 0
            # Copy of the last _TAIL_SIZE bytes pulled from the reader.
            self._buf = ''
            self._progress = Progress(' {}%' if self._length else ' {} bytes')

        def read(self, length):
            """Return up to ``length`` bytes and report progress.

            After a rewind, the bytes are first taken from the kept tail;
            if the request extends past it, the remainder comes from the
            underlying reader.
            """
            # See comment above tell
            behind = self._read - self._pos
            if behind > 0:
                assert behind <= len(self._buf)
                # The tail is addressed from its end and left intact, so
                # partial rewinds and repeated rewinds yield the right
                # bytes.
                start = len(self._buf) - behind
                data = self._buf[start:start + length]
                self._pos += len(data)
                missing = length - len(data)
                if missing > 0:
                    data += self._fetch(missing)
            else:
                assert self._read == self._pos
                data = self._fetch(length)
            self.progress()
            return data

        def _fetch(self, length):
            """Pull ``length`` bytes from the reader and refresh the tail."""
            data = self._reader.read(length)
            self._read += len(data)
            self._pos = self._read
            # Keep the last 8 bytes we read for GzipFile. A short or empty
            # read is appended to the previous tail rather than replacing
            # it, otherwise the read that hits EOF would wipe the tail.
            tail = data[-self._TAIL_SIZE:]
            if len(tail) < self._TAIL_SIZE and self._buf:
                tail = (self._buf + tail)[-self._TAIL_SIZE:]
            self._buf = tail
            return data

        def progress(self):
            """Report the amount read so far to the progress display."""
            if self._length:
                count = self._read * 100 // self._length
            else:
                count = self._read
            self._progress.progress(count)

        def finish(self):
            """Tell the progress display that reading is over."""
            self._progress.finish()

        # GzipFile wants to seek to the end of the file and back, so we add
        # enough tell/seek support to make it happy. It also rewinds 8 bytes
        # for the CRC, so we also handle that.
        def tell(self):
            """Return the current (possibly rewound) position."""
            return self._pos

        def seek(self, pos, how=os.SEEK_SET):
            """Update the tracked position and return it.

            ``os.SEEK_END`` is relative to the ``length`` given to the
            constructor. Values of ``how`` other than ``os.SEEK_SET``,
            ``os.SEEK_CUR`` and ``os.SEEK_END`` raise NotImplementedError.
            """
            if how == os.SEEK_END:
                self._pos = self._length + pos
            elif how == os.SEEK_SET:
                self._pos = pos
            elif how == os.SEEK_CUR:
                self._pos += pos
            else:
                raise NotImplementedError()
            return self._pos
Example #3
0
    class ReaderProgress(object):
        """Wrap ``reader`` so that each ``read`` updates a progress display.

        With a truthy ``length`` the display receives an integer percentage
        of ``length``; otherwise it receives the number of bytes read.
        """

        def __init__(self, reader, length=None):
            self._reader = reader
            self._length = length
            self._read = 0
            template = ' {}%' if self._length else ' {} bytes'
            self._progress = Progress(template)

        def read(self, length):
            """Read up to ``length`` bytes from the wrapped reader, report
            the new running total, and return the bytes."""
            chunk = self._reader.read(length)
            self._read += len(chunk)
            total = self._length
            shown = self._read * 100 // total if total else self._read
            self._progress.progress(shown)
            return chunk

        def finish(self):
            """Tell the progress display that reading is over."""
            self._progress.finish()
Example #4
0
def fsck_quick(force=False):
    """Run a quick consistency check of the git-cinnabar metadata.

    The check goes through the changeset heads, then the manifest heads,
    then the files found in those manifests, reporting problems through a
    FsckStatus. Metadata already covered by `refs/cinnabar/checked` is
    skipped, unless `force` is true or that ref equals
    `refs/cinnabar/broken`.

    When the whole check passes, `refs/cinnabar/checked` is updated to the
    current metadata commit. When file checks report problems,
    `refs/cinnabar/broken` is updated instead.

    Returns 0 when the metadata was already checked or passed the check,
    1 in every other case.
    """
    status = FsckStatus()
    store = GitHgStore()

    metadata_commit = Git.resolve_ref('refs/cinnabar/metadata')
    if not metadata_commit:
        status.info('There does not seem to be any git-cinnabar metadata.\n'
                    'Is this a git-cinnabar clone?')
        return 1
    broken_metadata = Git.resolve_ref('refs/cinnabar/broken')
    checked_metadata = Git.resolve_ref('refs/cinnabar/checked')
    # Metadata that is recorded as both checked and broken can't be used as
    # a baseline for an incremental check.
    if checked_metadata == broken_metadata:
        checked_metadata = None
    if metadata_commit == checked_metadata and not force:
        status.info('The git-cinnabar metadata was already checked and is '
                    'presumably clean.\n'
                    'Try `--force` if you want to check anyways.')
        return 0
    elif force:
        # Forcing means checking everything, not only what changed since
        # the last check.
        checked_metadata = None

    commit = GitCommit(metadata_commit)
    if commit.body != 'files-meta unified-manifests-v2':
        status.info(
            'The git-cinnabar metadata is incompatible with this version.\n'
            'Please use the git-cinnabar version it was used with last.\n')
        return 1
    if len(commit.parents) > 6 or len(commit.parents) < 5:
        status.report('The git-cinnabar metadata seems to be corrupted in '
                      'unexpected ways.\n')
        return 1
    changesets, manifests, hg2git, git2hg, files_meta = commit.parents[:5]

    commit = GitCommit(changesets)
    # Each line of the changesets commit body is "<node> <branch>"; there
    # must be one such line per parent of that commit.
    heads = OrderedDict(
        (node, branch)
        for node, _, branch in (d.partition(' ')
                                for d in commit.body.splitlines()))
    if len(heads) != len(commit.parents):
        status.report('The git-cinnabar metadata seems to be corrupted in '
                      'unexpected ways.\n')
        return 1

    manifest_nodes = []

    parents = None
    fix_changeset_heads = False

    def get_checked_metadata(num):
        # Return the GitCommit for the num-th parent of the previously
        # checked metadata commit, or None when there is none.
        if not checked_metadata:
            return None
        commit = Git.resolve_ref('{}^{}'.format(checked_metadata, num))
        if commit:
            return GitCommit(commit)

    checked_commit = get_checked_metadata(1)
    # TODO: Check that the recorded heads are actually dag heads.
    for c, changeset_node in progress_iter(
            'Checking {} changeset heads',
        ((c, node) for c, node in izip(commit.parents, heads)
         if not checked_commit or c not in checked_commit.parents)):
        gitsha1 = GitHgHelper.hg2git(changeset_node)
        if gitsha1 == NULL_NODE_ID:
            status.report('Missing hg2git metadata for changeset %s' %
                          changeset_node)
            continue
        if gitsha1 != c:
            # The set of head commits is only built when first needed.
            if parents is None:
                parents = set(commit.parents)
            if gitsha1 not in parents:
                status.report('Inconsistent metadata:\n'
                              '  Head metadata says changeset %s maps to %s\n'
                              '  but hg2git metadata says it maps to %s' %
                              (changeset_node, c, gitsha1))
                continue
            # The commit is one of the heads, only not at the expected
            # position: the heads order needs fixing.
            fix_changeset_heads = True
        changeset = store._changeset(c, include_parents=True)
        if not changeset:
            status.report('Missing git2hg metadata for git commit %s' % c)
            continue
        if changeset.node != changeset_node:
            if changeset.node not in heads:
                status.report(
                    'Inconsistent metadata:\n'
                    '  Head metadata says %s maps to changeset %s\n'
                    '  but git2hg metadata says it maps to changeset %s' %
                    (c, changeset_node, changeset.node))
                continue
            fix_changeset_heads = True
        if changeset.node != changeset.sha1:
            status.report('Sha1 mismatch for changeset %s' % changeset.node)
            continue
        changeset_branch = changeset.branch or 'default'
        if heads[changeset.node] != changeset_branch:
            status.report(
                'Inconsistent metadata:\n'
                '  Head metadata says changeset %s is in branch %s\n'
                '  but git2hg metadata says it is in branch %s' %
                (changeset.node, heads[changeset.node], changeset_branch))
            continue
        manifest_nodes.append(changeset.manifest)

    if status('broken'):
        return 1

    # Rebuilding manifests benefits from limiting the difference with
    # the last rebuilt manifest. Similarly, building the list of unique
    # files in all manifests benefits from that too.
    # Unfortunately, the manifest heads are not ordered in a topological
    # relevant matter, and the differences between two consecutive manifests
    # can be much larger than they could be. The consequence is spending a
    # large amount of time rebuilding the manifests and gathering the files
    # list. It's actually faster to attempt to reorder them according to
    # some heuristics first, such that the differences are smaller.
    # Here, we use the depth from the root node(s) to reorder the manifests.
    # This doesn't give the most optimal ordering, but it's already much
    # faster. On a clone of multiple mozilla-* repositories with > 1400 heads,
    # it's close to an order of magnitude difference on the "Checking
    # manifests" loop.
    depths = {}
    roots = {}
    manifest_queue = []
    revs = []
    revs.append('{}^@'.format(manifests))
    if checked_metadata:
        # Leave out what is reachable from the previously checked manifest
        # heads.
        revs.append('^{}^2^@'.format(checked_metadata))
    for m, _, parents in progress_iter(
            'Loading {} manifests',
            GitHgHelper.rev_list('--topo-order', '--reverse', '--full-history',
                                 *revs)):
        manifest_queue.append((m, parents))
        if parents:
            depth = {}
            for p in parents:
                # Parents that were left out of the listing have no known
                # depth, hence the default.
                for root, num in depths.get(p, {}).iteritems():
                    if root in depth:
                        depth[root] = max(depth[root], num + 1)
                    else:
                        depth[root] = num + 1
            if depth:
                depths[m] = depth
                del depth
                continue
        # No parent with a known depth: treat the manifest as a root, and
        # remember its (possibly empty) parents for later.
        depths[m] = {m: 0}
        roots[m] = parents

    if status('broken'):
        return 1

    # TODO: check that all manifest_nodes gathered above are available in the
    # manifests dag, and that the dag heads are the recorded heads.
    manifests_commit = GitCommit(manifests)
    checked_commit = get_checked_metadata(2)
    # Sort the manifest heads that weren't checked before by their depth
    # from each root.
    depths = [([depths[p].get(r, 0) for r in roots], p)
              for p in manifests_commit.parents
              if not checked_commit or p not in checked_commit.parents]
    manifests_commit_parents = [p for _, p in sorted(depths)]
    previous = None
    # Set of (path, sha1) for the files that need checking.
    all_interesting = set()
    for m in progress_iter('Checking {} manifest heads',
                           manifests_commit_parents):
        c = GitCommit(m)
        if not SHA1_RE.match(c.body):
            status.report('Invalid manifest metadata in git commit %s' % m)
            continue
        gitsha1 = GitHgHelper.hg2git(c.body)
        if gitsha1 == NULL_NODE_ID:
            status.report('Missing hg2git metadata for manifest %s' % c.body)
            continue
        if not GitHgHelper.check_manifest(c.body):
            status.report('Sha1 mismatch for manifest %s' % c.body)

        files = {}
        if previous:
            # Only look at what differs from the previous manifest head.
            for _, _, before, after, d, path in GitHgHelper.diff_tree(
                    previous, m):
                if d in 'AM' and before != after and \
                        (path, after) not in all_interesting:
                    files[path] = after
        else:
            for _, t, sha1, path in GitHgHelper.ls_tree(m, recursive=True):
                if (path, sha1) not in all_interesting:
                    files[path] = sha1
        all_interesting.update(files.iteritems())
        previous = m

    if status('broken'):
        return 1

    # Don't check files that were already there in the previously checked
    # manifests.
    previous = None
    for parents in roots.itervalues():
        for p in parents:
            if previous:
                for _, _, before, after, d, path in GitHgHelper.diff_tree(
                        previous, p):
                    if d in 'AM' and before != after:
                        all_interesting.discard((path, after))
            else:
                for _, t, sha1, path in GitHgHelper.ls_tree(p, recursive=True):
                    all_interesting.discard((path, sha1))
            previous = p

    progress = Progress('Checking {} files')
    # Manifests were queued in rev_list order; go through them from the
    # last one, until all the files have been found.
    while all_interesting and manifest_queue:
        (m, parents) = manifest_queue.pop()
        changes = get_changes(m, parents, all=True)
        for path, hg_file, hg_fileparents in changes:
            # Skip files that are identical to one of their parents.
            if hg_fileparents[1:] == (hg_file, ):
                continue
            elif hg_fileparents[:1] == (hg_file, ):
                continue
            # Reaching here means the file received a modification compared
            # to its parents. If it's a file we're going to check below,
            # it means we don't need to check its parents if somehow they were
            # going to be checked. If it's not a file we're going to check
            # below, it's because it's either a file we weren't interested in
            # in the first place, or it's the parent of a file we have checked.
            # Either way, we aren't interested in the parents.
            for p in hg_fileparents:
                all_interesting.discard((path, p))
            if (path, hg_file) not in all_interesting:
                continue
            all_interesting.remove((path, hg_file))
            if not GitHgHelper.check_file(hg_file, *hg_fileparents):
                p = store.manifest_path(path)
                status.report('Sha1 mismatch for file %s\n'
                              '  revision %s' % (p, hg_file))

                print_parents = ' '.join(p for p in hg_fileparents
                                         if p != NULL_NODE_ID)
                if print_parents:
                    # More than 41 characters means two space-separated
                    # sha1s.
                    status.report('  with parent%s %s' %
                                  ('s' if len(print_parents) > 41 else '',
                                   print_parents))
            progress.progress()
    progress.finish()
    if all_interesting:
        status.info('Could not find the following files:')
        for path, sha1 in sorted(all_interesting):
            # Show the converted manifest path, like the sha1 mismatch
            # report above does, rather than the raw path.
            p = store.manifest_path(path)
            status.info('  %s %s' % (sha1, p))
        status.info('This might be a bug in `git cinnabar fsck`. Please open '
                    'an issue, with the message above, on\n'
                    'https://github.com/glandium/git-cinnabar/issues')
        return 1

    if status('broken'):
        status.info('Your git-cinnabar repository appears to be corrupted.\n'
                    'Please open an issue, with the information above, on\n'
                    'https://github.com/glandium/git-cinnabar/issues')
        Git.update_ref('refs/cinnabar/broken', metadata_commit)
        if checked_metadata:
            status.info(
                '\nThen please try to run `git cinnabar rollback --fsck` to '
                'restore last known state, and to update from the mercurial '
                'repository.')
        else:
            status.info('\nThen please try to run `git cinnabar reclone`.')
        status.info(
            '\nPlease note this may affect the commit sha1s of mercurial '
            'changesets, and may require to rebase your local branches.')
        status.info(
            '\nAlternatively, you may start afresh with a new clone. In any '
            'case, please keep this corrupted repository around for further '
            'debugging.')
        return 1

    refresh = []
    if fix_changeset_heads:
        status.fix('Fixing changeset heads metadata order.')
        refresh.append('refs/cinnabar/changesets')
    interval_expired('fsck', 0)
    store.close(refresh=refresh)
    GitHgHelper._helper = False
    # Resolve the metadata ref again before recording it as checked.
    # NOTE(review): presumably because store.close() may have rewritten
    # it - confirm.
    metadata_commit = Git.resolve_ref('refs/cinnabar/metadata')
    Git.update_ref('refs/cinnabar/checked', metadata_commit)
    return 0
Example #5
0
def fsck_quick():
    """Run a quick consistency check of the git-cinnabar metadata.

    The check goes through the changeset heads, then the manifest heads,
    then the files found in those manifests, reporting problems through a
    FsckStatus.

    When the whole check passes, `refs/cinnabar/checked` is updated to the
    current metadata commit. When file checks report problems,
    `refs/cinnabar/broken` is updated instead.

    Returns 0 when the metadata passed the check, 1 otherwise.
    """
    status = FsckStatus()
    store = GitHgStore()

    metadata_commit = Git.resolve_ref('refs/cinnabar/metadata')
    if not metadata_commit:
        status.info(
            'There does not seem to be any git-cinnabar metadata.\n'
            'Is this a git-cinnabar clone?'
        )
        return 1
    commit = GitCommit(metadata_commit)
    if commit.body != 'files-meta unified-manifests-v2':
        status.info(
            'The git-cinnabar metadata is incompatible with this version.\n'
            'Please use the git-cinnabar version it was used with last.\n'
        )
        return 1
    if len(commit.parents) > 6 or len(commit.parents) < 5:
        status.report('The git-cinnabar metadata seems to be corrupted in '
                      'unexpected ways.\n')
        return 1
    changesets, manifests, hg2git, git2hg, files_meta = commit.parents[:5]

    commit = GitCommit(changesets)
    # Each line of the changesets commit body is "<node> <branch>"; there
    # must be one such line per parent of that commit.
    heads = OrderedDict(
        (node, branch)
        for node, _, branch in (d.partition(' ')
                                for d in commit.body.splitlines()))
    if len(heads) != len(commit.parents):
        status.report('The git-cinnabar metadata seems to be corrupted in '
                      'unexpected ways.\n')
        return 1

    manifest_nodes = []

    parents = None
    fix_changeset_heads = False
    # TODO: Check that the recorded heads are actually dag heads.
    for c, changeset_node in progress_iter(
            'Checking {} changeset heads', izip(commit.parents, heads)):
        gitsha1 = GitHgHelper.hg2git(changeset_node)
        if gitsha1 == NULL_NODE_ID:
            status.report('Missing hg2git metadata for changeset %s'
                          % changeset_node)
            continue
        if gitsha1 != c:
            # The set of head commits is only built when first needed.
            if parents is None:
                parents = set(commit.parents)
            if gitsha1 not in parents:
                status.report(
                    'Inconsistent metadata:\n'
                    '  Head metadata says changeset %s maps to %s\n'
                    '  but hg2git metadata says it maps to %s'
                    % (changeset_node, c, gitsha1))
                continue
            # The commit is one of the heads, only not at the expected
            # position: the heads order needs fixing.
            fix_changeset_heads = True
        changeset = store._changeset(c, include_parents=True)
        if not changeset:
            status.report('Missing git2hg metadata for git commit %s' % c)
            continue
        if changeset.node != changeset_node:
            if changeset.node not in heads:
                status.report(
                    'Inconsistent metadata:\n'
                    '  Head metadata says %s maps to changeset %s\n'
                    '  but git2hg metadata says it maps to changeset %s'
                    % (c, changeset_node, changeset.node))
                continue
            fix_changeset_heads = True
        if changeset.node != changeset.sha1:
            status.report('Sha1 mismatch for changeset %s' % changeset.node)
            continue
        changeset_branch = changeset.branch or 'default'
        if heads[changeset.node] != changeset_branch:
            status.report(
                'Inconsistent metadata:\n'
                '  Head metadata says changeset %s is in branch %s\n'
                '  but git2hg metadata says it is in branch %s'
                % (changeset.node, heads[changeset.node], changeset_branch))
            continue
        manifest_nodes.append(changeset.manifest)

    if status('broken'):
        return 1

    # Rebuilding manifests benefits from limiting the difference with
    # the last rebuilt manifest. Similarly, building the list of unique
    # files in all manifests benefits from that too.
    # Unfortunately, the manifest heads are not ordered in a topological
    # relevant matter, and the differences between two consecutive manifests
    # can be much larger than they could be. The consequence is spending a
    # large amount of time rebuilding the manifests and gathering the files
    # list. It's actually faster to attempt to reorder them according to
    # some heuristics first, such that the differences are smaller.
    # Here, we use the depth from the root node(s) to reorder the manifests.
    # This doesn't give the most optimal ordering, but it's already much
    # faster. On a clone of multiple mozilla-* repositories with > 1400 heads,
    # it's close to an order of magnitude difference on the "Checking
    # manifests" loop.
    depths = {}
    roots = []
    manifest_queue = []
    for m, _, parents in progress_iter(
            'Loading {} manifests', GitHgHelper.rev_list(
                '--topo-order', '--reverse', '--full-history', '%s^@'
                % manifests)):
        manifest_queue.append((m, parents))
        if parents:
            # Depth from each root is one more than the largest depth
            # among the parents.
            depth = {}
            for p in parents:
                for root, num in depths[p].iteritems():
                    if root in depth:
                        depth[root] = max(depth[root], num + 1)
                    else:
                        depth[root] = num + 1
            depths[m] = depth
            del depth
        else:
            depths[m] = {m: 0}
            roots.append(m)

    if status('broken'):
        return 1

    # TODO: check that all manifest_nodes gathered above are available in the
    # manifests dag, and that the dag heads are the recorded heads.
    manifests_commit = GitCommit(manifests)
    # Sort the manifest heads by their depth from each root.
    depths = [
        [depths[p].get(r, 0) for r in roots]
        for p in manifests_commit.parents
    ]
    manifests_commit_parents = [
        p for _, p in sorted(zip(depths, manifests_commit.parents))
    ]
    previous = None
    # Set of (path, sha1) for the files that need checking.
    all_interesting = set()
    for m in progress_iter('Checking {} manifest heads',
                           manifests_commit_parents):
        c = GitCommit(m)
        if not SHA1_RE.match(c.body):
            status.report('Invalid manifest metadata in git commit %s' % m)
            continue
        gitsha1 = GitHgHelper.hg2git(c.body)
        if gitsha1 == NULL_NODE_ID:
            status.report('Missing hg2git metadata for manifest %s' % c.body)
            continue
        if not GitHgHelper.check_manifest(c.body):
            status.report('Sha1 mismatch for manifest %s' % c.body)

        files = {}
        if previous:
            # Only look at what differs from the previous manifest head.
            for _, _, before, after, d, path in GitHgHelper.diff_tree(
                    previous, m):
                if d in 'AM' and before != after and \
                        (path, after) not in all_interesting:
                    files[path] = after
        else:
            for _, t, sha1, path in GitHgHelper.ls_tree(m, recursive=True):
                if (path, sha1) not in all_interesting:
                    files[path] = sha1
        all_interesting.update(files.iteritems())
        previous = m

    if status('broken'):
        return 1

    progress = Progress('Checking {} files')
    # Manifests were queued in rev_list order; go through them from the
    # last one, until all the files have been found.
    while all_interesting and manifest_queue:
        (m, parents) = manifest_queue.pop()
        changes = get_changes(m, parents, all=True)
        for path, hg_file, hg_fileparents in changes:
            # Skip files that are identical to one of their parents.
            if hg_fileparents[1:] == (hg_file,):
                continue
            elif hg_fileparents[:1] == (hg_file,):
                continue
            # Reaching here means the file received a modification compared
            # to its parents. If it's a file we're going to check below,
            # it means we don't need to check its parents if somehow they were
            # going to be checked. If it's not a file we're going to check
            # below, it's because it's either a file we weren't interested in
            # in the first place, or it's the parent of a file we have checked.
            # Either way, we aren't interested in the parents.
            for p in hg_fileparents:
                all_interesting.discard((path, p))
            if (path, hg_file) not in all_interesting:
                continue
            all_interesting.remove((path, hg_file))
            if not GitHgHelper.check_file(hg_file, *hg_fileparents):
                p = store.manifest_path(path)
                status.report(
                    'Sha1 mismatch for file %s\n'
                    '  revision %s' % (p, hg_file))

                print_parents = ' '.join(p for p in hg_fileparents
                                         if p != NULL_NODE_ID)
                if print_parents:
                    # More than 41 characters means two space-separated
                    # sha1s.
                    status.report('  with parent%s %s' % (
                        's' if len(print_parents) > 41 else '',
                        print_parents))
            progress.progress()
    progress.finish()
    if all_interesting:
        status.info('Could not find the following files:')
        for path, sha1 in sorted(all_interesting):
            # Show the converted manifest path, like the sha1 mismatch
            # report above does, rather than the raw path.
            p = store.manifest_path(path)
            status.info('  %s %s' % (sha1, p))
        status.info(
            'This might be a bug in `git cinnabar fsck`. Please open '
            'an issue, with the message above, on\n'
            'https://github.com/glandium/git-cinnabar/issues')
        return 1

    if status('broken'):
        status.info(
            'Your git-cinnabar repository appears to be corrupted.\n'
            'Please open an issue, with the information above, on\n'
            'https://github.com/glandium/git-cinnabar/issues')
        Git.update_ref('refs/cinnabar/broken', metadata_commit)
        # The advice depends on whether some metadata was recorded as
        # checked before.
        if Git.resolve_ref('refs/cinnabar/checked'):
            status.info(
                '\nThen please try to run `git cinnabar rollback --fsck` to '
                'restore last known state, and to update from the mercurial '
                'repository.')
        else:
            status.info('\nThen please try to run `git cinnabar reclone`.')
        status.info(
            '\nPlease note this may affect the commit sha1s of mercurial '
            'changesets, and may require to rebase your local branches.')
        status.info(
            '\nAlternatively, you may start afresh with a new clone. In any '
            'case, please keep this corrupted repository around for further '
            'debugging.')
        return 1

    refresh = []
    if fix_changeset_heads:
        status.fix('Fixing changeset heads metadata order.')
        refresh.append('refs/cinnabar/changesets')
    interval_expired('fsck', 0)
    store.close(refresh=refresh)
    GitHgHelper._helper = False
    # Resolve the metadata ref again before recording it as checked.
    # NOTE(review): presumably because store.close() may have rewritten
    # it - confirm.
    metadata_commit = Git.resolve_ref('refs/cinnabar/metadata')
    Git.update_ref('refs/cinnabar/checked', metadata_commit)
    return 0