class ReaderProgress(object):
    """Wrap a reader object and report how much has been read from it.

    Every call to `read` forwards to the wrapped reader (or replays bytes
    from a small internal buffer after a rewind) and updates a `Progress`
    display: a percentage when `length` is given and non-zero, a raw byte
    count otherwise.

    reader: object exposing a `read(length)` method.
    length: expected total number of bytes, or None when unknown.
    """

    def __init__(self, reader, length=None):
        self._reader = reader
        self._length = length
        # Total number of bytes pulled from the underlying reader.
        self._read = 0
        # Logical position, as exposed through tell()/seek().
        self._pos = 0
        # Tail of the last chunk read, replayed after a short rewind.
        self._buf = ''
        self._progress = Progress(' {}%' if self._length else ' {} bytes')

    def read(self, length):
        """Return up to `length` bytes and refresh the progress display."""
        # See comment above tell
        if self._pos < self._read:
            # The position was rewound: serve the request from the bytes
            # we kept around rather than from the underlying reader.
            assert self._read - self._pos <= 8
            assert length <= len(self._buf)
            data = self._buf[:length]
            self._buf = self._buf[length:]
            self._pos += length
        else:
            assert self._read == self._pos
            data = self._reader.read(length)
            self._read += len(data)
            self._pos = self._read
            # Keep the last 8 bytes we read for GzipFile
            self._buf = data[-8:]
        self.progress()
        return data

    def progress(self):
        """Push the current percentage (or byte count) to the display."""
        if self._length:
            # Floor division keeps the percentage an integer whether or not
            # true division is in effect.
            count = self._read * 100 // self._length
        else:
            count = self._read
        self._progress.progress(count)

    def finish(self):
        """Tell the progress display that reading is over."""
        self._progress.finish()

    # GzipFile wants to seek to the end of the file and back, so we add
    # enough tell/seek support to make it happy. It also rewinds 8 bytes
    # for the CRC, so we also handle that.
    def tell(self):
        """Return the current logical position."""
        return self._pos

    def seek(self, pos, how=os.SEEK_SET):
        """Move the logical position and return it.

        Only the position bookkeeping changes; the underlying reader is
        never repositioned. `os.SEEK_END` relies on the `length` given at
        construction time. Any `how` other than the three `os.SEEK_*`
        constants raises NotImplementedError.
        """
        if how == os.SEEK_END:
            self._pos = self._length + pos
        elif how == os.SEEK_SET:
            self._pos = pos
        elif how == os.SEEK_CUR:
            self._pos += pos
        else:
            raise NotImplementedError()
        return self._pos
class ReaderProgress(object):
    """Reader wrapper that tracks consumption and shows it as progress.

    The display is a percentage when a non-zero `length` is supplied and
    a byte count otherwise.
    """

    def __init__(self, reader, length=None):
        self._reader = reader
        self._length = length
        # _read: bytes taken from the reader; _pos: position seen by callers.
        self._read = self._pos = 0
        # Last few bytes handed out, so a small rewind can be replayed.
        self._buf = ''
        template = ' {}%' if self._length else ' {} bytes'
        self._progress = Progress(template)

    def read(self, length):
        """Hand out up to `length` bytes, then update the display."""
        # See comment above tell
        rewound = self._pos < self._read
        if not rewound:
            assert self._read == self._pos
            chunk = self._reader.read(length)
            self._read += len(chunk)
            self._pos = self._read
            # Keep the last 8 bytes we read for GzipFile
            self._buf = chunk[-8:]
        else:
            assert self._read - self._pos <= 8
            assert length <= len(self._buf)
            chunk, self._buf = self._buf[:length], self._buf[length:]
            self._pos += length
        self.progress()
        return chunk

    def progress(self):
        """Report the percentage read, or the byte count without a length."""
        count = self._read
        if self._length:
            count = count * 100 // self._length
        self._progress.progress(count)

    def finish(self):
        """Close out the progress display."""
        self._progress.finish()

    # GzipFile wants to seek to the end of the file and back, so we add
    # enough tell/seek support to make it happy. It also rewinds 8 bytes
    # for the CRC, so we also handle that.
    def tell(self):
        """Return the position as seen by the caller."""
        return self._pos

    def seek(self, pos, how=os.SEEK_SET):
        """Update the caller-visible position and return it.

        Raises NotImplementedError for an unrecognized `how`.
        """
        if how == os.SEEK_SET:
            target = pos
        elif how == os.SEEK_CUR:
            target = self._pos + pos
        elif how == os.SEEK_END:
            target = self._length + pos
        else:
            raise NotImplementedError()
        self._pos = target
        return target
class ReaderProgress(object):
    """Pass-through reader that displays how much data went through it.

    With a non-zero `length` the display shows a percentage; otherwise it
    shows the number of bytes read so far.
    """

    def __init__(self, reader, length=None):
        self._reader = reader
        self._length = length
        # Running total of bytes returned by the wrapped reader.
        self._read = 0
        template = ' {}%' if self._length else ' {} bytes'
        self._progress = Progress(template)

    def read(self, length):
        """Read up to `length` bytes from the reader and report progress."""
        chunk = self._reader.read(length)
        self._read += len(chunk)
        total = self._length
        shown = self._read * 100 // total if total else self._read
        self._progress.progress(shown)
        return chunk

    def finish(self):
        """Close out the progress display."""
        self._progress.finish()
def fsck_quick(force=False):
    """Run the quick consistency check of the git-cinnabar metadata.

    The check covers, in order: the changeset heads recorded in the
    metadata, the manifest heads, and the file revisions referenced by
    those manifests. Anything already covered by the metadata recorded in
    `refs/cinnabar/checked` is skipped, unless that ref equals
    `refs/cinnabar/broken` or `force` is set.

    On success `refs/cinnabar/checked` is updated to the current metadata
    commit. When corruption is found after the file check,
    `refs/cinnabar/broken` is updated instead and recovery instructions
    are printed.

    force: when true, ignore the previously checked state and check
        everything again.

    Returns 0 when the metadata is clean (or was already checked and
    `force` is false), 1 otherwise.
    """
    status = FsckStatus()
    store = GitHgStore()

    metadata_commit = Git.resolve_ref('refs/cinnabar/metadata')
    if not metadata_commit:
        status.info('There does not seem to be any git-cinnabar metadata.\n'
                    'Is this a git-cinnabar clone?')
        return 1

    broken_metadata = Git.resolve_ref('refs/cinnabar/broken')
    checked_metadata = Git.resolve_ref('refs/cinnabar/checked')
    # A checked state that is also flagged as broken cannot be trusted.
    if checked_metadata == broken_metadata:
        checked_metadata = None
    if metadata_commit == checked_metadata and not force:
        status.info('The git-cinnabar metadata was already checked and is '
                    'presumably clean.\n'
                    'Try `--force` if you want to check anyways.')
        return 0
    elif force:
        checked_metadata = None

    commit = GitCommit(metadata_commit)
    if commit.body != 'files-meta unified-manifests-v2':
        status.info(
            'The git-cinnabar metadata is incompatible with this version.\n'
            'Please use the git-cinnabar version it was used with last.\n')
        return 1
    if len(commit.parents) > 6 or len(commit.parents) < 5:
        status.report('The git-cinnabar metadata seems to be corrupted in '
                      'unexpected ways.\n')
        return 1
    changesets, manifests, hg2git, git2hg, files_meta = commit.parents[:5]

    commit = GitCommit(changesets)
    # One "<node> <branch>" line per head, in the same order as the parents
    # of the changesets commit.
    heads = OrderedDict(
        (node, branch)
        for node, _, branch in (d.partition(' ')
                                for d in commit.body.splitlines()))
    if len(heads) != len(commit.parents):
        status.report('The git-cinnabar metadata seems to be corrupted in '
                      'unexpected ways.\n')
        return 1

    manifest_nodes = []

    parents = None
    fix_changeset_heads = False

    def get_checked_metadata(num):
        # Return the num-th parent of the previously checked metadata
        # commit as a GitCommit, or None when there is nothing to reuse.
        if not checked_metadata:
            return None
        commit = Git.resolve_ref('{}^{}'.format(checked_metadata, num))
        if commit:
            return GitCommit(commit)

    checked_commit = get_checked_metadata(1)
    # TODO: Check that the recorded heads are actually dag heads.
    for c, changeset_node in progress_iter(
            'Checking {} changeset heads',
            ((c, node) for c, node in izip(commit.parents, heads)
             if not checked_commit or c not in checked_commit.parents)):
        gitsha1 = GitHgHelper.hg2git(changeset_node)
        if gitsha1 == NULL_NODE_ID:
            status.report('Missing hg2git metadata for changeset %s'
                          % changeset_node)
            continue
        if gitsha1 != c:
            # Built lazily: only needed when a mismatch shows up.
            if parents is None:
                parents = set(commit.parents)
            if gitsha1 not in parents:
                status.report('Inconsistent metadata:\n'
                              ' Head metadata says changeset %s maps to %s\n'
                              ' but hg2git metadata says it maps to %s'
                              % (changeset_node, c, gitsha1))
                continue
            # The commit is a recorded head, just not at this position.
            fix_changeset_heads = True
        changeset = store._changeset(c, include_parents=True)
        if not changeset:
            status.report('Missing git2hg metadata for git commit %s' % c)
            continue
        if changeset.node != changeset_node:
            if changeset.node not in heads:
                status.report(
                    'Inconsistent metadata:\n'
                    ' Head metadata says %s maps to changeset %s\n'
                    ' but git2hg metadata says it maps to changeset %s'
                    % (c, changeset_node, changeset.node))
                continue
            fix_changeset_heads = True
        if changeset.node != changeset.sha1:
            status.report('Sha1 mismatch for changeset %s' % changeset.node)
            continue
        changeset_branch = changeset.branch or 'default'
        if heads[changeset.node] != changeset_branch:
            status.report(
                'Inconsistent metadata:\n'
                ' Head metadata says changeset %s is in branch %s\n'
                ' but git2hg metadata says it is in branch %s'
                % (changeset.node, heads[changeset.node], changeset_branch))
            continue
        manifest_nodes.append(changeset.manifest)

    if status('broken'):
        return 1

    # Rebuilding manifests benefits from limiting the difference with
    # the last rebuilt manifest. Similarly, building the list of unique
    # files in all manifests benefits from that too.
    # Unfortunately, the manifest heads are not ordered in a topologically
    # relevant manner, and the differences between two consecutive manifests
    # can be much larger than they could be. The consequence is spending a
    # large amount of time rebuilding the manifests and gathering the files
    # list. It's actually faster to attempt to reorder them according to
    # some heuristics first, such that the differences are smaller.
    # Here, we use the depth from the root node(s) to reorder the manifests.
    # This doesn't give the most optimal ordering, but it's already much
    # faster. On a clone of multiple mozilla-* repositories with > 1400 heads,
    # it's close to an order of magnitude difference on the "Checking
    # manifests" loop.
    depths = {}
    roots = {}
    manifest_queue = []
    revs = []
    revs.append('{}^@'.format(manifests))
    if checked_metadata:
        # Exclude what is reachable from the previously checked manifests.
        revs.append('^{}^2^@'.format(checked_metadata))
    for m, _, parents in progress_iter(
            'Loading {} manifests',
            GitHgHelper.rev_list('--topo-order', '--reverse',
                                 '--full-history', *revs)):
        manifest_queue.append((m, parents))
        if parents:
            depth = {}
            for p in parents:
                for root, num in depths.get(p, {}).iteritems():
                    if root in depth:
                        depth[root] = max(depth[root], num + 1)
                    else:
                        depth[root] = num + 1
            if depth:
                depths[m] = depth
                del depth
                continue
        # No parents, or none of them has a known depth: treat this
        # manifest as a root of the part of the dag being checked.
        depths[m] = {m: 0}
        roots[m] = parents

    if status('broken'):
        return 1

    # TODO: check that all manifest_nodes gathered above are available in the
    # manifests dag, and that the dag heads are the recorded heads.
    manifests_commit = GitCommit(manifests)
    checked_commit = get_checked_metadata(2)
    depths = [([depths[p].get(r, 0) for r in roots], p)
              for p in manifests_commit.parents
              if not checked_commit or p not in checked_commit.parents]
    manifests_commit_parents = [p for _, p in sorted(depths)]
    previous = None
    # Set of (path, file sha1) pairs that still need to be checked.
    all_interesting = set()
    for m in progress_iter('Checking {} manifest heads',
                           manifests_commit_parents):
        c = GitCommit(m)
        if not SHA1_RE.match(c.body):
            status.report('Invalid manifest metadata in git commit %s' % m)
            continue
        gitsha1 = GitHgHelper.hg2git(c.body)
        if gitsha1 == NULL_NODE_ID:
            status.report('Missing hg2git metadata for manifest %s' % c.body)
            continue
        if not GitHgHelper.check_manifest(c.body):
            status.report('Sha1 mismatch for manifest %s' % c.body)

        files = {}
        if previous:
            # Only look at what changed since the previous manifest head.
            for _, _, before, after, d, path in GitHgHelper.diff_tree(
                    previous, m):
                if d in 'AM' and before != after and \
                        (path, after) not in all_interesting:
                    files[path] = after
        else:
            for _, t, sha1, path in GitHgHelper.ls_tree(m, recursive=True):
                if (path, sha1) not in all_interesting:
                    files[path] = sha1
        all_interesting.update(files.iteritems())
        previous = m

    if status('broken'):
        return 1

    # Don't check files that were already there in the previously checked
    # manifests.
    previous = None
    for parents in roots.itervalues():
        for p in parents:
            if previous:
                for _, _, before, after, d, path in GitHgHelper.diff_tree(
                        previous, p):
                    if d in 'AM' and before != after:
                        all_interesting.discard((path, after))
            else:
                for _, t, sha1, path in GitHgHelper.ls_tree(
                        p, recursive=True):
                    all_interesting.discard((path, sha1))
            previous = p

    progress = Progress('Checking {} files')
    # Manifests are taken from the end of the queue, i.e. in the reverse of
    # the order rev_list produced them.
    while all_interesting and manifest_queue:
        (m, parents) = manifest_queue.pop()
        changes = get_changes(m, parents, all=True)
        for path, hg_file, hg_fileparents in changes:
            if hg_fileparents[1:] == (hg_file, ):
                continue
            elif hg_fileparents[:1] == (hg_file, ):
                continue
            # Reaching here means the file received a modification compared
            # to its parents. If it's a file we're going to check below,
            # it means we don't need to check its parents if somehow they were
            # going to be checked. If it's not a file we're going to check
            # below, it's because it's either a file we weren't interested in
            # in the first place, or it's the parent of a file we have checked.
            # Either way, we aren't interested in the parents.
            for p in hg_fileparents:
                all_interesting.discard((path, p))
            if (path, hg_file) not in all_interesting:
                continue
            all_interesting.remove((path, hg_file))
            if not GitHgHelper.check_file(hg_file, *hg_fileparents):
                p = store.manifest_path(path)
                status.report('Sha1 mismatch for file %s\n'
                              ' revision %s' % (p, hg_file))

                print_parents = ' '.join(p for p in hg_fileparents
                                         if p != NULL_NODE_ID)
                if print_parents:
                    # NOTE(review): presumably node ids are 40 characters
                    # long, so a longer string means two parents - confirm.
                    status.report(' with parent%s %s' % (
                        's' if len(print_parents) > 41 else '',
                        print_parents))
            progress.progress()
    progress.finish()

    if all_interesting:
        status.info('Could not find the following files:')
        for path, sha1 in sorted(all_interesting):
            # Show the manifest path, like the sha1 mismatch report above
            # does, rather than the raw path.
            p = store.manifest_path(path)
            status.info(' %s %s' % (sha1, p))
        status.info('This might be a bug in `git cinnabar fsck`. Please open '
                    'an issue, with the message above, on\n'
                    'https://github.com/glandium/git-cinnabar/issues')
        return 1

    if status('broken'):
        status.info('Your git-cinnabar repository appears to be corrupted.\n'
                    'Please open an issue, with the information above, on\n'
                    'https://github.com/glandium/git-cinnabar/issues')
        Git.update_ref('refs/cinnabar/broken', metadata_commit)
        if checked_metadata:
            status.info(
                '\nThen please try to run `git cinnabar rollback --fsck` to '
                'restore last known state, and to update from the mercurial '
                'repository.')
        else:
            status.info('\nThen please try to run `git cinnabar reclone`.')
        status.info(
            '\nPlease note this may affect the commit sha1s of mercurial '
            'changesets, and may require to rebase your local branches.')
        status.info(
            '\nAlternatively, you may start afresh with a new clone. In any '
            'case, please keep this corrupted repository around for further '
            'debugging.')
        return 1

    refresh = []
    if fix_changeset_heads:
        status.fix('Fixing changeset heads metadata order.')
        refresh.append('refs/cinnabar/changesets')

    interval_expired('fsck', 0)
    store.close(refresh=refresh)
    GitHgHelper._helper = False
    # Resolve the ref again after closing the store and record it as checked.
    metadata_commit = Git.resolve_ref('refs/cinnabar/metadata')
    Git.update_ref('refs/cinnabar/checked', metadata_commit)
    return 0
def fsck_quick():
    """Run the quick consistency check of the git-cinnabar metadata.

    The check covers, in order: the changeset heads recorded in the
    metadata, the manifest heads, and the file revisions referenced by
    those manifests.

    On success `refs/cinnabar/checked` is updated to the current metadata
    commit. When corruption is found after the file check,
    `refs/cinnabar/broken` is updated instead and recovery instructions
    are printed.

    Returns 0 when the metadata is clean, 1 otherwise.
    """
    status = FsckStatus()
    store = GitHgStore()

    metadata_commit = Git.resolve_ref('refs/cinnabar/metadata')
    if not metadata_commit:
        status.info(
            'There does not seem to be any git-cinnabar metadata.\n'
            'Is this a git-cinnabar clone?'
        )
        return 1

    commit = GitCommit(metadata_commit)
    if commit.body != 'files-meta unified-manifests-v2':
        status.info(
            'The git-cinnabar metadata is incompatible with this version.\n'
            'Please use the git-cinnabar version it was used with last.\n'
        )
        return 1
    if len(commit.parents) > 6 or len(commit.parents) < 5:
        status.report('The git-cinnabar metadata seems to be corrupted in '
                      'unexpected ways.\n')
        return 1
    changesets, manifests, hg2git, git2hg, files_meta = commit.parents[:5]

    commit = GitCommit(changesets)
    # One "<node> <branch>" line per head, in the same order as the parents
    # of the changesets commit.
    heads = OrderedDict(
        (node, branch)
        for node, _, branch in (d.partition(' ')
                                for d in commit.body.splitlines()))
    if len(heads) != len(commit.parents):
        status.report('The git-cinnabar metadata seems to be corrupted in '
                      'unexpected ways.\n')
        return 1

    manifest_nodes = []

    parents = None
    fix_changeset_heads = False
    # TODO: Check that the recorded heads are actually dag heads.
    for c, changeset_node in progress_iter(
            'Checking {} changeset heads',
            izip(commit.parents, heads)):
        gitsha1 = GitHgHelper.hg2git(changeset_node)
        if gitsha1 == NULL_NODE_ID:
            status.report('Missing hg2git metadata for changeset %s'
                          % changeset_node)
            continue
        if gitsha1 != c:
            # Built lazily: only needed when a mismatch shows up.
            if parents is None:
                parents = set(commit.parents)
            if gitsha1 not in parents:
                status.report(
                    'Inconsistent metadata:\n'
                    ' Head metadata says changeset %s maps to %s\n'
                    ' but hg2git metadata says it maps to %s'
                    % (changeset_node, c, gitsha1))
                continue
            # The commit is a recorded head, just not at this position.
            fix_changeset_heads = True
        changeset = store._changeset(c, include_parents=True)
        if not changeset:
            status.report('Missing git2hg metadata for git commit %s' % c)
            continue
        if changeset.node != changeset_node:
            if changeset.node not in heads:
                status.report(
                    'Inconsistent metadata:\n'
                    ' Head metadata says %s maps to changeset %s\n'
                    ' but git2hg metadata says it maps to changeset %s'
                    % (c, changeset_node, changeset.node))
                continue
            fix_changeset_heads = True
        if changeset.node != changeset.sha1:
            status.report('Sha1 mismatch for changeset %s' % changeset.node)
            continue
        changeset_branch = changeset.branch or 'default'
        if heads[changeset.node] != changeset_branch:
            status.report(
                'Inconsistent metadata:\n'
                ' Head metadata says changeset %s is in branch %s\n'
                ' but git2hg metadata says it is in branch %s'
                % (changeset.node, heads[changeset.node], changeset_branch))
            continue
        manifest_nodes.append(changeset.manifest)

    if status('broken'):
        return 1

    # Rebuilding manifests benefits from limiting the difference with
    # the last rebuilt manifest. Similarly, building the list of unique
    # files in all manifests benefits from that too.
    # Unfortunately, the manifest heads are not ordered in a topologically
    # relevant manner, and the differences between two consecutive manifests
    # can be much larger than they could be. The consequence is spending a
    # large amount of time rebuilding the manifests and gathering the files
    # list. It's actually faster to attempt to reorder them according to
    # some heuristics first, such that the differences are smaller.
    # Here, we use the depth from the root node(s) to reorder the manifests.
    # This doesn't give the most optimal ordering, but it's already much
    # faster. On a clone of multiple mozilla-* repositories with > 1400 heads,
    # it's close to an order of magnitude difference on the "Checking
    # manifests" loop.
    depths = {}
    roots = []
    manifest_queue = []
    for m, _, parents in progress_iter(
            'Loading {} manifests',
            GitHgHelper.rev_list(
                '--topo-order', '--reverse', '--full-history',
                '%s^@' % manifests)):
        manifest_queue.append((m, parents))
        if parents:
            # Depth of m from each root is one more than the deepest parent.
            depth = {}
            for p in parents:
                for root, num in depths[p].iteritems():
                    if root in depth:
                        depth[root] = max(depth[root], num + 1)
                    else:
                        depth[root] = num + 1
            depths[m] = depth
            del depth
        else:
            depths[m] = {m: 0}
            roots.append(m)

    if status('broken'):
        return 1

    # TODO: check that all manifest_nodes gathered above are available in the
    # manifests dag, and that the dag heads are the recorded heads.
    manifests_commit = GitCommit(manifests)
    depths = [
        [depths[p].get(r, 0) for r in roots]
        for p in manifests_commit.parents
    ]
    manifests_commit_parents = [
        p for _, p in sorted(zip(depths, manifests_commit.parents))
    ]
    previous = None
    # Set of (path, file sha1) pairs that still need to be checked.
    all_interesting = set()
    for m in progress_iter('Checking {} manifest heads',
                           manifests_commit_parents):
        c = GitCommit(m)
        if not SHA1_RE.match(c.body):
            status.report('Invalid manifest metadata in git commit %s' % m)
            continue
        gitsha1 = GitHgHelper.hg2git(c.body)
        if gitsha1 == NULL_NODE_ID:
            status.report('Missing hg2git metadata for manifest %s' % c.body)
            continue
        if not GitHgHelper.check_manifest(c.body):
            status.report('Sha1 mismatch for manifest %s' % c.body)

        files = {}
        if previous:
            # Only look at what changed since the previous manifest head.
            for _, _, before, after, d, path in GitHgHelper.diff_tree(
                    previous, m):
                if d in 'AM' and before != after and \
                        (path, after) not in all_interesting:
                    files[path] = after
        else:
            for _, t, sha1, path in GitHgHelper.ls_tree(m, recursive=True):
                if (path, sha1) not in all_interesting:
                    files[path] = sha1
        all_interesting.update(files.iteritems())
        previous = m

    if status('broken'):
        return 1

    progress = Progress('Checking {} files')
    # Manifests are taken from the end of the queue, i.e. in the reverse of
    # the order rev_list produced them.
    while all_interesting and manifest_queue:
        (m, parents) = manifest_queue.pop()
        changes = get_changes(m, parents, all=True)
        for path, hg_file, hg_fileparents in changes:
            if hg_fileparents[1:] == (hg_file,):
                continue
            elif hg_fileparents[:1] == (hg_file,):
                continue
            # Reaching here means the file received a modification compared
            # to its parents. If it's a file we're going to check below,
            # it means we don't need to check its parents if somehow they were
            # going to be checked. If it's not a file we're going to check
            # below, it's because it's either a file we weren't interested in
            # in the first place, or it's the parent of a file we have checked.
            # Either way, we aren't interested in the parents.
            for p in hg_fileparents:
                all_interesting.discard((path, p))
            if (path, hg_file) not in all_interesting:
                continue
            all_interesting.remove((path, hg_file))
            if not GitHgHelper.check_file(hg_file, *hg_fileparents):
                p = store.manifest_path(path)
                status.report(
                    'Sha1 mismatch for file %s\n'
                    ' revision %s' % (p, hg_file))

                print_parents = ' '.join(p for p in hg_fileparents
                                         if p != NULL_NODE_ID)
                if print_parents:
                    # NOTE(review): presumably node ids are 40 characters
                    # long, so a longer string means two parents - confirm.
                    status.report(' with parent%s %s' % (
                        's' if len(print_parents) > 41 else '',
                        print_parents))
            progress.progress()
    progress.finish()

    if all_interesting:
        status.info('Could not find the following files:')
        for path, sha1 in sorted(all_interesting):
            # Show the manifest path, like the sha1 mismatch report above
            # does, rather than the raw path.
            p = store.manifest_path(path)
            status.info(' %s %s' % (sha1, p))
        status.info(
            'This might be a bug in `git cinnabar fsck`. Please open '
            'an issue, with the message above, on\n'
            'https://github.com/glandium/git-cinnabar/issues')
        return 1

    if status('broken'):
        status.info(
            'Your git-cinnabar repository appears to be corrupted.\n'
            'Please open an issue, with the information above, on\n'
            'https://github.com/glandium/git-cinnabar/issues')
        Git.update_ref('refs/cinnabar/broken', metadata_commit)
        if Git.resolve_ref('refs/cinnabar/checked'):
            status.info(
                '\nThen please try to run `git cinnabar rollback --fsck` to '
                'restore last known state, and to update from the mercurial '
                'repository.')
        else:
            status.info('\nThen please try to run `git cinnabar reclone`.')
        status.info(
            '\nPlease note this may affect the commit sha1s of mercurial '
            'changesets, and may require to rebase your local branches.')
        status.info(
            '\nAlternatively, you may start afresh with a new clone. In any '
            'case, please keep this corrupted repository around for further '
            'debugging.')
        return 1

    refresh = []
    if fix_changeset_heads:
        status.fix('Fixing changeset heads metadata order.')
        refresh.append('refs/cinnabar/changesets')

    interval_expired('fsck', 0)
    store.close(refresh=refresh)
    GitHgHelper._helper = False
    # Resolve the ref again after closing the store and record it as checked.
    metadata_commit = Git.resolve_ref('refs/cinnabar/metadata')
    Git.update_ref('refs/cinnabar/checked', metadata_commit)
    return 0