Esempio n. 1
0
def data(args):
    '''dump the contents of a mercurial revision'''

    store = GitHgStore()
    if args.changeset and args.manifest:
        print('Cannot use both -c and -m.', file=sys.stderr)
        return 1
    if args.changeset:
        sys.stdout.write(store.changeset(args.rev).raw_data)
    elif args.manifest:
        sys.stdout.write(store.manifest(args.rev).data)
    else:
        sys.stdout.write(store.file(args.rev).raw_data)
    store.close()
Esempio n. 2
0
def data(args):
    '''dump the contents of a mercurial revision'''

    store = GitHgStore()
    if args.changeset and args.manifest:
        print >>sys.stderr, 'Cannot use both -c and -m.'
        return 1
    if args.changeset:
        sys.stdout.write(store.changeset(args.rev).raw_data)
    elif args.manifest:
        sys.stdout.write(store.manifest(args.rev).data)
    else:
        sys.stdout.write(store.file(args.rev).raw_data)
    store.close()
Esempio n. 3
0
def data(args):
    '''dump the contents of a mercurial revision'''

    try:
        store = GitHgStore()
    except UpgradeException as e:
        print >> sys.stderr, e.message
        return 1
    if args.changeset and args.manifest:
        print >> sys.stderr, 'Cannot use both -c and -m.'
        return 1
    if args.changeset:
        sys.stdout.write(store.changeset(args.rev).raw_data)
    elif args.manifest:
        sys.stdout.write(store.manifest(args.rev).data)
    else:
        sys.stdout.write(store.file(args.rev).raw_data)
    store.close()
Esempio n. 4
0
def main(args):
    cmd = args.pop(0)
    if cmd == 'data':
        store = GitHgStore()
        if args[0] == '-c':
            sys.stdout.write(store.changeset(args[1]).data)
        elif args[0] == '-m':
            sys.stdout.write(store.manifest(args[1]).data)
        store.close()
    elif cmd == 'fsck':
        return fsck(args)
    elif cmd == 'reclone':
        for ref in Git.for_each_ref('refs/cinnabar',
                                    'refs/remote-hg',
                                    'refs/notes/cinnabar',
                                    'refs/notes/remote-hg/git2hg',
                                    format='%(refname)'):
            Git.delete_ref(ref)
        Git.close()

        for line in Git.iter('config', '--get-regexp', 'remote\..*\.url'):
            config, url = line.split()
            name = config[len('remote.'):-len('.url')]
            skip_pref = 'remote.%s.skipDefaultUpdate' % name
            if (url.startswith('hg::')
                    and Git.config(skip_pref, 'bool') != 'true'):
                Git.run('remote', 'update', '--prune', name)

        print 'Please note that reclone left your local branches untouched.'
        print 'They may be based on entirely different commits.'
    elif cmd == 'hg2git':
        for arg in args:
            print GitHgHelper.hg2git(arg)
    elif cmd == 'git2hg':
        for arg in args:
            data = GitHgHelper.git2hg(arg)
            if data:
                data = ChangesetData.parse(data)
                print data.get('changeset', NULL_NODE_ID)
            else:
                print NULL_NODE_ID
    else:
        print >> sys.stderr, 'Unknown command:', cmd
        return 1
Esempio n. 5
0
def main(args):
    cmd = args.pop(0)
    if cmd == 'data':
        store = GitHgStore()
        if args[0] == '-c':
            sys.stdout.write(store.changeset(args[1]).data)
        elif args[0] == '-m':
            sys.stdout.write(store.manifest(args[1]).data)
        store.close()
    elif cmd == 'fsck':
        return fsck(args)
    elif cmd == 'reclone':
        for ref in Git.for_each_ref('refs/cinnabar', 'refs/remote-hg',
                                    'refs/notes/cinnabar',
                                    'refs/notes/remote-hg/git2hg',
                                    format='%(refname)'):
            Git.delete_ref(ref)
        Git.close()

        for line in Git.iter('config', '--get-regexp', 'remote\..*\.url'):
            config, url = line.split()
            name = config[len('remote.'):-len('.url')]
            skip_pref = 'remote.%s.skipDefaultUpdate' % name
            if (url.startswith('hg::') and
                    Git.config(skip_pref, 'bool') != 'true'):
                Git.run('remote', 'update', '--prune', name)

        print 'Please note that reclone left your local branches untouched.'
        print 'They may be based on entirely different commits.'
    elif cmd == 'hg2git':
        for arg in args:
            print GitHgHelper.hg2git(arg)
    elif cmd == 'git2hg':
        for arg in args:
            data = GitHgHelper.git2hg(arg)
            if data:
                data = ChangesetData.parse(data)
                print data.get('changeset', NULL_NODE_ID)
            else:
                print NULL_NODE_ID
    else:
        print >>sys.stderr, 'Unknown command:', cmd
        return 1
Esempio n. 6
0
def fsck(args):
    '''check cinnabar metadata consistency'''

    if not args.commit and not args.full:
        return fsck_quick(args.force)

    status = FsckStatus()

    store = GitHgStore()

    if args.full and args.commit:
        logging.error('Cannot pass both --full and a commit')
        return 1

    if args.commit:
        commits = set()
        all_git_commits = {}

        for c in args.commit:
            cs = store.hg_changeset(c)
            if cs:
                commits.add(c)
                c = cs.node
            commit = GitHgHelper.hg2git(c)
            if commit == NULL_NODE_ID and not cs:
                status.info('Unknown commit or changeset: %s' % c)
                return 1
            if not cs:
                cs = store.hg_changeset(commit)
                commits.add(commit)

        all_git_commits = GitHgHelper.rev_list('--no-walk=unsorted', *commits)
    else:
        all_refs = dict(
            (ref, sha1) for sha1, ref in Git.for_each_ref('refs/cinnabar'))

        if 'refs/cinnabar/metadata' in all_refs:
            git_heads = '%s^^@' % all_refs['refs/cinnabar/metadata']
        else:
            assert False

        all_git_commits = GitHgHelper.rev_list('--topo-order',
                                               '--full-history', '--reverse',
                                               git_heads)

    dag = gitdag()

    GitHgHelper.reset_heads('manifests')

    full_file_check = FileFindParents.logger.isEnabledFor(logging.DEBUG)

    for node, tree, parents in progress_iter('Checking {} changesets',
                                             all_git_commits):
        node = store._replace.get(node, node)
        hg_node = store.hg_changeset(node)
        if not hg_node:
            status.report('Missing note for git commit: ' + node)
            continue
        GitHgHelper.seen('git2hg', node)

        changeset_data = store.changeset(hg_node)
        changeset = changeset_data.node

        GitHgHelper.seen('hg2git', changeset)
        changeset_ref = store.changeset_ref(changeset)
        if not changeset_ref:
            status.report('Missing changeset in hg2git branch: %s' % changeset)
            continue
        elif str(changeset_ref) != node:
            status.report('Commit mismatch for changeset %s\n'
                          '  hg2git: %s\n  commit: %s' %
                          (changeset, changeset_ref, node))

        hg_changeset = store.changeset(changeset, include_parents=True)
        if hg_changeset.node != hg_changeset.sha1:
            status.report('Sha1 mismatch for changeset %s' % changeset)

        dag.add(hg_changeset.node,
                (hg_changeset.parent1, hg_changeset.parent2),
                changeset_data.branch or 'default')

        raw_changeset = Changeset.from_git_commit(node)
        patcher = ChangesetPatcher.from_diff(raw_changeset, changeset_data)
        if patcher != store.read_changeset_data(node):
            status.fix('Adjusted changeset metadata for %s' % changeset)
            GitHgHelper.set('changeset', changeset, NULL_NODE_ID)
            GitHgHelper.set('changeset', changeset, node)
            GitHgHelper.put_blob(patcher, want_sha1=False)
            GitHgHelper.set('changeset-metadata', changeset, NULL_NODE_ID)
            GitHgHelper.set('changeset-metadata', changeset, ':1')

        manifest = changeset_data.manifest
        if GitHgHelper.seen('hg2git', manifest) or manifest == NULL_NODE_ID:
            continue
        manifest_ref = store.manifest_ref(manifest)
        if not manifest_ref:
            status.report('Missing manifest in hg2git branch: %s' % manifest)

        parents = tuple(
            store.changeset(p).manifest for p in hg_changeset.parents)
        git_parents = tuple(
            store.manifest_ref(p) for p in parents if p != NULL_NODE_ID)

        # This doesn't change the value but makes the helper track the manifest
        # dag.
        GitHgHelper.set('manifest', manifest, manifest_ref)

        if not GitHgHelper.check_manifest(manifest):
            status.report('Sha1 mismatch for manifest %s' % manifest)

        manifest_commit_parents = GitCommit(manifest_ref).parents
        if sorted(manifest_commit_parents) != sorted(git_parents):
            # TODO: better error
            status.report(
                '%s(%s) %s != %s' %
                (manifest, manifest_ref, manifest_commit_parents, git_parents))

        # TODO: check that manifest content matches changeset content

        changes = get_changes(manifest_ref, git_parents)
        for path, hg_file, hg_fileparents in changes:
            if hg_file != NULL_NODE_ID and (hg_file == HG_EMPTY_FILE
                                            or GitHgHelper.seen(
                                                'hg2git', hg_file)):
                if full_file_check:
                    file = store.file(hg_file, hg_fileparents, git_parents,
                                      store.manifest_path(path))
                    valid = file.node == file.sha1
                else:
                    valid = GitHgHelper.check_file(hg_file, *hg_fileparents)
                if not valid:
                    status.report('Sha1 mismatch for file %s in manifest %s' %
                                  (hg_file, manifest_ref))

    if not args.commit and not status('broken'):
        store_manifest_heads = set(store._manifest_heads_orig)
        manifest_heads = set(GitHgHelper.heads('manifests'))
        if store_manifest_heads != manifest_heads:

            def iter_manifests(a, b):
                for h in a - b:
                    yield h
                for h in b:
                    yield '^%s' % h

            for m, t, p in GitHgHelper.rev_list(
                    '--topo-order', '--full-history', '--reverse',
                    *iter_manifests(manifest_heads, store_manifest_heads)):
                status.fix('Missing manifest commit in manifest branch: %s' %
                           m)

            for m, t, p in GitHgHelper.rev_list(
                    '--topo-order', '--full-history', '--reverse',
                    *iter_manifests(store_manifest_heads, manifest_heads)):
                status.fix('Removing metadata commit %s with no corresponding '
                           'changeset' % (m))

            for h in store_manifest_heads - manifest_heads:
                if GitHgHelper.seen('hg2git', store.hg_manifest(h)):
                    status.fix('Removing non-head reference to %s in manifests'
                               ' metadata.' % h)
    dangling = ()
    if not args.commit and not status('broken'):
        dangling = GitHgHelper.dangling('hg2git')
    for obj in dangling:
        status.fix('Removing dangling metadata for ' + obj)
        # Theoretically, we should figure out if they are files, manifests
        # or changesets and set the right variable accordingly, but in
        # practice, it makes no difference. Reevaluate when GitHgStore.close
        # is modified, though.
        GitHgHelper.set('file', obj, NULL_NODE_ID)
        GitHgHelper.set('file-meta', obj, NULL_NODE_ID)

    if not args.commit and not status('broken'):
        dangling = GitHgHelper.dangling('git2hg')
    for c in dangling:
        status.fix('Removing dangling note for commit ' + c)
        GitHgHelper.set('changeset-metadata', c, NULL_NODE_ID)

    if status('broken'):
        status.info(
            'Your git-cinnabar repository appears to be corrupted. There\n'
            'are known issues in older revisions that have been fixed.\n'
            'Please try running the following command to reset:\n'
            '  git cinnabar reclone\n\n'
            'Please note this command may change the commit sha1s. Your\n'
            'local branches will however stay untouched.\n'
            'Please report any corruption that fsck would detect after a\n'
            'reclone.')

    if not args.commit:
        status.info('Checking head references...')
        computed_heads = defaultdict(set)
        for branch, head in dag.all_heads():
            computed_heads[branch].add(head)

        for branch in sorted(dag.tags()):
            stored_heads = store.heads({branch})
            for head in computed_heads[branch] - stored_heads:
                status.fix('Adding missing head %s in branch %s' %
                           (head, branch))
                store.add_head(head)
            for head in stored_heads - computed_heads[branch]:
                status.fix('Removing non-head reference to %s in branch %s' %
                           (head, branch))
                del store._hgheads[head]

    metadata_commit = Git.resolve_ref('refs/cinnabar/metadata')
    if status('broken'):
        Git.update_ref('refs/cinnabar/broken', metadata_commit)
        return 1

    if args.full:
        Git.update_ref('refs/cinnabar/checked', metadata_commit)
    interval_expired('fsck', 0)
    store.close()

    if status('fixed'):
        return 2
    return 0
Esempio n. 7
0
def fsck(args):
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--manifests', action='store_true',
        help='Validate manifests hashes')
    parser.add_argument(
        '--files', action='store_true',
        help='Validate files hashes')
    parser.add_argument(
        'commit', nargs='*',
        help='Specific commit or changeset to check')
    args = parser.parse_args(args)

    status = {
        'broken': False,
        'fixed': False,
    }

    def info(message):
        sys.stderr.write('\r')
        print message

    def fix(message):
        status['fixed'] = True
        info(message)

    def report(message):
        status['broken'] = True
        info(message)

    store = GitHgStore()
    store.init_fast_import(lambda: FastImport())

    if args.commit:
        all_hg2git = {}
        all_notes = set()
        commits = set()
        all_git_commits = {}

        for c in args.commit:
            data = store.read_changeset_data(c)
            if data:
                all_notes.add(c)
                commits.add(c)
                c = data['changeset']
            commit = GitHgHelper.hg2git(c)
            if commit == NULL_NODE_ID and not data:
                info('Unknown commit or changeset: %s' % c)
                return 1
            if commit != NULL_NODE_ID:
                all_hg2git[c] = commit, 'commit'
            if not data:
                data = store.read_changeset_data(commit)
                commits.add(commit)
                if data:
                    all_notes.add(commit)

        all_git_commits = Git.iter(
            'log', '--no-walk=unsorted', '--stdin', '--format=%T %H',
            stdin=commits)
    else:
        all_hg2git = {
            path.replace('/', ''): (filesha1, intern(typ))
            for mode, typ, filesha1, path in
            progress_iter('Reading %d mercurial to git mappings',
                          Git.ls_tree('refs/cinnabar/hg2git', recursive=True))
        }

        all_notes = set(path.replace('/', '') for mode, typ, filesha1, path in
                        progress_iter(
                            'Reading %d commit to changeset mappings',
                            Git.ls_tree('refs/notes/cinnabar',
                                        recursive=True)))

        manifest_commits = OrderedDict((m, None) for m in progress_iter(
            'Reading %d manifest trees',
            Git.iter('rev-list', '--full-history',
                     '--topo-order', 'refs/cinnabar/manifest'))
        )

        all_git_heads = Git.for_each_ref('refs/cinnabar/branches',
                                         format='%(refname)')

        all_git_commits = Git.iter('log', '--topo-order', '--full-history',
                                   '--reverse', '--stdin', '--format=%T %H',
                                   stdin=all_git_heads)

    store._hg2git_cache = {p: s for p, (s, t) in all_hg2git.iteritems()}

    seen_changesets = set()
    seen_manifests = set()
    seen_manifest_refs = {}
    seen_files = set()
    seen_notes = set()

    hg_manifest = None

    dag = gitdag()

    for line in progress_iter('Checking %d changesets', all_git_commits):
        tree, node = line.split(' ')
        if node not in all_notes:
            report('Missing note for git commit: ' + node)
            continue
        seen_notes.add(node)

        changeset_data = store.read_changeset_data(node)
        changeset = changeset_data['changeset']
        if 'extra' in changeset_data:
            extra = changeset_data['extra']
            header, message = GitHgHelper.cat_file(
                'commit', node).split('\n\n', 1)
            header = dict(l.split(' ', 1) for l in header.splitlines())
            if 'committer' in extra:
                committer_info = store.hg_author_info(header['committer'])
                committer = '%s %d %d' % committer_info
                if (committer != extra['committer'] and
                        header['committer'] != extra['committer'] and
                        committer_info[0] != extra['committer']):
                    report('Committer mismatch between commit and metadata for'
                           ' changeset %s' % changeset)
                if committer == extra['committer']:
                    fix('Fixing useless committer metadata for changeset %s'
                        % changeset)
                    del changeset_data['extra']['committer']
                    store._changesets[changeset] = LazyString(node)
            if header['committer'] != header['author'] and not extra:
                fix('Fixing useless empty extra metadata for changeset %s'
                    % changeset)
                del changeset_data['extra']
                store._changesets[changeset] = LazyString(node)

        seen_changesets.add(changeset)
        changeset_ref = store.changeset_ref(changeset)
        if not changeset_ref:
            report('Missing changeset in hg2git branch: %s' % changeset)
        elif str(changeset_ref) != node:
            report('Commit mismatch for changeset %s\n'
                   '  hg2git: %s\n  commit: %s'
                   % (changeset, changeset_ref, node))

        hg_changeset = store.changeset(changeset, include_parents=True)
        sha1 = hg_changeset.sha1
        if hg_changeset.node != sha1:
            try_fixup = False
            if (changeset, sha1) in (
                ('8c557b7c03a4a753e5c163038f04862e9f65fce1',
                 '249b59139de8e08abeb6c4e261a137c756e7af0e'),
                ('ffdee4a4eb7fc7cae80dfc4cb2fe0c3178773dcf',
                 '415e9d2eac83d508bf58a4df585c5f6b2b0f44ed'),
            ):
                header = hg_changeset.data.split('\n', 4)
                start = sum(len(h) for h in header[:3]) + 1
                changeset_data['patch'] = ((start, start + 1, '1'),)
                try_fixup = True

            # Some know cases of corruptions involve a whitespace after the
            # timezone. Adding an empty extra metadata works around those.
            elif 'extra' not in changeset_data:
                changeset_data['extra'] = {}
                try_fixup = True

            if try_fixup:
                hg_changeset = store.changeset(changeset, include_parents=True)
                sha1 = hg_changeset.sha1
                if hg_changeset.node == sha1:
                    fix('Fixing known sha1 mismatch for changeset %s' %
                        changeset)
                    store._changesets[changeset] = LazyString(node)

        if hg_changeset.node != sha1:
            report('Sha1 mismatch for changeset %s' % changeset)

        dag.add(hg_changeset.node,
                (hg_changeset.parent1, hg_changeset.parent2),
                changeset_data.get('extra', {}).get('branch', 'default'))

        manifest = changeset_data['manifest']
        if manifest in seen_manifests:
            continue
        seen_manifests.add(manifest)
        manifest_ref = store.manifest_ref(manifest)
        if manifest_ref:
            seen_manifest_refs[manifest_ref] = manifest
        if not manifest_ref:
            report('Missing manifest in hg2git branch: %s' % manifest)
        elif not args.commit and manifest_ref not in manifest_commits:
            report('Missing manifest commit in manifest branch: %s' %
                   manifest_ref)

        if args.manifests or args.files:
            parents = tuple(
                store.read_changeset_data(store.changeset_ref(p))['manifest']
                for p in (hg_changeset.parent1, hg_changeset.parent2)
                if p != NULL_NODE_ID
            )

        if args.manifests:
            try:
                with GitHgHelper.query('check-manifest', manifest,
                                       *parents) as stdout:
                    if stdout.readline().strip() != 'ok':
                        report('Sha1 mismatch for manifest %s' % manifest)
            except NoHelperException:
                hg_manifest = store.manifest(manifest)
                hg_manifest.set_parents(*parents)
                if hg_manifest.node != hg_manifest.sha1:
                    report('Sha1 mismatch for manifest %s' % manifest)

        git_ls = one(Git.ls_tree(manifest_ref, 'git'))
        if git_ls:
            mode, typ, sha1, path = git_ls
        else:
            header, message = GitHgHelper.cat_file(
                'commit', manifest_ref).split('\n\n', 1)
            header = dict(l.split(' ', 1) for l in header.splitlines())
            if header['tree'] == EMPTY_TREE:
                sha1 = EMPTY_TREE
            else:
                report('Missing git tree in manifest commit %s' % manifest_ref)
                sha1 = None
        if sha1 and sha1 != tree:
            report('Tree mismatch between manifest commit %s and commit %s'
                   % (manifest_ref, node))

        if args.files:
            changes = get_changes(
                manifest_ref, tuple(store.manifest_ref(p) for p in parents),
                'hg')
            for path, hg_file, hg_fileparents in changes:
                if hg_file != NULL_NODE_ID and hg_file not in seen_files:
                    file = store.file(hg_file)
                    file.set_parents(*hg_fileparents)
                    if file.node != file.sha1:
                        report('Sha1 mismatch for file %s in manifest %s'
                               % (hg_file, manifest_ref))
                    seen_files.add(hg_file)

    if args.files:
        all_hg2git = set(all_hg2git.iterkeys())
    else:
        all_hg2git = set(k for k, (s, t) in all_hg2git.iteritems()
                         if t == 'commit')

    adjusted = {}
    if not args.commit:
        dangling = set(manifest_commits) - set(seen_manifest_refs)
        if dangling:
            def iter_manifests():
                removed_one = False
                yielded = False
                previous = None
                for obj in reversed(manifest_commits):
                    if obj in dangling:
                        fix('Removing metadata commit %s with no hg2git entry'
                            % obj)
                        removed_one = True
                    else:
                        if removed_one:
                            yield obj, previous
                            yielded = True
                        previous = obj

                if removed_one and not yielded:
                    yield obj, False

            for obj, parent in progress_iter('Adjusting %d metadata commits',
                                             iter_manifests()):
                mark = store._fast_import.new_mark()
                if parent is False:
                    Git.update_ref('refs/cinnabar/manifest', obj)
                    continue
                elif parent:
                    parents = (adjusted.get(parent, parent),)
                with store._fast_import.commit(
                        ref='refs/cinnabar/manifest',
                        parents=parents, mark=mark) as commit:
                    mode, typ, tree, path = store._fast_import.ls(obj)
                    commit.filemodify('', tree, typ='tree')
                adjusted[obj] = Mark(mark)

    dangling = all_hg2git - seen_changesets - seen_manifests - seen_files
    if dangling or adjusted:
        with store._fast_import.commit(
                ref='refs/cinnabar/hg2git',
                parents=('refs/cinnabar/hg2git^0',)) as commit:
            for obj in dangling:
                fix('Removing dangling metadata for ' + obj)
                commit.filedelete(sha1path(obj))
            for obj, mark in progress_iter(
                    'Updating hg2git for %d metadata commits',
                    adjusted.iteritems()):
                commit.filemodify(sha1path(seen_manifest_refs[obj]), mark,
                                  typ='commit')

    dangling = all_notes - seen_notes
    if dangling:
        with store._fast_import.commit(
                ref='refs/notes/cinnabar',
                parents=('refs/notes/cinnabar^0',)) as commit:
            for c in dangling:
                fix('Removing dangling note for commit ' + c)
                # That's brute force, but meh.
                for l in range(0, 10):
                    commit.filedelete(sha1path(c, l))

    if status['broken']:
        info('Your git-cinnabar repository appears to be corrupted. There\n'
             'are known issues in older revisions that have been fixed.\n'
             'Please try running the following command to reset:\n'
             '  git cinnabar reclone\n\n'
             'Please note this command may change the commit sha1s. Your\n'
             'local branches will however stay untouched.\n'
             'Please report any corruption that fsck would detect after a\n'
             'reclone.')

    if not args.commit:
        info('Checking head references...')
        computed_heads = defaultdict(set)
        for branch, head in dag.all_heads():
            computed_heads[branch].add(head)

        for branch in sorted(dag.tags()):
            stored_heads = store.heads({branch})
            for head in computed_heads[branch] - stored_heads:
                fix('Adding missing head %s in branch %s' %
                    (head, branch))
                store.add_head(head)
            for head in stored_heads - computed_heads[branch]:
                fix('Removing non-head reference to %s in branch %s' %
                    (head, branch))
                store._hgheads.remove((branch, head))

    store.close()

    if status['broken']:
        return 1
    if status['fixed']:
        return 2
    return 0
Esempio n. 8
0
def fsck(args):
    parser = argparse.ArgumentParser()
    parser.add_argument('--manifests',
                        action='store_true',
                        help='Validate manifests hashes')
    parser.add_argument('--files',
                        action='store_true',
                        help='Validate files hashes')
    parser.add_argument('commit',
                        nargs='*',
                        help='Specific commit or changeset to check')
    args = parser.parse_args(args)

    status = {
        'broken': False,
        'fixed': False,
    }

    def info(message):
        sys.stderr.write('\r')
        print message

    def fix(message):
        status['fixed'] = True
        info(message)

    def report(message):
        status['broken'] = True
        info(message)

    store = GitHgStore()
    store.init_fast_import(lambda: FastImport())

    if args.commit:
        all_hg2git = {}
        all_notes = set()
        commits = set()
        all_git_commits = {}

        for c in args.commit:
            data = store.read_changeset_data(c)
            if data:
                all_notes.add(c)
                commits.add(c)
                c = data['changeset']
            commit = GitHgHelper.hg2git(c)
            if commit == NULL_NODE_ID and not data:
                info('Unknown commit or changeset: %s' % c)
                return 1
            if commit != NULL_NODE_ID:
                all_hg2git[c] = commit, 'commit'
            if not data:
                data = store.read_changeset_data(commit)
                commits.add(commit)
                if data:
                    all_notes.add(commit)

        all_git_commits = Git.iter('log',
                                   '--no-walk=unsorted',
                                   '--stdin',
                                   '--format=%T %H',
                                   stdin=commits)
    else:
        all_hg2git = {
            path.replace('/', ''): (filesha1, intern(typ))
            for mode, typ, filesha1, path in progress_iter(
                'Reading %d mercurial to git mappings',
                Git.ls_tree('refs/cinnabar/hg2git', recursive=True))
        }

        all_notes = set(
            path.replace('/', '')
            for mode, typ, filesha1, path in progress_iter(
                'Reading %d commit to changeset mappings',
                Git.ls_tree('refs/notes/cinnabar', recursive=True)))

        manifest_commits = OrderedDict((m, None) for m in progress_iter(
            'Reading %d manifest trees',
            Git.iter('rev-list', '--full-history', '--topo-order',
                     'refs/cinnabar/manifest')))

        all_git_heads = Git.for_each_ref('refs/cinnabar/branches',
                                         format='%(refname)')

        all_git_commits = Git.iter('log',
                                   '--topo-order',
                                   '--full-history',
                                   '--reverse',
                                   '--stdin',
                                   '--format=%T %H',
                                   stdin=all_git_heads)

    store._hg2git_cache = {p: s for p, (s, t) in all_hg2git.iteritems()}

    seen_changesets = set()
    seen_manifests = set()
    seen_manifest_refs = {}
    seen_files = set()
    seen_notes = set()

    hg_manifest = None

    dag = gitdag()

    for line in progress_iter('Checking %d changesets', all_git_commits):
        tree, node = line.split(' ')
        if node not in all_notes:
            report('Missing note for git commit: ' + node)
            continue
        seen_notes.add(node)

        changeset_data = store.read_changeset_data(node)
        changeset = changeset_data['changeset']
        if 'extra' in changeset_data:
            extra = changeset_data['extra']
            header, message = GitHgHelper.cat_file('commit',
                                                   node).split('\n\n', 1)
            header = dict(l.split(' ', 1) for l in header.splitlines())
            if 'committer' in extra:
                committer_info = store.hg_author_info(header['committer'])
                committer = '%s %d %d' % committer_info
                if (committer != extra['committer']
                        and header['committer'] != extra['committer']
                        and committer_info[0] != extra['committer']):
                    report('Committer mismatch between commit and metadata for'
                           ' changeset %s' % changeset)
                if committer == extra['committer']:
                    fix('Fixing useless committer metadata for changeset %s' %
                        changeset)
                    del changeset_data['extra']['committer']
                    store._changesets[changeset] = LazyString(node)
            if header['committer'] != header['author'] and not extra:
                fix('Fixing useless empty extra metadata for changeset %s' %
                    changeset)
                del changeset_data['extra']
                store._changesets[changeset] = LazyString(node)

        seen_changesets.add(changeset)
        changeset_ref = store.changeset_ref(changeset)
        if not changeset_ref:
            report('Missing changeset in hg2git branch: %s' % changeset)
        elif str(changeset_ref) != node:
            report('Commit mismatch for changeset %s\n'
                   '  hg2git: %s\n  commit: %s' %
                   (changeset, changeset_ref, node))

        hg_changeset = store.changeset(changeset, include_parents=True)
        sha1 = hg_changeset.sha1
        if hg_changeset.node != sha1:
            try_fixup = False
            if (changeset, sha1) in (
                ('8c557b7c03a4a753e5c163038f04862e9f65fce1',
                 '249b59139de8e08abeb6c4e261a137c756e7af0e'),
                ('ffdee4a4eb7fc7cae80dfc4cb2fe0c3178773dcf',
                 '415e9d2eac83d508bf58a4df585c5f6b2b0f44ed'),
            ):
                header = hg_changeset.data.split('\n', 4)
                start = sum(len(h) for h in header[:3]) + 1
                changeset_data['patch'] = ((start, start + 1, '1'), )
                try_fixup = True

            # Some know cases of corruptions involve a whitespace after the
            # timezone. Adding an empty extra metadata works around those.
            elif 'extra' not in changeset_data:
                changeset_data['extra'] = {}
                try_fixup = True

            if try_fixup:
                hg_changeset = store.changeset(changeset, include_parents=True)
                sha1 = hg_changeset.sha1
                if hg_changeset.node == sha1:
                    fix('Fixing known sha1 mismatch for changeset %s' %
                        changeset)
                    store._changesets[changeset] = LazyString(node)

        if hg_changeset.node != sha1:
            report('Sha1 mismatch for changeset %s' % changeset)

        dag.add(hg_changeset.node,
                (hg_changeset.parent1, hg_changeset.parent2),
                changeset_data.get('extra', {}).get('branch', 'default'))

        manifest = changeset_data['manifest']
        if manifest in seen_manifests:
            continue
        seen_manifests.add(manifest)
        manifest_ref = store.manifest_ref(manifest)
        if manifest_ref:
            seen_manifest_refs[manifest_ref] = manifest
        if not manifest_ref:
            report('Missing manifest in hg2git branch: %s' % manifest)
        elif not args.commit and manifest_ref not in manifest_commits:
            report('Missing manifest commit in manifest branch: %s' %
                   manifest_ref)

        if args.manifests or args.files:
            parents = tuple(
                store.read_changeset_data(store.changeset_ref(p))['manifest']
                for p in (hg_changeset.parent1, hg_changeset.parent2)
                if p != NULL_NODE_ID)

        if args.manifests:
            try:
                with GitHgHelper.query('check-manifest', manifest,
                                       *parents) as stdout:
                    if stdout.readline().strip() != 'ok':
                        report('Sha1 mismatch for manifest %s' % manifest)
            except NoHelperException:
                hg_manifest = store.manifest(manifest)
                hg_manifest.set_parents(*parents)
                if hg_manifest.node != hg_manifest.sha1:
                    report('Sha1 mismatch for manifest %s' % manifest)

        git_ls = one(Git.ls_tree(manifest_ref, 'git'))
        if git_ls:
            mode, typ, sha1, path = git_ls
        else:
            header, message = GitHgHelper.cat_file('commit',
                                                   manifest_ref).split(
                                                       '\n\n', 1)
            header = dict(l.split(' ', 1) for l in header.splitlines())
            if header['tree'] == EMPTY_TREE:
                sha1 = EMPTY_TREE
            else:
                report('Missing git tree in manifest commit %s' % manifest_ref)
                sha1 = None
        if sha1 and sha1 != tree:
            report('Tree mismatch between manifest commit %s and commit %s' %
                   (manifest_ref, node))

        if args.files:
            changes = get_changes(
                manifest_ref, tuple(store.manifest_ref(p) for p in parents),
                'hg')
            for path, hg_file, hg_fileparents in changes:
                if hg_file != NULL_NODE_ID and hg_file not in seen_files:
                    file = store.file(hg_file)
                    file.set_parents(*hg_fileparents)
                    if file.node != file.sha1:
                        report('Sha1 mismatch for file %s in manifest %s' %
                               (hg_file, manifest_ref))
                    seen_files.add(hg_file)

    if args.files:
        all_hg2git = set(all_hg2git.iterkeys())
    else:
        all_hg2git = set(k for k, (s, t) in all_hg2git.iteritems()
                         if t == 'commit')

    adjusted = {}
    if not args.commit:
        dangling = set(manifest_commits) - set(seen_manifest_refs)
        if dangling:

            def iter_manifests():
                removed_one = False
                yielded = False
                previous = None
                for obj in reversed(manifest_commits):
                    if obj in dangling:
                        fix('Removing metadata commit %s with no hg2git entry'
                            % obj)
                        removed_one = True
                    else:
                        if removed_one:
                            yield obj, previous
                            yielded = True
                        previous = obj

                if removed_one and not yielded:
                    yield obj, False

            for obj, parent in progress_iter('Adjusting %d metadata commits',
                                             iter_manifests()):
                mark = store._fast_import.new_mark()
                if parent is False:
                    Git.update_ref('refs/cinnabar/manifest', obj)
                    continue
                elif parent:
                    parents = (adjusted.get(parent, parent), )
                with store._fast_import.commit(ref='refs/cinnabar/manifest',
                                               parents=parents,
                                               mark=mark) as commit:
                    mode, typ, tree, path = store._fast_import.ls(obj)
                    commit.filemodify('', tree, typ='tree')
                adjusted[obj] = Mark(mark)

    dangling = all_hg2git - seen_changesets - seen_manifests - seen_files
    if dangling or adjusted:
        with store._fast_import.commit(
                ref='refs/cinnabar/hg2git',
                parents=('refs/cinnabar/hg2git^0', )) as commit:
            for obj in dangling:
                fix('Removing dangling metadata for ' + obj)
                commit.filedelete(sha1path(obj))
            for obj, mark in progress_iter(
                    'Updating hg2git for %d metadata commits',
                    adjusted.iteritems()):
                commit.filemodify(sha1path(seen_manifest_refs[obj]),
                                  mark,
                                  typ='commit')

    dangling = all_notes - seen_notes
    if dangling:
        with store._fast_import.commit(
                ref='refs/notes/cinnabar',
                parents=('refs/notes/cinnabar^0', )) as commit:
            for c in dangling:
                fix('Removing dangling note for commit ' + c)
                # That's brute force, but meh.
                for l in range(0, 10):
                    commit.filedelete(sha1path(c, l))

    if status['broken']:
        info('Your git-cinnabar repository appears to be corrupted. There\n'
             'are known issues in older revisions that have been fixed.\n'
             'Please try running the following command to reset:\n'
             '  git cinnabar reclone\n\n'
             'Please note this command may change the commit sha1s. Your\n'
             'local branches will however stay untouched.\n'
             'Please report any corruption that fsck would detect after a\n'
             'reclone.')

    if not args.commit:
        info('Checking head references...')
        computed_heads = defaultdict(set)
        for branch, head in dag.all_heads():
            computed_heads[branch].add(head)

        for branch in sorted(dag.tags()):
            stored_heads = store.heads({branch})
            for head in computed_heads[branch] - stored_heads:
                fix('Adding missing head %s in branch %s' % (head, branch))
                store.add_head(head)
            for head in stored_heads - computed_heads[branch]:
                fix('Removing non-head reference to %s in branch %s' %
                    (head, branch))
                store._hgheads.remove((branch, head))

    store.close()

    if status['broken']:
        return 1
    if status['fixed']:
        return 2
    return 0
Esempio n. 9
0
def fsck(args):
    '''check cinnabar metadata consistency'''

    if not args.commit and not args.full:
        return fsck_quick()

    status = FsckStatus()

    store = GitHgStore()

    if args.full and args.commit:
        logging.error('Cannot pass both --full and a commit')
        return 1

    if args.commit:
        commits = set()
        all_git_commits = {}

        for c in args.commit:
            cs = store.hg_changeset(c)
            if cs:
                commits.add(c)
                c = cs.node
            commit = GitHgHelper.hg2git(c)
            if commit == NULL_NODE_ID and not cs:
                status.info('Unknown commit or changeset: %s' % c)
                return 1
            if not cs:
                cs = store.hg_changeset(commit)
                commits.add(commit)

        all_git_commits = GitHgHelper.rev_list('--no-walk=unsorted', *commits)
    else:
        all_refs = dict((ref, sha1)
                        for sha1, ref in Git.for_each_ref('refs/cinnabar'))

        if 'refs/cinnabar/metadata' in all_refs:
            git_heads = '%s^^@' % all_refs['refs/cinnabar/metadata']
        else:
            assert False

        all_git_commits = GitHgHelper.rev_list(
            '--topo-order', '--full-history', '--reverse', git_heads)

    dag = gitdag()

    GitHgHelper.reset_heads('manifests')

    full_file_check = FileFindParents.logger.isEnabledFor(logging.DEBUG)

    for node, tree, parents in progress_iter('Checking {} changesets',
                                             all_git_commits):
        node = store._replace.get(node, node)
        hg_node = store.hg_changeset(node)
        if not hg_node:
            status.report('Missing note for git commit: ' + node)
            continue
        GitHgHelper.seen('git2hg', node)

        changeset_data = store.changeset(hg_node)
        changeset = changeset_data.node

        GitHgHelper.seen('hg2git', changeset)
        changeset_ref = store.changeset_ref(changeset)
        if not changeset_ref:
            status.report('Missing changeset in hg2git branch: %s' % changeset)
            continue
        elif str(changeset_ref) != node:
            status.report('Commit mismatch for changeset %s\n'
                          '  hg2git: %s\n  commit: %s'
                          % (changeset, changeset_ref, node))

        hg_changeset = store.changeset(changeset, include_parents=True)
        if hg_changeset.node != hg_changeset.sha1:
            status.report('Sha1 mismatch for changeset %s' % changeset)

        dag.add(hg_changeset.node,
                (hg_changeset.parent1, hg_changeset.parent2),
                changeset_data.branch or 'default')

        raw_changeset = Changeset.from_git_commit(node)
        patcher = ChangesetPatcher.from_diff(raw_changeset, changeset_data)
        if patcher != store.read_changeset_data(node):
            status.fix('Adjusted changeset metadata for %s' % changeset)
            GitHgHelper.set('changeset', changeset, NULL_NODE_ID)
            GitHgHelper.set('changeset', changeset, node)
            GitHgHelper.put_blob(patcher, want_sha1=False)
            GitHgHelper.set('changeset-metadata', changeset, NULL_NODE_ID)
            GitHgHelper.set('changeset-metadata', changeset, ':1')

        manifest = changeset_data.manifest
        if GitHgHelper.seen('hg2git', manifest) or manifest == NULL_NODE_ID:
            continue
        manifest_ref = store.manifest_ref(manifest)
        if not manifest_ref:
            status.report('Missing manifest in hg2git branch: %s' % manifest)

        parents = tuple(
            store.changeset(p).manifest
            for p in hg_changeset.parents
        )
        git_parents = tuple(store.manifest_ref(p) for p in parents
                            if p != NULL_NODE_ID)

        # This doesn't change the value but makes the helper track the manifest
        # dag.
        GitHgHelper.set('manifest', manifest, manifest_ref)

        if not GitHgHelper.check_manifest(manifest):
            status.report('Sha1 mismatch for manifest %s' % manifest)

        manifest_commit_parents = GitCommit(manifest_ref).parents
        if sorted(manifest_commit_parents) != sorted(git_parents):
            # TODO: better error
            status.report('%s(%s) %s != %s' % (manifest, manifest_ref,
                                               manifest_commit_parents,
                                               git_parents))

        # TODO: check that manifest content matches changeset content

        changes = get_changes(manifest_ref, git_parents)
        for path, hg_file, hg_fileparents in changes:
            if hg_file != NULL_NODE_ID and (hg_file == HG_EMPTY_FILE or
                                            GitHgHelper.seen('hg2git',
                                                             hg_file)):
                if full_file_check:
                    file = store.file(hg_file, hg_fileparents, git_parents,
                                      store.manifest_path(path))
                    valid = file.node == file.sha1
                else:
                    valid = GitHgHelper.check_file(hg_file,
                                                   *hg_fileparents)
                if not valid:
                    status.report(
                        'Sha1 mismatch for file %s in manifest %s'
                        % (hg_file, manifest_ref))

    if not args.commit and not status('broken'):
        store_manifest_heads = set(store._manifest_heads_orig)
        manifest_heads = set(GitHgHelper.heads('manifests'))
        if store_manifest_heads != manifest_heads:
            def iter_manifests(a, b):
                for h in a - b:
                    yield h
                for h in b:
                    yield '^%s' % h

            for m, t, p in GitHgHelper.rev_list(
                    '--topo-order', '--full-history', '--reverse',
                    *iter_manifests(manifest_heads, store_manifest_heads)):
                status.fix('Missing manifest commit in manifest branch: %s'
                           % m)

            for m, t, p in GitHgHelper.rev_list(
                    '--topo-order', '--full-history', '--reverse',
                    *iter_manifests(store_manifest_heads, manifest_heads)):
                status.fix('Removing metadata commit %s with no corresponding '
                           'changeset' % (m))

            for h in store_manifest_heads - manifest_heads:
                if GitHgHelper.seen('hg2git', store.hg_manifest(h)):
                    status.fix('Removing non-head reference to %s in manifests'
                               ' metadata.' % h)
    dangling = ()
    if not args.commit and not status('broken'):
        dangling = GitHgHelper.dangling('hg2git')
    for obj in dangling:
        status.fix('Removing dangling metadata for ' + obj)
        # Theoretically, we should figure out if they are files, manifests
        # or changesets and set the right variable accordingly, but in
        # practice, it makes no difference. Reevaluate when GitHgStore.close
        # is modified, though.
        GitHgHelper.set('file', obj, NULL_NODE_ID)
        GitHgHelper.set('file-meta', obj, NULL_NODE_ID)

    if not args.commit and not status('broken'):
        dangling = GitHgHelper.dangling('git2hg')
    for c in dangling:
        status.fix('Removing dangling note for commit ' + c)
        GitHgHelper.set('changeset-metadata', c, NULL_NODE_ID)

    if status('broken'):
        status.info(
            'Your git-cinnabar repository appears to be corrupted. There\n'
            'are known issues in older revisions that have been fixed.\n'
            'Please try running the following command to reset:\n'
            '  git cinnabar reclone\n\n'
            'Please note this command may change the commit sha1s. Your\n'
            'local branches will however stay untouched.\n'
            'Please report any corruption that fsck would detect after a\n'
            'reclone.')

    if not args.commit:
        status.info('Checking head references...')
        computed_heads = defaultdict(set)
        for branch, head in dag.all_heads():
            computed_heads[branch].add(head)

        for branch in sorted(dag.tags()):
            stored_heads = store.heads({branch})
            for head in computed_heads[branch] - stored_heads:
                status.fix('Adding missing head %s in branch %s' %
                           (head, branch))
                store.add_head(head)
            for head in stored_heads - computed_heads[branch]:
                status.fix('Removing non-head reference to %s in branch %s' %
                           (head, branch))
                del store._hgheads[head]

    metadata_commit = Git.resolve_ref('refs/cinnabar/metadata')
    if status('broken'):
        Git.update_ref('refs/cinnabar/broken', metadata_commit)
        return 1

    if args.full:
        Git.update_ref('refs/cinnabar/checked', metadata_commit)
    interval_expired('fsck', 0)
    store.close()

    if status('fixed'):
        return 2
    return 0
Esempio n. 10
0
    def test_store_changeset(self):
        files = {}
        f = File()
        f.content = b'foo\n'
        f.node = f.sha1

        chunk = f.to_chunk(self.RevChunk)
        GitHgHelper.store(b'file', chunk)
        files[f.node] = GitHgHelper.hg2git(chunk.node)

        f2 = File()
        f2.content = b'bar\n'
        f2.node = f2.sha1

        chunk = f2.to_chunk(self.RevChunk)
        GitHgHelper.store(b'file', chunk)
        files[f2.node] = GitHgHelper.hg2git(chunk.node)

        m = Manifest()
        m.add(b'bar', f.node)
        m.add(b'foo/.bar', f.node)
        m.add(b'foo/.foo', f.node)
        m.add(b'foo/bar/baz', f.node)
        m.add(b'foo/bar/foo', f.node)
        m.add(b'foo/bar/qux', f.node)
        m.add(b'foo/foo', f.node)
        m.add(b'foo/hoge', f.node)
        m.add(b'foo/qux', f.node)
        m.add(b'qux', f.node)
        m.node = m.sha1

        chunk = m.to_chunk(self.RevChunk)
        GitHgHelper.store(b'manifest', chunk)

        store = GitHgStore()

        c = Changeset()
        c.manifest = m.node
        c.author = b'Cinnabar test <cinnabar@test>'
        c.timestamp = b'0'
        c.utcoffset = b'0'
        c.files = [i.path for i in m]
        c.body = b'Test commit'
        c.node = c.sha1

        store.store_changeset(c)
        c_gen = store.changeset(c.node)
        self.assertEqual(c.raw_data, c_gen.raw_data)

        commit = GitCommit(GitHgHelper.hg2git(c.node))
        self.assertEqual(commit.body, c.body)
        ct = self.commit_tree(m, files)
        self.assertEqual(commit.tree, ct)

        # Weird case as seen in the GNU octave repo.
        # The bar subdirectory is supposed to be transposed to the same
        # content as the git tree for the manifest above.
        m2 = Manifest()
        m2.add(b'bar/bar', f.node)
        m2.add(b'bar/foo/.foo', f.node)
        m2.add(b'bar/foo//.bar', f.node)
        m2.add(b'bar/foo//.foo', f2.node)
        m2.add(b'bar/foo//bar/baz', f2.node)
        m2.add(b'bar/foo//bar/foo', f.node)
        m2.add(b'bar/foo//hoge', f.node)
        m2.add(b'bar/foo/bar/baz', f.node)
        m2.add(b'bar/foo/bar/qux', f.node)
        m2.add(b'bar/foo/foo', f.node)
        m2.add(b'bar/foo/qux', f.node)
        m2.add(b'bar/qux', f.node)
        m2.node = m2.sha1

        chunk = m2.to_chunk(self.RevChunk, m)
        GitHgHelper.store(b'manifest', chunk)

        c2 = Changeset()
        c2.parent1 = c.node
        c2.manifest = m2.node
        c2.author = b'Cinnabar test <cinnabar@test>'
        c2.timestamp = b'0'
        c2.utcoffset = b'0'
        c2.files = [i.path for i in m2]
        c2.body = b'Test commit'
        c2.node = c2.sha1

        store.store_changeset(c2)
        c2_gen = store.changeset(c2.node)
        self.assertEqual(c2.raw_data, c2_gen.raw_data)

        commit = GitCommit(GitHgHelper.hg2git(c2.node))
        self.assertEqual(commit.body, c2.body)
        self.assertEqual(ct, one(Git.ls_tree(commit.tree, b'bar'))[2])

        # Corner case: empty manifest
        m3 = Manifest()
        m3.node = m3.sha1
        self.assertEqual(b'b80de5d138758541c5f05265ad144ab9fa86d1db', m3.node)

        chunk = m3.to_chunk(self.RevChunk, m2)
        GitHgHelper.store(b'manifest', chunk)
        commit = GitCommit(GitHgHelper.hg2git(m3.node))
        self.assertEqual(EMPTY_TREE, commit.tree)

        c3 = Changeset()
        c3.parent1 = c2.node
        c3.manifest = m3.node
        c3.author = b'Cinnabar test <cinnabar@test>'
        c3.timestamp = b'0'
        c3.utcoffset = b'0'
        c3.body = b'Test commit'
        c3.node = c3.sha1

        store.store_changeset(c3)
        c3_gen = store.changeset(c3.node)
        self.assertEqual(c3.raw_data, c3_gen.raw_data)

        commit = GitCommit(GitHgHelper.hg2git(c3.node))
        self.assertEqual(commit.body, c3.body)
        self.assertEqual(EMPTY_TREE, commit.tree)

        # Corner case: null manifest
        c4 = Changeset()
        c4.parent1 = c3.node
        c4.manifest = NULL_NODE_ID
        c4.author = b'Cinnabar test <cinnabar@test>'
        c4.timestamp = b'0'
        c4.utcoffset = b'0'
        c4.body = b'Test commit'
        c4.node = c4.sha1

        store.store_changeset(c4)
        c4_gen = store.changeset(c4.node)
        self.assertEqual(c4.raw_data, c4_gen.raw_data)

        commit = GitCommit(GitHgHelper.hg2git(c4.node))
        self.assertEqual(commit.body, c4.body)
        self.assertEqual(EMPTY_TREE, commit.tree)

        # Corner case: identical changeset with a difference that wouldn't
        # appear in the git commit without adjustment (which is: cinnabar adds
        # a nul character to the commit message..
        chunk = c2.to_chunk(RawRevChunk02)
        c5 = Changeset.from_chunk(chunk)
        c5.branch = b'branched'
        c5.node = c5.sha1

        store.store_changeset(c5)
        c5_gen = store.changeset(c5.node)
        self.assertEqual(c5.raw_data, c5_gen.raw_data)

        commit = GitCommit(GitHgHelper.hg2git(c5.node))
        self.assertEqual(commit.body, c5.body + b'\0')
        self.assertEqual(ct, one(Git.ls_tree(commit.tree, b'bar'))[2])
Esempio n. 11
0
    def test_store_changeset(self):
        files = {}
        f = File()
        f.content = b'foo\n'
        f.node = f.sha1

        chunk = f.to_chunk(self.RevChunk)
        GitHgHelper.store(b'file', chunk)
        files[f.node] = GitHgHelper.hg2git(chunk.node)

        f2 = File()
        f2.content = b'bar\n'
        f2.node = f2.sha1

        chunk = f2.to_chunk(self.RevChunk)
        GitHgHelper.store(b'file', chunk)
        files[f2.node] = GitHgHelper.hg2git(chunk.node)

        m = Manifest()
        m.add(b'bar', f.node)
        m.add(b'foo/.bar', f.node)
        m.add(b'foo/.foo', f.node)
        m.add(b'foo/bar/baz', f.node)
        m.add(b'foo/bar/foo', f.node)
        m.add(b'foo/bar/qux', f.node)
        m.add(b'foo/foo', f.node)
        m.add(b'foo/hoge', f.node)
        m.add(b'foo/qux', f.node)
        m.add(b'qux', f.node)
        m.node = m.sha1

        chunk = m.to_chunk(self.RevChunk)
        GitHgHelper.store(b'manifest', chunk)

        store = GitHgStore()

        c = Changeset()
        c.manifest = m.node
        c.author = b'Cinnabar test <cinnabar@test>'
        c.timestamp = b'0'
        c.utcoffset = b'0000'
        c.files = [i.path for i in m]
        c.body = b'Test commit'
        c.node = c.sha1

        store.store_changeset(c)
        c_gen = store.changeset(c.node)
        self.assertEqual(c.raw_data, c_gen.raw_data)

        commit = GitCommit(GitHgHelper.hg2git(c.node))
        self.assertEqual(commit.body, c.body)
        ct = self.commit_tree(m, files)
        self.assertEqual(commit.tree, ct)

        # Weird case as seen in the GNU octave repo.
        # The bar subdirectory is supposed to be transposed to the same
        # content as the git tree for the manifest above.
        m2 = Manifest()
        m2.add(b'bar/bar', f.node)
        m2.add(b'bar/foo/.foo', f.node)
        m2.add(b'bar/foo//.bar', f.node)
        m2.add(b'bar/foo//.foo', f2.node)
        m2.add(b'bar/foo//bar/baz', f2.node)
        m2.add(b'bar/foo//bar/foo', f.node)
        m2.add(b'bar/foo//hoge', f.node)
        m2.add(b'bar/foo/bar/baz', f.node)
        m2.add(b'bar/foo/bar/qux', f.node)
        m2.add(b'bar/foo/foo', f.node)
        m2.add(b'bar/foo/qux', f.node)
        m2.add(b'bar/qux', f.node)
        m2.node = m2.sha1

        chunk = m2.to_chunk(self.RevChunk, m)
        GitHgHelper.store(b'manifest', chunk)

        c2 = Changeset()
        c2.parent1 = c.node
        c2.manifest = m2.node
        c2.author = b'Cinnabar test <cinnabar@test>'
        c2.timestamp = b'0'
        c2.utcoffset = b'0000'
        c2.files = [i.path for i in m2]
        c2.body = b'Test commit'
        c2.node = c2.sha1

        store.store_changeset(c2)
        c2_gen = store.changeset(c2.node)
        self.assertEqual(c2.raw_data, c2_gen.raw_data)

        commit = GitCommit(GitHgHelper.hg2git(c2.node))
        self.assertEqual(commit.body, c2.body)
        self.assertEqual(ct, one(Git.ls_tree(commit.tree, b'bar'))[2])