class DVCSOutputOption(OutputOption):
    """Shared machinery for output options that use a RepositoryMirror.

    The class owns a RepositoryMirror and, between setup() and
    cleanup(), a SymbolingsReader.  It offers helpers for normalizing
    author transforms and for working out the sources from which a
    symbol commit can be copied.

    NOTE(review): check() formats its messages with self.name, which is
    not defined in this class; presumably subclasses supply it."""

    def __init__(self):
        self._mirror = RepositoryMirror()
        # Created by setup() and removed again by cleanup():
        self._symbolings_reader = None

    def normalize_author_transforms(self, author_transforms):
        """Convert AUTHOR_TRANSFORMS into author strings.

        AUTHOR_TRANSFORMS is a dict { CVSAUTHOR : DVCSAUTHOR } where
        CVSAUTHOR is the CVS author and DVCSAUTHOR is either:

        * a tuple (NAME, EMAIL) where NAME and EMAIL are strings.  Such
          entries are converted into a UTF-8 string of the form
          'name <email>'.

        * a string already in the form 'name <email>'.

        Return a similar dict { CVSAUTHOR : DVCSAUTHOR } where all keys
        and values are UTF-8-encoded strings.

        Any of the input strings may be Unicode strings (in which case
        they are encoded to UTF-8) or 8-bit strings (in which case they
        are used as-is).  Also turns None into the empty dict."""

        result = {}
        if author_transforms is None:
            return result

        for (cvsauthor, dvcsauthor) in author_transforms.iteritems():
            cvsauthor = to_utf8(cvsauthor)
            if isinstance(dvcsauthor, basestring):
                dvcsauthor = to_utf8(dvcsauthor)
            else:
                (name, email,) = dvcsauthor
                name = to_utf8(name)
                email = to_utf8(email)
                dvcsauthor = "%s <%s>" % (name, email,)
            result[cvsauthor] = dvcsauthor
        return result

    def register_artifacts(self, which_pass):
        """Register the temporary files needed during WHICH_PASS."""

        # These artifacts are needed for SymbolingsReader:
        artifact_manager.register_temp_file_needed(
            config.SYMBOL_OPENINGS_CLOSINGS_SORTED, which_pass
            )
        artifact_manager.register_temp_file_needed(
            config.SYMBOL_OFFSETS_DB, which_pass
            )
        self._mirror.register_artifacts(which_pass)

    def check(self):
        """Raise FatalError if the Ctx() settings are unsupported.

        Cross-project commits, cross-branch commits and a missing default
        username are each rejected."""

        if Ctx().cross_project_commits:
            raise FatalError(
                '%s output is not supported with cross-project commits'
                % self.name
                )
        if Ctx().cross_branch_commits:
            raise FatalError(
                '%s output is not supported with cross-branch commits'
                % self.name
                )
        if Ctx().username is None:
            raise FatalError(
                '%s output requires a default commit username' % self.name
                )

    def setup(self, svn_rev_count):
        """Create the SymbolingsReader and open the mirror.

        SVN_REV_COUNT is not used here."""

        self._symbolings_reader = SymbolingsReader()
        self._mirror.open()

    def cleanup(self):
        """Close the mirror and the SymbolingsReader.

        The reader is closed and the attribute removed even if closing
        the mirror raises, so that a failure there does not leak the
        reader."""

        try:
            self._mirror.close()
        finally:
            self._symbolings_reader.close()
            del self._symbolings_reader

    def _get_source_groups(self, svn_commit):
        """Return groups of sources for SVN_COMMIT.

        SVN_COMMIT is an instance of SVNSymbolCommit.  Return a list of
        tuples (svn_revnum, source_lod, cvs_symbols) where svn_revnum is
        the revision that should serve as a source, source_lod is the CVS
        line of development, and cvs_symbols is a list of CVSSymbolItems
        that can be copied from that source.  The list is in arbitrary
        order."""

        # Get a map {CVSSymbol : SVNRevisionRange}:
        range_map = self._symbolings_reader.get_range_map(svn_commit)

        # range_map, split up into one map per LOD; i.e., {LOD :
        # {CVSSymbol : SVNRevisionRange}}:
        lod_range_maps = {}
        for (cvs_symbol, svn_range) in range_map.iteritems():
            lod_range_map = lod_range_maps.get(svn_range.source_lod)
            if lod_range_map is None:
                lod_range_map = {}
                lod_range_maps[svn_range.source_lod] = lod_range_map
            lod_range_map[cvs_symbol] = svn_range

        def compare_sources(item1, item2):
            # Larger maps first; ties are broken by comparing the LODs.
            (lod1, lod_range_map1) = item1
            (lod2, lod_range_map2) = item2
            return (
                -cmp(len(lod_range_map1), len(lod_range_map2))
                or cmp(lod1, lod2)
                )

        # Sort the sources so that the branch that serves most often as
        # parent is processed first:
        lod_ranges = list(lod_range_maps.items())
        lod_ranges.sort(compare_sources)

        source_groups = []
        for (lod, lod_range_map) in lod_ranges:
            while lod_range_map:
                revision_scores = RevisionScores(lod_range_map.values())
                (source_lod, revnum, score) = \
                    revision_scores.get_best_revnum()
                assert source_lod == lod
                cvs_symbols = []
                # Iterate over a copy because entries are deleted from
                # lod_range_map inside the loop:
                for (cvs_symbol, svn_range) in list(lod_range_map.items()):
                    if revnum in svn_range:
                        cvs_symbols.append(cvs_symbol)
                        del lod_range_map[cvs_symbol]
                source_groups.append((revnum, lod, cvs_symbols))

        return source_groups

    def _is_simple_copy(self, svn_commit, source_groups):
        """Return True iff SVN_COMMIT can be created as a simple copy.

        SVN_COMMIT is an SVNTagCommit.  SOURCE_GROUPS is a list of tuples
        (svn_revnum, source_lod, cvs_symbols) as returned by
        _get_source_groups().  Return True iff the commit can be created
        as a simple copy from an existing revision (i.e., if the fixup
        branch can be avoided for this tag creation).

        Raise InternalError if the source LOD cannot be found in the
        mirror."""

        # The first requirement is that there be exactly one source:
        if len(source_groups) != 1:
            return False

        (svn_revnum, source_lod, cvs_symbols) = source_groups[0]

        # The second requirement is that the destination LOD not already
        # exist:
        try:
            self._mirror.get_current_lod_directory(svn_commit.symbol)
        except KeyError:
            # The LOD doesn't already exist.  This is good.
            pass
        else:
            # The LOD already exists.  It cannot be created by a copy.
            return False

        # The third requirement is that the source LOD contains exactly
        # the same files as we need to add to the symbol:
        try:
            source_node = self._mirror.get_old_lod_directory(
                source_lod, svn_revnum
                )
        except KeyError:
            raise InternalError('Source %r does not exist' % (source_lod,))

        needed_files = set(cvs_symbol.cvs_file for cvs_symbol in cvs_symbols)
        return needed_files == set(self._get_all_files(source_node))

    def _get_all_files(self, node):
        """Generate all of the CVSFiles under NODE.

        An entry of NODE whose value is None is yielded directly; any
        other value is treated as a sub-node and searched recursively."""

        for cvs_path in node:
            subnode = node[cvs_path]
            if subnode is None:
                yield cvs_path
            else:
                for sub_cvs_path in self._get_all_files(subnode):
                    yield sub_cvs_path
class GitOutputOption(OutputOption):
    """An OutputOption that outputs to a git-fast-import formatted file.

    Members:

      dump_filename -- (string) the name of the file to which the
          git-fast-import commands for defining revisions will be
          written.

      author_transforms -- a map {cvsauthor : (fullname, email)} from
          CVS author names to git full name and email address.  All of
          the contents are 8-bit strings encoded as UTF-8.

    """

    # The first mark number used for git-fast-import commit marks.  This
    # value needs to be large to avoid conflicts with blob marks.
    _first_commit_mark = 1000000000

    def __init__(
            self, dump_filename, revision_writer,
            max_merges=None, author_transforms=None,
            ):
        """Constructor.

        DUMP_FILENAME is the name of the file to which the
        git-fast-import commands for defining revisions should be
        written.  (Please note that depending on the style of revision
        writer, the actual file contents might not be written to this
        file.)

        REVISION_WRITER is a GitRevisionWriter that is used to output
        either the content of revisions or a mark that was previously
        used to label a blob.

        MAX_MERGES can be set to an integer telling the maximum number of
        parents that can be merged into a commit at once (aside from the
        natural parent).  If it is set to None, then there is no limit.

        AUTHOR_TRANSFORMS is a map {cvsauthor : (fullname, email)} from
        CVS author names to git full name and email address.  All of the
        contents should either be Unicode strings or 8-bit strings
        encoded as UTF-8.

        """

        self.dump_filename = dump_filename
        self.revision_writer = revision_writer
        self.max_merges = max_merges

        def to_utf8(s):
            # Unicode strings are encoded; 8-bit strings pass through.
            if isinstance(s, unicode):
                return s.encode('utf8')
            else:
                return s

        self.author_transforms = {}
        if author_transforms is not None:
            for (cvsauthor, (name, email,)) in author_transforms.iteritems():
                cvsauthor = to_utf8(cvsauthor)
                name = to_utf8(name)
                email = to_utf8(email)
                self.author_transforms[cvsauthor] = (name, email,)

        self._mirror = RepositoryMirror()

        self._mark_generator = KeyGenerator(
            GitOutputOption._first_commit_mark
            )

    def register_artifacts(self, which_pass):
        """Register the temporary files needed during WHICH_PASS."""

        # These artifacts are needed for SymbolingsReader:
        artifact_manager.register_temp_file_needed(
            config.SYMBOL_OPENINGS_CLOSINGS_SORTED, which_pass
            )
        artifact_manager.register_temp_file_needed(
            config.SYMBOL_OFFSETS_DB, which_pass
            )
        self.revision_writer.register_artifacts(which_pass)
        self._mirror.register_artifacts(which_pass)

    def check(self):
        """Raise FatalError if the Ctx() settings are unsupported.

        Cross-project commits, cross-branch commits and a missing default
        username are each rejected."""

        if Ctx().cross_project_commits:
            raise FatalError(
                'Git output is not supported with cross-project commits'
                )
        if Ctx().cross_branch_commits:
            raise FatalError(
                'Git output is not supported with cross-branch commits'
                )
        if Ctx().username is None:
            raise FatalError(
                'Git output requires a default commit username'
                )

    def check_symbols(self, symbol_map):
        """Accept SYMBOL_MAP without performing any checks."""

        # FIXME: What constraints does git impose on symbols?
        pass

    def setup(self, svn_rev_count):
        """Open the dump file and initialize the per-run state.

        SVN_REV_COUNT is not used here."""

        self._symbolings_reader = SymbolingsReader()
        self.f = open(self.dump_filename, 'wb')

        # The youngest revnum that has been committed so far:
        self._youngest = 0

        # A map {lod : [(revnum, mark)]} giving each of the revision
        # numbers in which there was a commit to lod, and the mark active
        # at the end of the revnum.
        self._marks = {}

        self._mirror.open()
        self.revision_writer.start(self.f, self._mirror)

    def _create_commit_mark(self, lod, revnum):
        """Generate a new mark, record it for LOD at REVNUM, return it."""

        mark = self._mark_generator.gen_id()
        self._set_lod_mark(lod, revnum, mark)
        return mark

    def _set_lod_mark(self, lod, revnum, mark):
        """Record MARK as the status of LOD for REVNUM.

        If there is already an entry for REVNUM, overwrite it.  If not,
        append a new entry to the self._marks list for LOD."""

        assert revnum >= self._youngest
        entry = (revnum, mark)
        try:
            modifications = self._marks[lod]
        except KeyError:
            # This LOD hasn't appeared before; create a new list and add
            # the entry:
            self._marks[lod] = [entry]
        else:
            # A record exists, so it necessarily has at least one element:
            if modifications[-1][0] == revnum:
                modifications[-1] = entry
            else:
                modifications.append(entry)
        self._youngest = revnum

    def _get_author(self, svn_commit):
        """Return the author to be used for SVN_COMMIT.

        Return the author in the form needed by git; that is,
        'foo <bar>'.  An author without an entry in
        self.author_transforms is used as both name and email."""

        author = svn_commit.get_author()
        (name, email,) = self.author_transforms.get(author, (author, author,))
        return '%s <%s>' % (name, email,)

    @staticmethod
    def _get_log_msg(svn_commit):
        """Return the log message of SVN_COMMIT."""

        return svn_commit.get_log_msg()

    def process_initial_project_commit(self, svn_commit):
        """Record SVN_COMMIT in the mirror; nothing is written to git."""

        self._mirror.start_commit(svn_commit.revnum)
        self._mirror.end_commit()

    def process_primary_commit(self, svn_commit):
        """Write SVN_COMMIT as a commit on the branch of its single LOD.

        Raise InternalError unless the CVS items of SVN_COMMIT all belong
        to exactly one LOD."""

        author = self._get_author(svn_commit)
        log_msg = self._get_log_msg(svn_commit)

        lods = set()
        for cvs_rev in svn_commit.get_cvs_items():
            lods.add(cvs_rev.lod)
        if len(lods) != 1:
            raise InternalError('Commit affects %d LODs' % (len(lods),))
        lod = lods.pop()

        self._mirror.start_commit(svn_commit.revnum)
        if isinstance(lod, Trunk):
            # FIXME: is this correct?:
            self.f.write('commit refs/heads/master\n')
        else:
            self.f.write('commit refs/heads/%s\n' % (lod.name,))
        self.f.write(
            'mark :%d\n'
            % (self._create_commit_mark(lod, svn_commit.revnum),)
            )
        self.f.write(
            'committer %s %d +0000\n' % (author, svn_commit.date,)
            )
        self.f.write('data %d\n' % (len(log_msg),))
        self.f.write('%s\n' % (log_msg,))
        for cvs_rev in svn_commit.get_cvs_items():
            self.revision_writer.process_revision(cvs_rev, post_commit=False)

        self.f.write('\n')
        self._mirror.end_commit()

    def process_post_commit(self, svn_commit):
        """Write SVN_COMMIT as a commit on master merging its source LOD.

        Raise InternalError unless the revisions of SVN_COMMIT all come
        from exactly one LOD."""

        author = self._get_author(svn_commit)
        log_msg = self._get_log_msg(svn_commit)

        source_lods = set()
        for cvs_rev in svn_commit.cvs_revs:
            source_lods.add(cvs_rev.lod)
        if len(source_lods) != 1:
            raise InternalError(
                'Commit is from %d LODs' % (len(source_lods),)
                )
        source_lod = source_lods.pop()

        self._mirror.start_commit(svn_commit.revnum)
        # FIXME: is this correct?:
        self.f.write('commit refs/heads/master\n')
        # NOTE(review): the mark is recorded under the key None rather
        # than under an LOD, so _get_source_mark() lookups keyed by an
        # LOD never see it -- confirm that this is intended.
        self.f.write(
            'mark :%d\n'
            % (self._create_commit_mark(None, svn_commit.revnum),)
            )
        self.f.write(
            'committer %s %d +0000\n' % (author, svn_commit.date,)
            )
        self.f.write('data %d\n' % (len(log_msg),))
        self.f.write('%s\n' % (log_msg,))
        self.f.write(
            'merge :%d\n'
            % (self._get_source_mark(source_lod, svn_commit.revnum),)
            )
        for cvs_rev in svn_commit.cvs_revs:
            self.revision_writer.process_revision(cvs_rev, post_commit=True)

        self.f.write('\n')
        self._mirror.end_commit()

    def _get_source_groups(self, svn_commit):
        """Return groups of sources for SVN_COMMIT.

        SVN_COMMIT is an instance of SVNSymbolCommit.  Yield tuples
        (source_lod, svn_revnum, cvs_symbols) where source_lod is the line
        of development and svn_revnum is the revision that should serve
        as a source, and cvs_symbols is a list of CVSSymbolItems that can
        be copied from that source.  The groups are returned in arbitrary
        order."""

        # Get a map {CVSSymbol : SVNRevisionRange}:
        range_map = self._symbolings_reader.get_range_map(svn_commit)

        # range_map, split up into one map per LOD; i.e., {LOD :
        # {CVSSymbol : SVNRevisionRange}}:
        lod_range_maps = {}
        for (cvs_symbol, svn_range) in range_map.iteritems():
            lod_range_map = lod_range_maps.get(svn_range.source_lod)
            if lod_range_map is None:
                lod_range_map = {}
                lod_range_maps[svn_range.source_lod] = lod_range_map
            lod_range_map[cvs_symbol] = svn_range

        def compare_sources(item1, item2):
            # Larger maps first; ties are broken by comparing the LODs.
            (lod1, lod_range_map1) = item1
            (lod2, lod_range_map2) = item2
            return (
                -cmp(len(lod_range_map1), len(lod_range_map2))
                or cmp(lod1, lod2)
                )

        # Sort the sources so that the branch that serves most often as
        # parent is processed first:
        lod_ranges = list(lod_range_maps.items())
        lod_ranges.sort(compare_sources)

        for (lod, lod_range_map) in lod_ranges:
            while lod_range_map:
                revision_scores = RevisionScores(lod_range_map.values())
                (source_lod, revnum, score) = \
                    revision_scores.get_best_revnum()
                assert source_lod == lod
                cvs_symbols = []
                # Iterate over a copy because entries are deleted from
                # lod_range_map inside the loop:
                for (cvs_symbol, svn_range) in list(lod_range_map.items()):
                    if revnum in svn_range:
                        cvs_symbols.append(cvs_symbol)
                        del lod_range_map[cvs_symbol]
                yield (lod, revnum, cvs_symbols)

    def _get_all_files(self, node):
        """Generate all of the CVSFiles under NODE.

        An entry of NODE whose value is None is yielded directly; any
        other value is treated as a sub-node and searched recursively."""

        for cvs_path in node:
            subnode = node[cvs_path]
            if subnode is None:
                yield cvs_path
            else:
                for sub_cvs_path in self._get_all_files(subnode):
                    yield sub_cvs_path

    def _is_simple_copy(self, svn_commit, source_groups):
        """Return True iff SVN_COMMIT can be created as a simple copy.

        SVN_COMMIT is an SVNTagCommit.  SOURCE_GROUPS is a list of tuples
        (source_lod, svn_revnum, cvs_symbols) as yielded by
        _get_source_groups().  Return True iff the commit can be created
        as a simple copy from an existing revision (i.e., if the fixup
        branch can be avoided for this tag creation).

        Raise InternalError if the source LOD cannot be found in the
        mirror."""

        # The first requirement is that there be exactly one source:
        if len(source_groups) != 1:
            return False

        (source_lod, svn_revnum, cvs_symbols) = source_groups[0]

        # The second requirement is that the destination LOD not already
        # exist:
        try:
            self._mirror.get_current_lod_directory(svn_commit.symbol)
        except KeyError:
            # The LOD doesn't already exist.  This is good.
            pass
        else:
            # The LOD already exists.  It cannot be created by a copy.
            return False

        # The third requirement is that the source LOD contains exactly
        # the same files as we need to add to the symbol:
        try:
            source_node = self._mirror.get_old_lod_directory(
                source_lod, svn_revnum
                )
        except KeyError:
            raise InternalError('Source %r does not exist' % (source_lod,))

        needed_files = set(cvs_symbol.cvs_file for cvs_symbol in cvs_symbols)
        return needed_files == set(self._get_all_files(source_node))

    def _get_source_mark(self, source_lod, revnum):
        """Return the mark active on SOURCE_LOD at the end of REVNUM.

        Raise KeyError if no mark has been recorded for SOURCE_LOD."""

        modifications = self._marks[source_lod]
        # The one-element tuple sorts before every (revnum + 1, mark)
        # entry, so this finds the last entry whose revnum is <= REVNUM.
        # NOTE(review): if every recorded revnum is greater than REVNUM,
        # i is -1 and the most recent entry is returned -- confirm that
        # callers never ask for that.
        i = bisect.bisect_left(modifications, (revnum + 1,)) - 1
        return modifications[i][1]

    def _process_symbol_commit(
            self, svn_commit, git_branch, source_groups, mark
            ):
        """Write one commit to GIT_BRANCH that merges SOURCE_GROUPS.

        The commit is labelled with MARK and takes its author, date and
        log message from SVN_COMMIT.  Each group in SOURCE_GROUPS
        contributes one 'merge' line, after which the files of every
        group are branched via the revision writer."""

        author = self._get_author(svn_commit)
        log_msg = self._get_log_msg(svn_commit)

        self.f.write('commit %s\n' % (git_branch,))
        self.f.write('mark :%d\n' % (mark,))
        self.f.write('committer %s %d +0000\n' % (author, svn_commit.date,))
        self.f.write('data %d\n' % (len(log_msg),))
        self.f.write('%s\n' % (log_msg,))

        # All of the merge lines are written before any file content:
        for (source_lod, source_revnum, cvs_symbols,) in source_groups:
            self.f.write(
                'merge :%d\n'
                % (self._get_source_mark(source_lod, source_revnum),)
                )

        for (source_lod, source_revnum, cvs_symbols,) in source_groups:
            for cvs_symbol in cvs_symbols:
                self.revision_writer.branch_file(cvs_symbol)

        self.f.write('\n')

    def process_branch_commit(self, svn_commit):
        """Write the commit(s) that create the branch of SVN_COMMIT.

        The sources are split into chunks by get_chunks() according to
        self.max_merges, and one commit is written per chunk."""

        self._mirror.start_commit(svn_commit.revnum)
        source_groups = list(self._get_source_groups(svn_commit))
        for groups in get_chunks(source_groups, self.max_merges):
            self._process_symbol_commit(
                svn_commit, 'refs/heads/%s' % (svn_commit.symbol.name,),
                groups,
                self._create_commit_mark(svn_commit.symbol, svn_commit.revnum),
                )
        self._mirror.end_commit()

    def _set_symbol(self, symbol, mark):
        """Point the git reference for SYMBOL at MARK.

        A Branch is written under refs/heads and a Tag under refs/tags.
        Raise InternalError for any other kind of symbol."""

        if isinstance(symbol, Branch):
            category = 'heads'
        elif isinstance(symbol, Tag):
            category = 'tags'
        else:
            raise InternalError()
        self.f.write('reset refs/%s/%s\n' % (category, symbol.name,))
        self.f.write('from :%d\n' % (mark,))

    def process_tag_commit(self, svn_commit):
        """Write the git commands that create the tag of SVN_COMMIT.

        If the tag can be created as a simple copy, it is pointed
        directly at the mark of its single source.  Otherwise the
        commits are made on FIXUP_BRANCH_NAME, the tag is pointed at the
        last of them, and FIXUP_BRANCH_NAME is then reset.

        Raise InternalError if the fixup branch is needed but no commit
        was made on it."""

        self._mirror.start_commit(svn_commit.revnum)

        source_groups = list(self._get_source_groups(svn_commit))
        if self._is_simple_copy(svn_commit, source_groups):
            (source_lod, source_revnum, cvs_symbols) = source_groups[0]
            Log().debug(
                '%s will be created via a simple copy from %s:r%d'
                % (svn_commit.symbol, source_lod, source_revnum,)
                )
            mark = self._get_source_mark(source_lod, source_revnum)
            self._set_symbol(svn_commit.symbol, mark)
        else:
            Log().debug(
                '%s will be created via a fixup branch' % (svn_commit.symbol,)
                )

            # Create the fixup branch (which might involve making more
            # than one commit):
            mark = None
            for groups in get_chunks(source_groups, self.max_merges):
                mark = self._create_commit_mark(
                    svn_commit.symbol, svn_commit.revnum
                    )
                self._process_symbol_commit(
                    svn_commit, FIXUP_BRANCH_NAME, groups, mark
                    )

            if mark is None:
                # Without this check the code below would fail with an
                # UnboundLocalError that hides the real problem.
                raise InternalError(
                    'No sources found for %s' % (svn_commit.symbol,)
                    )

            # Store the mark of the last commit to the fixup branch as
            # the value of the tag:
            self._set_symbol(svn_commit.symbol, mark)
            self.f.write('reset %s\n' % (FIXUP_BRANCH_NAME,))
            self.f.write('\n')

        self._mirror.end_commit()

    def cleanup(self):
        """Finish the revision writer and release all resources.

        Each step runs even if an earlier one raises, so that a failure
        while finishing does not leak the dump file, the mirror or the
        SymbolingsReader."""

        try:
            self.revision_writer.finish()
        finally:
            try:
                self._mirror.close()
            finally:
                try:
                    self.f.close()
                    del self.f
                finally:
                    self._symbolings_reader.close()
                    del self._symbolings_reader
class DVCSOutputOption(OutputOption):
    """Shared machinery for output options that use a RepositoryMirror.

    The class owns a RepositoryMirror and, between setup() and
    cleanup(), a SymbolingsReader.  It offers helpers for normalizing
    author transforms and for working out the sources from which a
    symbol commit can be copied.

    NOTE(review): check() formats its messages with self.name, which is
    not defined in this class; presumably subclasses supply it."""

    def __init__(self):
        self._mirror = RepositoryMirror()
        # Created by setup() and removed again by cleanup():
        self._symbolings_reader = None

    def normalize_author_transforms(self, author_transforms):
        """Convert AUTHOR_TRANSFORMS into author strings.

        AUTHOR_TRANSFORMS is a dict { CVSAUTHOR : DVCSAUTHOR } where
        CVSAUTHOR is the CVS author and DVCSAUTHOR is either:

        * a tuple (NAME, EMAIL) where NAME and EMAIL are strings.  Such
          entries are converted into a UTF-8 string of the form
          'name <email>'.

        * a string already in the form 'name <email>'.

        Return a similar dict { CVSAUTHOR : DVCSAUTHOR } where all keys
        and values are UTF-8-encoded strings.

        Any of the input strings may be Unicode strings (in which case
        they are encoded to UTF-8) or 8-bit strings (in which case they
        are used as-is).  Also turns None into the empty dict."""

        result = {}
        if author_transforms is None:
            return result

        for (cvsauthor, dvcsauthor) in author_transforms.iteritems():
            cvsauthor = to_utf8(cvsauthor)
            if isinstance(dvcsauthor, basestring):
                dvcsauthor = to_utf8(dvcsauthor)
            else:
                (name, email,) = dvcsauthor
                name = to_utf8(name)
                email = to_utf8(email)
                dvcsauthor = "%s <%s>" % (name, email,)
            result[cvsauthor] = dvcsauthor
        return result

    def register_artifacts(self, which_pass):
        """Register the temporary files needed during WHICH_PASS."""

        # These artifacts are needed for SymbolingsReader:
        artifact_manager.register_temp_file_needed(
            config.SYMBOL_OPENINGS_CLOSINGS_SORTED, which_pass
            )
        artifact_manager.register_temp_file_needed(
            config.SYMBOL_OFFSETS_DB, which_pass
            )
        self._mirror.register_artifacts(which_pass)

    def check(self):
        """Raise FatalError if the Ctx() settings are unsupported.

        Cross-project commits, cross-branch commits and a missing default
        username are each rejected."""

        if Ctx().cross_project_commits:
            raise FatalError(
                '%s output is not supported with cross-project commits'
                % self.name
                )
        if Ctx().cross_branch_commits:
            raise FatalError(
                '%s output is not supported with cross-branch commits'
                % self.name
                )
        if Ctx().username is None:
            raise FatalError(
                '%s output requires a default commit username' % self.name
                )

    def setup(self, svn_rev_count):
        """Create the SymbolingsReader and open the mirror.

        SVN_REV_COUNT is not used here."""

        self._symbolings_reader = SymbolingsReader()
        self._mirror.open()

    def cleanup(self):
        """Close the mirror and the SymbolingsReader.

        The reader is closed and the attribute removed even if closing
        the mirror raises, so that a failure there does not leak the
        reader."""

        try:
            self._mirror.close()
        finally:
            self._symbolings_reader.close()
            del self._symbolings_reader

    def _get_source_groups(self, svn_commit):
        """Return groups of sources for SVN_COMMIT.

        SVN_COMMIT is an instance of SVNSymbolCommit.  Return a list of
        tuples (svn_revnum, source_lod, cvs_symbols) where svn_revnum is
        the revision that should serve as a source, source_lod is the CVS
        line of development, and cvs_symbols is a list of CVSSymbolItems
        that can be copied from that source.  The list is in arbitrary
        order."""

        # Get a map {CVSSymbol : SVNRevisionRange}:
        range_map = self._symbolings_reader.get_range_map(svn_commit)

        # range_map, split up into one map per LOD; i.e., {LOD :
        # {CVSSymbol : SVNRevisionRange}}:
        lod_range_maps = {}
        for (cvs_symbol, svn_range) in range_map.iteritems():
            lod_range_map = lod_range_maps.get(svn_range.source_lod)
            if lod_range_map is None:
                lod_range_map = {}
                lod_range_maps[svn_range.source_lod] = lod_range_map
            lod_range_map[cvs_symbol] = svn_range

        def compare_sources(item1, item2):
            # Larger maps first; ties are broken by comparing the LODs.
            (lod1, lod_range_map1) = item1
            (lod2, lod_range_map2) = item2
            return (
                -cmp(len(lod_range_map1), len(lod_range_map2))
                or cmp(lod1, lod2)
                )

        # Sort the sources so that the branch that serves most often as
        # parent is processed first:
        lod_ranges = list(lod_range_maps.items())
        lod_ranges.sort(compare_sources)

        source_groups = []
        for (lod, lod_range_map) in lod_ranges:
            while lod_range_map:
                revision_scores = RevisionScores(lod_range_map.values())
                (source_lod, revnum, score) = \
                    revision_scores.get_best_revnum()
                assert source_lod == lod
                cvs_symbols = []
                # Iterate over a copy because entries are deleted from
                # lod_range_map inside the loop:
                for (cvs_symbol, svn_range) in list(lod_range_map.items()):
                    if revnum in svn_range:
                        cvs_symbols.append(cvs_symbol)
                        del lod_range_map[cvs_symbol]
                source_groups.append((revnum, lod, cvs_symbols))

        return source_groups

    def _is_simple_copy(self, svn_commit, source_groups):
        """Return True iff SVN_COMMIT can be created as a simple copy.

        SVN_COMMIT is an SVNTagCommit.  SOURCE_GROUPS is a list of tuples
        (svn_revnum, source_lod, cvs_symbols) as returned by
        _get_source_groups().  Return True iff the commit can be created
        as a simple copy from an existing revision (i.e., if the fixup
        branch can be avoided for this tag creation).

        Raise InternalError if the source LOD cannot be found in the
        mirror."""

        # The first requirement is that there be exactly one source:
        if len(source_groups) != 1:
            return False

        (svn_revnum, source_lod, cvs_symbols) = source_groups[0]

        # The second requirement is that the destination LOD not already
        # exist:
        try:
            self._mirror.get_current_lod_directory(svn_commit.symbol)
        except KeyError:
            # The LOD doesn't already exist.  This is good.
            pass
        else:
            # The LOD already exists.  It cannot be created by a copy.
            return False

        # The third requirement is that the source LOD contains exactly
        # the same files as we need to add to the symbol:
        try:
            source_node = self._mirror.get_old_lod_directory(
                source_lod, svn_revnum
                )
        except KeyError:
            raise InternalError('Source %r does not exist' % (source_lod,))

        needed_files = set(cvs_symbol.cvs_file for cvs_symbol in cvs_symbols)
        return needed_files == set(self._get_all_files(source_node))

    def _get_all_files(self, node):
        """Generate all of the CVSFiles under NODE.

        An entry of NODE whose value is None is yielded directly; any
        other value is treated as a sub-node and searched recursively."""

        for cvs_path in node:
            subnode = node[cvs_path]
            if subnode is None:
                yield cvs_path
            else:
                for sub_cvs_path in self._get_all_files(subnode):
                    yield sub_cvs_path
class GitOutputOption(OutputOption):
    """An OutputOption that outputs to a git-fast-import formatted file.

    Members:

      dump_filename -- (string) the name of the file to which the
          git-fast-import commands for defining revisions will be
          written.

      author_transforms -- a map {cvsauthor : (fullname, email)} from
          CVS author names to git full name and email address.  All of
          the contents are 8-bit strings encoded as UTF-8.

    """

    # The first mark number used for git-fast-import commit marks.  This
    # value needs to be large to avoid conflicts with blob marks.
    _first_commit_mark = 1000000000

    def __init__(
            self, dump_filename, revision_writer,
            max_merges=None, author_transforms=None,
            ):
        """Constructor.

        DUMP_FILENAME is the name of the file to which the
        git-fast-import commands for defining revisions should be
        written.  (Please note that depending on the style of revision
        writer, the actual file contents might not be written to this
        file.)

        REVISION_WRITER is a GitRevisionWriter that is used to output
        either the content of revisions or a mark that was previously
        used to label a blob.

        MAX_MERGES can be set to an integer telling the maximum number of
        parents that can be merged into a commit at once (aside from the
        natural parent).  If it is set to None, then there is no limit.

        AUTHOR_TRANSFORMS is a map {cvsauthor : (fullname, email)} from
        CVS author names to git full name and email address.  All of the
        contents should either be Unicode strings or 8-bit strings
        encoded as UTF-8.

        """

        self.dump_filename = dump_filename
        self.revision_writer = revision_writer
        self.max_merges = max_merges

        def to_utf8(s):
            # Unicode strings are encoded; 8-bit strings pass through.
            if isinstance(s, unicode):
                return s.encode('utf8')
            else:
                return s

        self.author_transforms = {}
        if author_transforms is not None:
            for (cvsauthor, (name, email,)) in author_transforms.iteritems():
                cvsauthor = to_utf8(cvsauthor)
                name = to_utf8(name)
                email = to_utf8(email)
                self.author_transforms[cvsauthor] = (name, email,)

        self._mirror = RepositoryMirror()

        self._mark_generator = KeyGenerator(
            GitOutputOption._first_commit_mark
            )

    def register_artifacts(self, which_pass):
        """Register the temporary files needed during WHICH_PASS."""

        # These artifacts are needed for SymbolingsReader:
        artifact_manager.register_temp_file_needed(
            config.SYMBOL_OPENINGS_CLOSINGS_SORTED, which_pass
            )
        artifact_manager.register_temp_file_needed(
            config.SYMBOL_OFFSETS_DB, which_pass
            )
        self.revision_writer.register_artifacts(which_pass)
        self._mirror.register_artifacts(which_pass)

    def check(self):
        """Raise FatalError if the Ctx() settings are unsupported.

        Cross-project commits, cross-branch commits and a missing default
        username are each rejected."""

        if Ctx().cross_project_commits:
            raise FatalError(
                'Git output is not supported with cross-project commits'
                )
        if Ctx().cross_branch_commits:
            raise FatalError(
                'Git output is not supported with cross-branch commits'
                )
        if Ctx().username is None:
            raise FatalError(
                'Git output requires a default commit username'
                )

    def check_symbols(self, symbol_map):
        """Accept SYMBOL_MAP without performing any checks."""

        # FIXME: What constraints does git impose on symbols?
        pass

    def setup(self, svn_rev_count):
        """Open the dump file and initialize the per-run state.

        SVN_REV_COUNT is not used here."""

        self._symbolings_reader = SymbolingsReader()
        self.f = open(self.dump_filename, 'wb')

        # The youngest revnum that has been committed so far:
        self._youngest = 0

        # A map {lod : [(revnum, mark)]} giving each of the revision
        # numbers in which there was a commit to lod, and the mark active
        # at the end of the revnum.
        self._marks = {}

        self._mirror.open()
        self.revision_writer.start(self.f, self._mirror)

    def _create_commit_mark(self, lod, revnum):
        """Generate a new mark, record it for LOD at REVNUM, return it."""

        mark = self._mark_generator.gen_id()
        self._set_lod_mark(lod, revnum, mark)
        return mark

    def _set_lod_mark(self, lod, revnum, mark):
        """Record MARK as the status of LOD for REVNUM.

        If there is already an entry for REVNUM, overwrite it.  If not,
        append a new entry to the self._marks list for LOD."""

        assert revnum >= self._youngest
        entry = (revnum, mark)
        try:
            modifications = self._marks[lod]
        except KeyError:
            # This LOD hasn't appeared before; create a new list and add
            # the entry:
            self._marks[lod] = [entry]
        else:
            # A record exists, so it necessarily has at least one element:
            if modifications[-1][0] == revnum:
                modifications[-1] = entry
            else:
                modifications.append(entry)
        self._youngest = revnum

    def _get_author(self, svn_commit):
        """Return the author to be used for SVN_COMMIT.

        Return the author in the form needed by git; that is,
        'foo <bar>'.  An author without an entry in
        self.author_transforms is used as both name and email."""

        author = svn_commit.get_author()
        (name, email,) = self.author_transforms.get(author, (author, author,))
        return '%s <%s>' % (name, email,)

    @staticmethod
    def _get_log_msg(svn_commit):
        """Return the log message of SVN_COMMIT."""

        return svn_commit.get_log_msg()

    def process_initial_project_commit(self, svn_commit):
        """Record SVN_COMMIT in the mirror; nothing is written to git."""

        self._mirror.start_commit(svn_commit.revnum)
        self._mirror.end_commit()

    def process_primary_commit(self, svn_commit):
        """Write SVN_COMMIT as a commit on the branch of its single LOD.

        Raise InternalError unless the CVS items of SVN_COMMIT all belong
        to exactly one LOD."""

        author = self._get_author(svn_commit)
        log_msg = self._get_log_msg(svn_commit)

        lods = set()
        for cvs_rev in svn_commit.get_cvs_items():
            lods.add(cvs_rev.lod)
        if len(lods) != 1:
            raise InternalError('Commit affects %d LODs' % (len(lods),))
        lod = lods.pop()

        self._mirror.start_commit(svn_commit.revnum)
        if isinstance(lod, Trunk):
            # FIXME: is this correct?:
            self.f.write('commit refs/heads/master\n')
        else:
            self.f.write('commit refs/heads/%s\n' % (lod.name,))
        self.f.write(
            'mark :%d\n'
            % (self._create_commit_mark(lod, svn_commit.revnum),)
            )
        self.f.write(
            'committer %s %d +0000\n' % (author, svn_commit.date,)
            )
        self.f.write('data %d\n' % (len(log_msg),))
        self.f.write('%s\n' % (log_msg,))
        for cvs_rev in svn_commit.get_cvs_items():
            self.revision_writer.process_revision(cvs_rev, post_commit=False)

        self.f.write('\n')
        self._mirror.end_commit()

    def process_post_commit(self, svn_commit):
        """Write SVN_COMMIT as a commit on master merging its source LOD.

        Raise InternalError unless the revisions of SVN_COMMIT all come
        from exactly one LOD."""

        author = self._get_author(svn_commit)
        log_msg = self._get_log_msg(svn_commit)

        source_lods = set()
        for cvs_rev in svn_commit.cvs_revs:
            source_lods.add(cvs_rev.lod)
        if len(source_lods) != 1:
            raise InternalError(
                'Commit is from %d LODs' % (len(source_lods),)
                )
        source_lod = source_lods.pop()

        self._mirror.start_commit(svn_commit.revnum)
        # FIXME: is this correct?:
        self.f.write('commit refs/heads/master\n')
        # NOTE(review): the mark is recorded under the key None rather
        # than under an LOD, so _get_source_mark() lookups keyed by an
        # LOD never see it -- confirm that this is intended.
        self.f.write(
            'mark :%d\n'
            % (self._create_commit_mark(None, svn_commit.revnum),)
            )
        self.f.write(
            'committer %s %d +0000\n' % (author, svn_commit.date,)
            )
        self.f.write('data %d\n' % (len(log_msg),))
        self.f.write('%s\n' % (log_msg,))
        self.f.write(
            'merge :%d\n'
            % (self._get_source_mark(source_lod, svn_commit.revnum),)
            )
        for cvs_rev in svn_commit.cvs_revs:
            self.revision_writer.process_revision(cvs_rev, post_commit=True)

        self.f.write('\n')
        self._mirror.end_commit()

    def _get_source_groups(self, svn_commit):
        """Return groups of sources for SVN_COMMIT.

        SVN_COMMIT is an instance of SVNSymbolCommit.  Yield tuples
        (source_lod, svn_revnum, cvs_symbols) where source_lod is the line
        of development and svn_revnum is the revision that should serve
        as a source, and cvs_symbols is a list of CVSSymbolItems that can
        be copied from that source.  The groups are returned in arbitrary
        order."""

        # Get a map {CVSSymbol : SVNRevisionRange}:
        range_map = self._symbolings_reader.get_range_map(svn_commit)

        # range_map, split up into one map per LOD; i.e., {LOD :
        # {CVSSymbol : SVNRevisionRange}}:
        lod_range_maps = {}
        for (cvs_symbol, svn_range) in range_map.iteritems():
            lod_range_map = lod_range_maps.get(svn_range.source_lod)
            if lod_range_map is None:
                lod_range_map = {}
                lod_range_maps[svn_range.source_lod] = lod_range_map
            lod_range_map[cvs_symbol] = svn_range

        def compare_sources(item1, item2):
            # Larger maps first; ties are broken by comparing the LODs.
            (lod1, lod_range_map1) = item1
            (lod2, lod_range_map2) = item2
            return (
                -cmp(len(lod_range_map1), len(lod_range_map2))
                or cmp(lod1, lod2)
                )

        # Sort the sources so that the branch that serves most often as
        # parent is processed first:
        lod_ranges = list(lod_range_maps.items())
        lod_ranges.sort(compare_sources)

        for (lod, lod_range_map) in lod_ranges:
            while lod_range_map:
                revision_scores = RevisionScores(lod_range_map.values())
                (source_lod, revnum, score) = \
                    revision_scores.get_best_revnum()
                assert source_lod == lod
                cvs_symbols = []
                # Iterate over a copy because entries are deleted from
                # lod_range_map inside the loop:
                for (cvs_symbol, svn_range) in list(lod_range_map.items()):
                    if revnum in svn_range:
                        cvs_symbols.append(cvs_symbol)
                        del lod_range_map[cvs_symbol]
                yield (lod, revnum, cvs_symbols)

    def _get_all_files(self, node):
        """Generate all of the CVSFiles under NODE.

        An entry of NODE whose value is None is yielded directly; any
        other value is treated as a sub-node and searched recursively."""

        for cvs_path in node:
            subnode = node[cvs_path]
            if subnode is None:
                yield cvs_path
            else:
                for sub_cvs_path in self._get_all_files(subnode):
                    yield sub_cvs_path

    def _is_simple_copy(self, svn_commit, source_groups):
        """Return True iff SVN_COMMIT can be created as a simple copy.

        SVN_COMMIT is an SVNTagCommit.  SOURCE_GROUPS is a list of tuples
        (source_lod, svn_revnum, cvs_symbols) as yielded by
        _get_source_groups().  Return True iff the commit can be created
        as a simple copy from an existing revision (i.e., if the fixup
        branch can be avoided for this tag creation).

        Raise InternalError if the source LOD cannot be found in the
        mirror."""

        # The first requirement is that there be exactly one source:
        if len(source_groups) != 1:
            return False

        (source_lod, svn_revnum, cvs_symbols) = source_groups[0]

        # The second requirement is that the destination LOD not already
        # exist:
        try:
            self._mirror.get_current_lod_directory(svn_commit.symbol)
        except KeyError:
            # The LOD doesn't already exist.  This is good.
            pass
        else:
            # The LOD already exists.  It cannot be created by a copy.
            return False

        # The third requirement is that the source LOD contains exactly
        # the same files as we need to add to the symbol:
        try:
            source_node = self._mirror.get_old_lod_directory(
                source_lod, svn_revnum
                )
        except KeyError:
            raise InternalError('Source %r does not exist' % (source_lod,))

        needed_files = set(cvs_symbol.cvs_file for cvs_symbol in cvs_symbols)
        return needed_files == set(self._get_all_files(source_node))

    def _get_source_mark(self, source_lod, revnum):
        """Return the mark active on SOURCE_LOD at the end of REVNUM.

        Raise KeyError if no mark has been recorded for SOURCE_LOD."""

        modifications = self._marks[source_lod]
        # The one-element tuple sorts before every (revnum + 1, mark)
        # entry, so this finds the last entry whose revnum is <= REVNUM.
        # NOTE(review): if every recorded revnum is greater than REVNUM,
        # i is -1 and the most recent entry is returned -- confirm that
        # callers never ask for that.
        i = bisect.bisect_left(modifications, (revnum + 1,)) - 1
        return modifications[i][1]

    def _process_symbol_commit(
            self, svn_commit, git_branch, source_groups, mark
            ):
        """Write one commit to GIT_BRANCH that merges SOURCE_GROUPS.

        The commit is labelled with MARK and takes its author, date and
        log message from SVN_COMMIT.  Each group in SOURCE_GROUPS
        contributes one 'merge' line, after which the files of every
        group are branched via the revision writer."""

        author = self._get_author(svn_commit)
        log_msg = self._get_log_msg(svn_commit)

        self.f.write('commit %s\n' % (git_branch,))
        self.f.write('mark :%d\n' % (mark,))
        self.f.write('committer %s %d +0000\n' % (author, svn_commit.date,))
        self.f.write('data %d\n' % (len(log_msg),))
        self.f.write('%s\n' % (log_msg,))

        # All of the merge lines are written before any file content:
        for (source_lod, source_revnum, cvs_symbols,) in source_groups:
            self.f.write(
                'merge :%d\n'
                % (self._get_source_mark(source_lod, source_revnum),)
                )

        for (source_lod, source_revnum, cvs_symbols,) in source_groups:
            for cvs_symbol in cvs_symbols:
                self.revision_writer.branch_file(cvs_symbol)

        self.f.write('\n')

    def process_branch_commit(self, svn_commit):
        """Write the commit(s) that create the branch of SVN_COMMIT.

        The sources are split into chunks by get_chunks() according to
        self.max_merges, and one commit is written per chunk."""

        self._mirror.start_commit(svn_commit.revnum)
        source_groups = list(self._get_source_groups(svn_commit))
        for groups in get_chunks(source_groups, self.max_merges):
            self._process_symbol_commit(
                svn_commit, 'refs/heads/%s' % (svn_commit.symbol.name,),
                groups,
                self._create_commit_mark(svn_commit.symbol, svn_commit.revnum),
                )
        self._mirror.end_commit()

    def _set_symbol(self, symbol, mark):
        """Point the git reference for SYMBOL at MARK.

        A Branch is written under refs/heads and a Tag under refs/tags.
        Raise InternalError for any other kind of symbol."""

        if isinstance(symbol, Branch):
            category = 'heads'
        elif isinstance(symbol, Tag):
            category = 'tags'
        else:
            raise InternalError()
        self.f.write('reset refs/%s/%s\n' % (category, symbol.name,))
        self.f.write('from :%d\n' % (mark,))

    def process_tag_commit(self, svn_commit):
        """Write the git commands that create the tag of SVN_COMMIT.

        If the tag can be created as a simple copy, it is pointed
        directly at the mark of its single source.  Otherwise the
        commits are made on FIXUP_BRANCH_NAME, the tag is pointed at the
        last of them, and FIXUP_BRANCH_NAME is then reset.

        Raise InternalError if the fixup branch is needed but no commit
        was made on it."""

        self._mirror.start_commit(svn_commit.revnum)

        source_groups = list(self._get_source_groups(svn_commit))
        if self._is_simple_copy(svn_commit, source_groups):
            (source_lod, source_revnum, cvs_symbols) = source_groups[0]
            Log().debug(
                '%s will be created via a simple copy from %s:r%d'
                % (svn_commit.symbol, source_lod, source_revnum,)
                )
            mark = self._get_source_mark(source_lod, source_revnum)
            self._set_symbol(svn_commit.symbol, mark)
        else:
            Log().debug(
                '%s will be created via a fixup branch' % (svn_commit.symbol,)
                )

            # Create the fixup branch (which might involve making more
            # than one commit):
            mark = None
            for groups in get_chunks(source_groups, self.max_merges):
                mark = self._create_commit_mark(
                    svn_commit.symbol, svn_commit.revnum
                    )
                self._process_symbol_commit(
                    svn_commit, FIXUP_BRANCH_NAME, groups, mark
                    )

            if mark is None:
                # Without this check the code below would fail with an
                # UnboundLocalError that hides the real problem.
                raise InternalError(
                    'No sources found for %s' % (svn_commit.symbol,)
                    )

            # Store the mark of the last commit to the fixup branch as
            # the value of the tag:
            self._set_symbol(svn_commit.symbol, mark)
            self.f.write('reset %s\n' % (FIXUP_BRANCH_NAME,))
            self.f.write('\n')

        self._mirror.end_commit()

    def cleanup(self):
        """Finish the revision writer and release all resources.

        Each step runs even if an earlier one raises, so that a failure
        while finishing does not leak the dump file, the mirror or the
        SymbolingsReader."""

        try:
            self.revision_writer.finish()
        finally:
            try:
                self._mirror.close()
            finally:
                try:
                    self.f.close()
                    del self.f
                finally:
                    self._symbolings_reader.close()
                    del self._symbolings_reader