def transform_symbol(self, name, revision):
    """Run the project's symbol transforms on a single symbol.

    NAME is the symbol's original name and REVISION its canonicalized
    revision number.  Return the resulting symbol name, or None if the
    symbol should be ignored entirely.  A rename or a drop is recorded
    through self.pdc and reported at verbose log level; an unchanged
    name is returned silently."""

    original = name

    # Let the user-defined symbol transforms have their say:
    transformed = self.cvs_file.project.transform_symbol(
        self.cvs_file, original, revision
    )

    if transformed is None:
        # The transforms asked for the symbol to be dropped:
        self.pdc.log_symbol_transform(original, None)
        logger.verbose(
            " symbol '%s'=%s ignored in %s"
            % (original, revision, self.cvs_file.rcs_path)
        )
        return None

    if transformed != original:
        self.pdc.log_symbol_transform(original, transformed)
        logger.verbose(
            " symbol '%s'=%s transformed to '%s' in %s"
            % (original, revision, transformed, self.cvs_file.rcs_path)
        )

    return transformed
def __init__(self, progname, cmd_args, pass_manager):
    """Process the command-line options, storing run options to SELF.

    PROGNAME is the name of the program, used in the usage string.
    CMD_ARGS is the list of command-line arguments passed to the
    program.  PASS_MANAGER is an instance of PassManager, needed to
    help process the -p and --help-passes options.

    After parsing, either verify that no options incompatible with
    --options were used (if an options file was found) or perform the
    main initialization from the other command-line options; finally
    check the options for problems."""

    self.progname = progname
    self.cmd_args = cmd_args
    self.pass_manager = pass_manager
    self.start_pass = 1
    self.end_pass = self.pass_manager.num_passes
    self.profiling = False

    self.projects = []

    # A list of one list of SymbolStrategyRules for each project:
    self.project_symbol_strategy_rules = []

    parser = self.parser = optparse.OptionParser(
        usage=usage,
        description=self.get_description(),
        add_help_option=False,
    )
    # A place to record any options used that are incompatible with
    # --options:
    parser.set_default('options_incompatible_options', [])

    # Populate the options parser with the options, one group at a
    # time:
    parser.add_option_group(self._get_options_file_options_group())
    parser.add_option_group(self._get_output_options_group())
    parser.add_option_group(self._get_conversion_options_group())
    parser.add_option_group(self._get_symbol_handling_options_group())
    parser.add_option_group(
        self._get_subversion_properties_options_group())
    parser.add_option_group(self._get_extraction_options_group())
    parser.add_option_group(self._get_environment_options_group())
    parser.add_option_group(self._get_partial_conversion_options_group())
    parser.add_option_group(self._get_information_options_group())

    (self.options, self.args) = parser.parse_args(args=self.cmd_args)

    # Now the log level has been set; log the time when the run started.
    # Use the 24-hour clock (%H): the 12-hour %I without an AM/PM marker
    # would make morning and afternoon start times indistinguishable.
    logger.verbose(
        time.strftime(
            'Conversion start time: %Y-%m-%d %H:%M:%S %Z',
            time.localtime(logger.start_time)))

    if self.options.options_file_found:
        # Check that no options that are incompatible with --options
        # were used:
        self.verify_option_compatibility()
    else:
        # --options was not specified.  So do the main initialization
        # based on other command-line options:
        self.process_options()

    # Check for problems with the options:
    self.check_options()
def get_symbol(self, symbol, stats):
    """Type SYMBOL as a branch or a tag when its usage is unambiguous.

    Trunk and already-typed symbols are returned unchanged.  Otherwise
    STATS decides: a symbol only ever used as a branch becomes a Branch,
    one only ever used as a tag becomes a Tag, and a symbol used both
    ways (or not at all) is returned unchanged."""

    if isinstance(symbol, (Trunk, TypedSymbol)):
        return symbol

    used_as_tag = stats.tag_create_count > 0
    used_as_branch = (
        stats.branch_create_count > 0 or stats.branch_commit_count > 0
    )

    if used_as_tag == used_as_branch:
        # Either used both ways (can't decide) or never seen at all:
        return symbol

    if used_as_branch:
        logger.verbose(
            'Converting symbol %s as a branch because it is always used '
            'as a branch.' % (symbol,)
        )
        return Branch(symbol)

    logger.verbose(
        'Converting symbol %s as a tag because it is always used '
        'as a tag.' % (symbol,)
    )
    return Tag(symbol)
def _eliminate_trivial_duplicate_defs(self, symbol_defs): """Iterate through SYMBOL_DEFS, Removing identical duplicate definitions. Duplicate definitions of symbol names have been seen in the wild, and they can also happen when --symbol-transform is used. If a symbol is defined to the same revision number repeatedly, then ignore all but the last definition.""" # Make a copy, since we have to iterate through the definitions # twice: symbol_defs = list(symbol_defs) # A map { (name, revision) : [index,...] } of the indexes where # symbol definitions name=revision were found: known_definitions = {} for (i, symbol_def) in enumerate(symbol_defs): known_definitions.setdefault(symbol_def, []).append(i) # A set of the indexes of entries that have to be removed from # symbol_defs: dup_indexes = set() for ((name, revision), indexes) in known_definitions.iteritems(): if len(indexes) > 1: logger.verbose( "in %r:\n" " symbol %s:%s defined multiple times; ignoring duplicates\n" % (self.cvs_file.rcs_path, name, revision,) ) dup_indexes.update(indexes[:-1]) for (i, symbol_def) in enumerate(symbol_defs): if i not in dup_indexes: yield symbol_def
def log(self, symbol):
    """Report at verbose level that SYMBOL becomes a branch because it
    matched this rule's regexp."""
    message = (
        'Converting symbol %s as a branch because it matches regexp "%s".'
        % (symbol, self.regexp.pattern)
    )
    logger.verbose(message)
def transform_symbol(self, name, revision):
    """Transform one symbol according to the project's symbol transforms.

    NAME is the original symbol name; REVISION is its canonicalized
    revision number.  The return value is the new symbol name, or None
    if the symbol should be ignored entirely.  Renames and drops are
    passed to self.pdc.log_symbol_transform() and logged at verbose
    level."""

    cvs_file = self.cvs_file

    # Apply any user-defined symbol transforms to the symbol name:
    new_name = cvs_file.project.transform_symbol(cvs_file, name, revision)

    if new_name is None:
        # Ignore symbol:
        self.pdc.log_symbol_transform(name, None)
        message = " symbol '%s'=%s ignored in %s" % (
            name, revision, cvs_file.rcs_path,
        )
        logger.verbose(message)
    elif new_name != name:
        self.pdc.log_symbol_transform(name, new_name)
        message = " symbol '%s'=%s transformed to '%s' in %s" % (
            name, revision, new_name, cvs_file.rcs_path,
        )
        logger.verbose(message)

    return new_name
def process_project(self, project):
    """Walk PROJECT's repository and process everything found there.

    PROJECT is registered in Ctx()._projects under its id.  Directories
    produced by the walk are passed to add_cvs_directory(); every other
    path is processed by a _ProjectDataCollector and the resulting items
    are passed to _process_cvs_file_items().  A fatal error is recorded
    if the walk produced no non-directory path."""

    Ctx()._projects[project.id] = project

    collector = _ProjectDataCollector(self, project)

    saw_rcs_file = False
    paths = walk_repository(
        project, self.file_key_generator, self.record_fatal_error
    )
    for cvs_path in paths:
        if not isinstance(cvs_path, CVSDirectory):
            items = collector.process_file(cvs_path)
            self._process_cvs_file_items(items)
            saw_rcs_file = True
        else:
            self.add_cvs_directory(cvs_path)

    if not saw_rcs_file:
        complaint = (
            'No RCS files found under %r!\n'
            'Are you absolutely certain you are pointing cvs2svn\n'
            'at a CVS repository?\n'
            % (project.project_cvs_repos_path,)
        )
        self.record_fatal_error(complaint)

    collector.summarize_symbol_transforms()

    self.num_files += collector.num_files
    logger.verbose('Processed', self.num_files, 'files')
def _show_copy(self, src_path, dest_path, src_revnum):
    """Log, at verbose level, that revision SRC_REVNUM of SRC_PATH is
    being 'copied' to DEST_PATH."""
    template = (
        " Copying revision %d of %s\n"
        " to %s\n"
    )
    logger.verbose(template % (src_revnum, src_path, dest_path))
def get_symbol(self, symbol, stats):
    """Convert SYMBOL to a Branch if STATS shows commits on it.

    Trunk and already-typed symbols, as well as symbols without any
    branch commits, are returned unchanged."""

    if isinstance(symbol, (Trunk, TypedSymbol)):
        return symbol

    if not stats.branch_commit_count > 0:
        return symbol

    logger.verbose(
        'Converting symbol %s as a branch because there are commits on it.'
        % (symbol,)
    )
    return Branch(symbol)
def __init__(self, progname, cmd_args, pass_manager):
    """Process the command-line options, storing run options to SELF.

    PROGNAME is the name of the program, used in the usage string.
    CMD_ARGS is the list of command-line arguments passed to the
    program.  PASS_MANAGER is an instance of PassManager, needed to
    help process the -p and --help-passes options.

    After parsing, either verify that no options incompatible with
    --options were used (if an options file was found) or perform the
    main initialization from the other command-line options; finally
    check the options for problems."""

    self.progname = progname
    self.cmd_args = cmd_args
    self.pass_manager = pass_manager
    self.start_pass = 1
    self.end_pass = self.pass_manager.num_passes
    self.profiling = False

    self.projects = []

    # A list of one list of SymbolStrategyRules for each project:
    self.project_symbol_strategy_rules = []

    parser = self.parser = optparse.OptionParser(
        usage=usage, description=self.get_description(), add_help_option=False
    )
    # A place to record any options used that are incompatible with
    # --options:
    parser.set_default("options_incompatible_options", [])

    # Populate the options parser with the options, one group at a
    # time:
    parser.add_option_group(self._get_options_file_options_group())
    parser.add_option_group(self._get_output_options_group())
    parser.add_option_group(self._get_conversion_options_group())
    parser.add_option_group(self._get_symbol_handling_options_group())
    parser.add_option_group(self._get_subversion_properties_options_group())
    parser.add_option_group(self._get_extraction_options_group())
    parser.add_option_group(self._get_environment_options_group())
    parser.add_option_group(self._get_partial_conversion_options_group())
    parser.add_option_group(self._get_information_options_group())

    (self.options, self.args) = parser.parse_args(args=self.cmd_args)

    # Now the log level has been set; log the time when the run started.
    # Use the 24-hour clock (%H): the 12-hour %I without an AM/PM marker
    # would make morning and afternoon start times indistinguishable.
    logger.verbose(
        time.strftime(
            "Conversion start time: %Y-%m-%d %H:%M:%S %Z",
            time.localtime(logger.start_time),
        )
    )

    if self.options.options_file_found:
        # Check that no options that are incompatible with --options
        # were used:
        self.verify_option_compatibility()
    else:
        # --options was not specified.  So do the main initialization
        # based on other command-line options:
        self.process_options()

    # Check for problems with the options:
    self.check_options()
def get_symbol(self, symbol, stats):
    """Exclude SYMBOL if STATS shows it to be a trivial import branch.

    That is the case when it was never created as a tag and its branch
    creation count equals its trivial import count.  Trunk, already-typed
    symbols and all other symbols are returned unchanged."""

    if isinstance(symbol, (Trunk, TypedSymbol)):
        return symbol

    is_trivial_import = (
        stats.tag_create_count == 0
        and stats.branch_create_count == stats.trivial_import_count
    )
    if not is_trivial_import:
        return symbol

    logger.verbose(
        'Excluding branch %s because it is a trivial import branch.'
        % (symbol,)
    )
    return ExcludedSymbol(symbol)
def get_symbol(self, symbol, stats):
    """Return SYMBOL as a Branch if commits were made on it.

    Symbols that are Trunk or already typed are passed through, as are
    symbols whose STATS record no branch commits."""

    already_decided = isinstance(symbol, (Trunk, TypedSymbol))
    if already_decided:
        return symbol

    if stats.branch_commit_count > 0:
        reason = (
            'Converting symbol %s as a branch because there are commits on it.'
            % (symbol,)
        )
        logger.verbose(reason)
        return Branch(symbol)

    return symbol
def get_symbol(self, symbol, stats):
    """Exclude SYMBOL if STATS shows it to be a pure vendor branch.

    That is the case when it was never created as a tag and its branch
    creation count equals its pure_ntdb_count.  Trunk, already-typed
    symbols and all other symbols are returned unchanged."""

    if isinstance(symbol, (Trunk, TypedSymbol)):
        return symbol

    is_pure_vendor_branch = (
        stats.tag_create_count == 0
        and stats.branch_create_count == stats.pure_ntdb_count
    )
    if not is_pure_vendor_branch:
        return symbol

    logger.verbose(
        'Excluding branch %s because it is a pure vendor branch.'
        % (symbol,)
    )
    return ExcludedSymbol(symbol)
def get_symbol(self, symbol, stats):
    """Fill in the preferred parent of SYMBOL if it is still unset.

    Only TypedSymbols whose preferred_parent_id is None are touched.
    The parent is looked up with self._get_preferred_parent(STATS); if
    one is found its id is stored on SYMBOL.  The outcome is logged at
    verbose level and SYMBOL is always returned."""

    needs_parent = (
        isinstance(symbol, TypedSymbol)
        and symbol.preferred_parent_id is None
    )
    if not needs_parent:
        return symbol

    parent = self._get_preferred_parent(stats)
    if parent is not None:
        symbol.preferred_parent_id = parent.id
        logger.verbose(
            'The preferred parent of %s is %s' % (symbol, parent)
        )
    else:
        logger.verbose('%s has no preferred parent' % (symbol,))

    return symbol
def get_symbol(self, symbol, stats):
    """Return an ExcludedSymbol for SYMBOL if it is a pure vendor branch.

    A symbol qualifies when STATS has no tag creations and as many
    branch creations as pure_ntdb_count.  Everything else, including
    Trunk and already-typed symbols, is returned as is."""

    if isinstance(symbol, (Trunk, TypedSymbol)):
        return symbol

    if stats.tag_create_count != 0:
        return symbol
    if stats.branch_create_count != stats.pure_ntdb_count:
        return symbol

    message = (
        'Excluding branch %s because it is a pure vendor branch.'
        % (symbol,)
    )
    logger.verbose(message)
    return ExcludedSymbol(symbol)
def get_symbol(self, symbol, stats):
    """Return an ExcludedSymbol for SYMBOL if it is a trivial import branch.

    A symbol qualifies when STATS has no tag creations and as many
    branch creations as trivial imports.  Everything else, including
    Trunk and already-typed symbols, is returned as is."""

    if isinstance(symbol, (Trunk, TypedSymbol)):
        return symbol

    if stats.tag_create_count != 0:
        return symbol
    if stats.branch_create_count != stats.trivial_import_count:
        return symbol

    message = (
        'Excluding branch %s because it is a trivial import branch.'
        % (symbol,)
    )
    logger.verbose(message)
    return ExcludedSymbol(symbol)
def get_symbol(self, symbol, stats):
    """Type SYMBOL according to how it is used more often.

    Trunk and already-typed symbols are returned unchanged.  Otherwise
    SYMBOL becomes a Tag if STATS records at least as many tag creations
    as branch creations, and a Branch if not."""

    if isinstance(symbol, (Trunk, TypedSymbol)):
        return symbol

    if stats.tag_create_count < stats.branch_create_count:
        logger.verbose(
            'Converting symbol %s as a branch because it is more often used '
            'as a branch.' % (symbol,)
        )
        return Branch(symbol)

    logger.verbose(
        'Converting symbol %s as a tag because it is more often used '
        'as a tag.' % (symbol,)
    )
    return Tag(symbol)
def get_symbol(self, symbol, stats):
    """Choose tag or branch for SYMBOL by majority of use.

    Symbols that are Trunk or already typed pass through untouched.
    Ties between tag creations and branch creations in STATS go to Tag."""

    if isinstance(symbol, (Trunk, TypedSymbol)):
        return symbol

    prefer_tag = stats.tag_create_count >= stats.branch_create_count
    if prefer_tag:
        message = (
            'Converting symbol %s as a tag because it is more often used '
            'as a tag.' % (symbol,)
        )
        logger.verbose(message)
        return Tag(symbol)

    message = (
        'Converting symbol %s as a branch because it is more often used '
        'as a branch.' % (symbol,)
    )
    logger.verbose(message)
    return Branch(symbol)
def get(self, timestamp, change_expected):
    """Return a reasonable timestamp derived from TIMESTAMP.

    The result is always at least one second later than the value
    returned by the previous call.  A TIMESTAMP beyond
    self.max_timestamp is treated as bogus and replaced by the previous
    value plus one second.  Unless CHANGE_EXPECTED is true, a message
    is logged whenever the timestamp had to be changed.  The returned
    value is also stored in self.timestamp."""

    earliest_allowed = self.timestamp + 1.0

    if timestamp > self.max_timestamp:
        # If a timestamp is in the future, it is assumed that it is
        # bogus.  Shift it backwards in time to prevent it forcing other
        # timestamps to be pushed even further in the future.

        # Note that this is not nearly a complete solution to the bogus
        # timestamp problem.  A timestamp in the future still affects
        # the ordering of changesets, and a changeset having such a
        # timestamp will not be committed until all changesets with
        # earlier timestamps have been committed, even if other
        # changesets with even earlier timestamps depend on this one.
        self.timestamp = earliest_allowed
        if not change_expected:
            before = time.asctime(time.gmtime(timestamp))
            after = time.asctime(time.gmtime(self.timestamp))
            logger.warn(
                'Timestamp "%s" is in the future; changed to "%s".'
                % (before, after)
            )
    elif timestamp < earliest_allowed:
        self.timestamp = earliest_allowed
        if not change_expected and logger.is_on(logger.VERBOSE):
            before = time.asctime(time.gmtime(timestamp))
            after = time.asctime(time.gmtime(self.timestamp))
            logger.verbose(
                'Timestamp "%s" adjusted to "%s" to ensure monotonicity.'
                % (before, after)
            )
    else:
        self.timestamp = timestamp

    return self.timestamp
def get(self, timestamp, change_expected):
    """Derive a usable, strictly increasing timestamp from TIMESTAMP.

    Push TIMESTAMP forward if necessary so that it is at least one
    second later than the timestamp returned by the previous call.  A
    TIMESTAMP later than self.max_timestamp is considered bogus and is
    replaced by the previous timestamp plus one second.  If
    CHANGE_EXPECTED is not true, log a message whenever the timestamp
    has to be changed.  The result is remembered in self.timestamp."""

    def describe(seconds):
        return time.asctime(time.gmtime(seconds))

    in_future = timestamp > self.max_timestamp
    if in_future:
        # A timestamp in the future is assumed to be bogus.  Shift it
        # backwards in time to prevent it forcing other timestamps to
        # be pushed even further in the future.

        # Note that this is not nearly a complete solution to the bogus
        # timestamp problem.  A timestamp in the future still affects
        # the ordering of changesets, and a changeset having such a
        # timestamp will not be committed until all changesets with
        # earlier timestamps have been committed, even if other
        # changesets with even earlier timestamps depend on this one.
        self.timestamp = self.timestamp + 1.0
        if not change_expected:
            logger.warn(
                'Timestamp "%s" is in the future; changed to "%s".'
                % (describe(timestamp), describe(self.timestamp))
            )
        return self.timestamp

    if timestamp < self.timestamp + 1.0:
        self.timestamp = self.timestamp + 1.0
        if not change_expected and logger.is_on(logger.VERBOSE):
            logger.verbose(
                'Timestamp "%s" adjusted to "%s" to ensure monotonicity.'
                % (describe(timestamp), describe(self.timestamp))
            )
        return self.timestamp

    self.timestamp = timestamp
    return self.timestamp
def process_project(self, project, cvs_paths):
    """Process the paths CVS_PATHS belonging to PROJECT.

    Directories are passed to add_cvs_directory(); every other path is
    processed by a _ProjectDataCollector and the resulting items are
    passed to _process_cvs_file_items().  A fatal error is recorded if
    CVS_PATHS contained no non-directory path."""

    collector = _ProjectDataCollector(self, project)

    saw_rcs_file = False
    for cvs_path in cvs_paths:
        if not isinstance(cvs_path, CVSDirectory):
            items = collector.process_file(cvs_path)
            self._process_cvs_file_items(items)
            saw_rcs_file = True
        else:
            self.add_cvs_directory(cvs_path)

    if not saw_rcs_file:
        complaint = (
            'No RCS files found under %r!\n'
            'Are you absolutely certain you are pointing cvs2svn\n'
            'at a CVS repository?\n'
            % (project.project_cvs_repos_path,)
        )
        self.record_fatal_error(complaint)

    collector.summarize_symbol_transforms()

    self.num_files += collector.num_files
    logger.verbose('Processed', self.num_files, 'files')
def choose_garbage_collection_policy():
    """Return the GarbageCollectionPolicy to be used.

    For CPython, we want to use NoGarbageCollectionPolicy.  But other
    Python implementations (e.g., Jython, PyPy, IronPython) do not
    necessarily use reference-counting for memory management, in which
    case it is not possible to turn off the garbage collector.  So on
    those platforms, use the DefaultGarbageCollectionPolicy."""

    try:
        on_cpython = platform.python_implementation() == 'CPython'
    except AttributeError:
        # platform.python_implementation() was only added in Python 2.6.
        # So if that call failed, we should leave garbage collection on
        # just to be on the safe side.
        on_cpython = False

    if not on_cpython:
        logger.verbose('Leaving Python garbage collection at its default settings')
        return DefaultGarbageCollectionPolicy()

    return NoGarbageCollectionPolicy()
def decode(self, s):
    """Decode string S using the configured source encodings.

    A unicode string is returned unchanged.  Otherwise each
    (name, decoder) pair in self.decoders is tried in order and the
    first successful result is returned; failures are logged at verbose
    level.  If all of them fail, the fallback decoder (if any) is used
    in 'replace' mode.  Raise UnicodeError if nothing could decode S and
    no fallback decoder was specified."""

    if isinstance(s, unicode):
        return s

    for (encoding_name, decode_fn) in self.decoders:
        try:
            decoded = decode_fn(s)[0]
        except ValueError:
            logger.verbose(
                "Encoding '%s' failed for string %r" % (encoding_name, s)
            )
        else:
            return decoded

    if self.fallback_decoder is None:
        raise UnicodeError()

    (_fallback_name, fallback_fn) = self.fallback_decoder
    return fallback_fn(s, 'replace')[0]
def choose_garbage_collection_policy():
    """Return the GarbageCollectionPolicy to be used.

    CPython gets a NoGarbageCollectionPolicy.  Other Python
    implementations (e.g., Jython, PyPy, IronPython) do not necessarily
    use reference-counting for memory management, in which case it is
    not possible to turn off the garbage collector; they get a
    DefaultGarbageCollectionPolicy."""

    implementation = None
    try:
        implementation = platform.python_implementation()
    except AttributeError:
        # platform.python_implementation() was only added in Python 2.6.
        # So if that call failed, we should leave garbage collection on
        # just to be on the safe side.
        pass

    if implementation != 'CPython':
        logger.verbose(
            'Leaving Python garbage collection at its default settings'
        )
        return DefaultGarbageCollectionPolicy()

    return NoGarbageCollectionPolicy()
def _eliminate_trivial_duplicate_defs(self, symbol_defs): """Iterate through SYMBOL_DEFS, Removing identical duplicate definitions. Duplicate definitions of symbol names have been seen in the wild, and they can also happen when --symbol-transform is used. If a symbol is defined to the same revision number repeatedly, then ignore all but the last definition.""" # Make a copy, since we have to iterate through the definitions # twice: symbol_defs = list(symbol_defs) # A map { (name, revision) : [index,...] } of the indexes where # symbol definitions name=revision were found: known_definitions = {} for (i, symbol_def) in enumerate(symbol_defs): known_definitions.setdefault(symbol_def, []).append(i) # A set of the indexes of entries that have to be removed from # symbol_defs: dup_indexes = set() for ((name, revision), indexes) in known_definitions.iteritems(): if len(indexes) > 1: logger.verbose( "in %r:\n" " symbol %s:%s defined multiple times; ignoring duplicates\n" % ( self.cvs_file.rcs_path, name, revision, )) dup_indexes.update(indexes[:-1]) for (i, symbol_def) in enumerate(symbol_defs): if i not in dup_indexes: yield symbol_def
def _process_revision_changeset(self, changeset, timestamp):
    """Process CHANGESET, using TIMESTAMP as the commit time.

    Create and yield one or more SVNCommits in the process.  CHANGESET
    must be an OrderedChangeset.  TIMESTAMP is used as the timestamp
    for any resulting SVNCommits.

    A changeset without any item ids only produces a warning and yields
    nothing."""

    if not changeset.cvs_item_ids:
        # Wrap the argument in a tuple so that the formatting cannot be
        # confused by the type of CHANGESET:
        logger.warn('Changeset has no items: %r' % (changeset,))
        return

    logger.verbose('-' * 60)
    logger.verbose('CVS Revision grouping:')
    logger.verbose(' Time: %s' % time.ctime(timestamp))

    # Generate an SVNCommit unconditionally.  Even if the only change in
    # this group of CVSRevisions is a deletion of an already-deleted
    # file (that is, a CVS revision in state 'dead' whose predecessor
    # was also in state 'dead'), the conversion will still generate a
    # Subversion revision containing the log message for the second dead
    # revision, because we don't want to lose that information.

    cvs_revs = list(changeset.iter_cvs_items())
    if cvs_revs:
        # Order the revisions by RCS path.  A key function gives the same
        # (stable) ordering as the old cmp-style comparison, and unlike
        # it also works on Python 3:
        cvs_revs.sort(key=lambda cvs_rev: cvs_rev.cvs_file.rcs_path)
        svn_commit = SVNPrimaryCommit(
            cvs_revs, timestamp, self.revnum_generator.gen_id())

        yield svn_commit

        for cvs_rev in cvs_revs:
            Ctx()._symbolings_logger.log_revision(cvs_rev, svn_commit.revnum)

        # Generate an SVNPostCommit if we have default branch revs.  If
        # some of the revisions in this commit happened on a non-trunk
        # default branch, then those files have to be copied into trunk
        # manually after being changed on the branch (because the RCS
        # "default branch" appears as head, i.e., trunk, in practice).
        # Unfortunately, Subversion doesn't support copies with sources
        # in the current txn.  All copies must be based in committed
        # revisions.  Therefore, we generate the copies in a new
        # revision.
        for svn_post_commit in self._post_commit(
                cvs_revs, svn_commit.revnum, timestamp):
            yield svn_post_commit
def _process_revision_changeset(self, changeset, timestamp):
    """Process CHANGESET, using TIMESTAMP as the commit time.

    Create and yield one or more SVNCommits in the process.  CHANGESET
    must be an OrderedChangeset.  TIMESTAMP is used as the timestamp
    for any resulting SVNCommits.

    A changeset without any item ids only produces a warning and yields
    nothing."""

    if not changeset.cvs_item_ids:
        # Wrap the argument in a tuple so that the formatting cannot be
        # confused by the type of CHANGESET:
        logger.warn('Changeset has no items: %r' % (changeset,))
        return

    logger.verbose('-' * 60)
    logger.verbose('CVS Revision grouping:')
    logger.verbose(' Time: %s' % time.ctime(timestamp))

    # Generate an SVNCommit unconditionally.  Even if the only change in
    # this group of CVSRevisions is a deletion of an already-deleted
    # file (that is, a CVS revision in state 'dead' whose predecessor
    # was also in state 'dead'), the conversion will still generate a
    # Subversion revision containing the log message for the second dead
    # revision, because we don't want to lose that information.

    cvs_revs = list(changeset.iter_cvs_items())
    if cvs_revs:
        # Order the revisions by RCS path.  A key function gives the same
        # (stable) ordering as the old cmp-style comparison, and unlike
        # it also works on Python 3:
        cvs_revs.sort(key=lambda cvs_rev: cvs_rev.cvs_file.rcs_path)
        svn_commit = SVNPrimaryCommit(
            cvs_revs, timestamp, self.revnum_generator.gen_id()
        )

        yield svn_commit

        for cvs_rev in cvs_revs:
            Ctx()._symbolings_logger.log_revision(cvs_rev, svn_commit.revnum)

        # Generate an SVNPostCommit if we have default branch revs.  If
        # some of the revisions in this commit happened on a non-trunk
        # default branch, then those files have to be copied into trunk
        # manually after being changed on the branch (because the RCS
        # "default branch" appears as head, i.e., trunk, in practice).
        # Unfortunately, Subversion doesn't support copies with sources
        # in the current txn.  All copies must be based in committed
        # revisions.  Therefore, we generate the copies in a new
        # revision.
        for svn_post_commit in self._post_commit(
            cvs_revs, svn_commit.revnum, timestamp
        ):
            yield svn_post_commit
def delete_path(self, lod, cvs_path):
    """Log, at verbose level, that the path of CVS_PATH within LOD is
    being 'deleted'."""
    doomed_path = lod.get_path(cvs_path.cvs_path)
    logger.verbose(" Deleting %s" % (doomed_path,))
def __init__(self):
    """Announce at verbose level that the Python garbage collector is
    being disabled, then disable it."""
    notice = 'Disabling the Python garbage collector (it is unneeded)'
    logger.verbose(notice)
    gc.disable()
def __init__(self):
    """Announce at verbose level that the Python garbage collector is
    left at its default settings."""
    notice = 'Leaving the Python garbage collector at its default settings'
    logger.verbose(notice)
def exclude(symbol):
    """Return SYMBOL wrapped as an ExcludedSymbol, logging at verbose
    level that this is due to a manual setting."""
    message = 'Excluding symbol %s because of manual setting.' % (symbol,)
    logger.verbose(message)
    return ExcludedSymbol(symbol)
def cleanup(self):
    """Log the deletion at verbose level, then remove the file named by
    self.filename."""
    target = self.filename
    logger.verbose("Deleting", target)
    os.unlink(target)
def exclude(symbol):
    """Exclude SYMBOL because of a manual setting.

    The decision is logged at verbose level; the return value is an
    ExcludedSymbol built from SYMBOL."""
    logger.verbose(
        'Excluding symbol %s because of manual setting.' % (symbol,))
    excluded = ExcludedSymbol(symbol)
    return excluded
def convert_as_tag(symbol):
    """Return SYMBOL wrapped as a Tag, logging at verbose level that
    this is due to a manual setting."""
    message = (
        'Converting symbol %s as a tag because of manual setting.'
        % (symbol,)
    )
    logger.verbose(message)
    return Tag(symbol)
def change_path(self, cvs_rev):
    """Log, at verbose level, the SVN path of CVS_REV as the path being
    'changed'."""
    svn_path = cvs_rev.get_svn_path()
    logger.verbose(" Changing %s" % (svn_path,))
def __init__(self):
    """Turn off the Python garbage collector.

    The fact is logged at verbose level before gc.disable() is called."""
    logger.verbose(
        'Disabling the Python garbage collector (it is unneeded)'
    )
    gc.disable()
def delete_lod(self, lod):
    """Log, at verbose level, that the path of LOD is being 'deleted'."""
    lod_path = lod.get_path()
    logger.verbose(" Deleting %s" % (lod_path,))
def log(self, symbol):
    """Report at verbose level that SYMBOL becomes a tag because it
    matched this rule's regexp."""
    message = (
        'Converting symbol %s as a tag because it matches regexp "%s".'
        % (symbol, self.regexp.pattern)
    )
    logger.verbose(message)
def log(self, symbol):
    """Report at verbose level that SYMBOL is excluded because it
    matched this rule's regexp."""
    message = (
        'Excluding symbol %s because it matches regexp "%s".'
        % (symbol, self.regexp.pattern)
    )
    logger.verbose(message)
def log(self, symbol):
    """Log the regexp-based exclusion of SYMBOL at verbose level."""
    pattern = self.regexp.pattern
    logger.verbose(
        'Excluding symbol %s because it matches regexp "%s".'
        % (symbol, pattern)
    )
def log(self, symbol):
    """Report at verbose level that SYMBOL becomes a tag because no
    other rule applied."""
    message = (
        'Converting symbol %s as a tag because no other rules applied.'
        % (symbol,)
    )
    logger.verbose(message)
def log(self, symbol):
    """Report at verbose level that SYMBOL is excluded by the catch-all
    rule."""
    message = 'Excluding symbol %s by catch-all rule.' % (symbol,)
    logger.verbose(message)
def log(self, symbol):
    """Log at verbose level that SYMBOL is converted as a tag by
    default, since no other rules applied."""
    text = 'Converting symbol %s as a tag because no other rules applied.'
    logger.verbose(text % (symbol,))
def log(self, symbol):
    """Log at verbose level that the catch-all rule excludes SYMBOL."""
    text = 'Excluding symbol %s by catch-all rule.'
    logger.verbose(text % (symbol,))
def __init__(self):
    """Leave the Python garbage collector alone.

    Nothing is changed; the decision is only logged at verbose level."""
    logger.verbose(
        'Leaving the Python garbage collector at its default settings'
    )
def convert_as_branch(symbol):
    """Return SYMBOL wrapped as a Branch, logging at verbose level that
    this is due to a manual setting."""
    message = (
        'Converting symbol %s as a branch because of manual setting.'
        % (symbol,)
    )
    logger.verbose(message)
    return Branch(symbol)
def convert_as_branch(symbol):
    """Convert SYMBOL as a branch because of a manual setting.

    The decision is logged at verbose level; the return value is a
    Branch built from SYMBOL."""
    text = 'Converting symbol %s as a branch because of manual setting.'
    logger.verbose(text % (symbol,))
    branch = Branch(symbol)
    return branch
def add_path(self, cvs_rev):
    """Log, at verbose level, the SVN path of CVS_REV as the path being
    'added'."""
    svn_path = cvs_rev.get_svn_path()
    logger.verbose(" Adding %s" % (svn_path,))
def convert_as_tag(symbol):
    """Convert SYMBOL as a tag because of a manual setting.

    The decision is logged at verbose level; the return value is a Tag
    built from SYMBOL."""
    text = 'Converting symbol %s as a tag because of manual setting.'
    logger.verbose(text % (symbol,))
    tag = Tag(symbol)
    return tag
def mkdir(self, lod, cvs_directory):
    """Log, at verbose level, the path of CVS_DIRECTORY within LOD as a
    new directory."""
    new_directory = lod.get_path(cvs_directory.cvs_path)
    logger.verbose(" New Directory %s" % (new_directory,))