def _resolve_branch_dependencies(self):
  """Resolve dependencies involving branches."""

  for branch_data in self.sdc.branches_data.values():
    # The branch_data's parent has the branch as a child regardless
    # of whether the branch had any subsequent commits:
    try:
      parent_data = self._rev_data[branch_data.parent]
    except KeyError:
      logger.warn(
          'In %r:\n'
          '    branch %r references non-existing revision %s\n'
          '    and will be ignored.'
          % (self.cvs_file.rcs_path, branch_data.symbol.name,
             branch_data.parent,))
      del self.sdc.branches_data[branch_data.branch_number]
    else:
      parent_data.branches_data.append(branch_data)

      # If the branch has a child (i.e., something was committed on
      # the branch), then we store a reference to the branch_data
      # there, define the child's parent to be the branch's parent,
      # and list the child in the branch parent's branches_revs_data:
      if branch_data.child is not None:
        child_data = self._rev_data[branch_data.child]
        assert child_data.parent_branch_data is None
        child_data.parent_branch_data = branch_data
        assert child_data.parent is None
        child_data.parent = branch_data.parent
        parent_data.branches_revs_data.append(branch_data.child)

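# Illustrative sketch (not part of cvs2svn; the helper name is
# hypothetical): how a branch's parent revision relates to its RCS branch
# number.  A branch number like '1.7.2' has an odd number of components,
# and dropping the last component yields the revision from which the
# branch sprouts, which is what branch_data.parent refers to above.
def _sprout_revision_of(branch_number):
  # '1.7.2' -> '1.7'; '1.7.2.1.3' -> '1.7.2.1'
  return branch_number.rsplit('.', 1)[0]

assert _sprout_revision_of('1.7.2') == '1.7'
assert _sprout_revision_of('1.7.2.1.3') == '1.7.2.1'
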
def _get_attic_file(self, parent_directory, basename):
  """Return a CVSFile object for the Attic file at BASENAME.

  PARENT_DIRECTORY is the CVSDirectory that physically contains the
  file on the filesystem (i.e., the Attic directory).  It is not
  necessarily the parent_directory of the CVSFile that will be
  returned.

  Return CVSFile, whose parent directory is usually
  PARENT_DIRECTORY.parent_directory, but might be PARENT_DIRECTORY
  iff CVSFile will remain in the Attic directory."""

  try:
    return self._get_cvs_file(
        parent_directory, basename, file_in_attic=True,
        )
  except FileInAndOutOfAtticException, e:
    if Ctx().retain_conflicting_attic_files:
      logger.warn(
          "%s: %s;\n"
          "   storing the latter into 'Attic' subdirectory.\n"
          % (warning_prefix, e)
          )
    else:
      self.error_handler(str(e))

    # Either way, return a CVSFile object so that the rest of the
    # file processing can proceed:
    return self._get_cvs_file(
        parent_directory, basename, file_in_attic=True, leave_in_attic=True,
        )

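# Illustration (hypothetical paths) of the conflict handled above: a CVS
# repository can contain both 'proj/foo.c,v' and 'proj/Attic/foo.c,v'.
# Normally an Attic file is treated as if it lived one directory up, so
# the two copies would collide at 'proj/foo.c'.  When
# Ctx().retain_conflicting_attic_files is set, the Attic copy is instead
# kept in an 'Attic' subdirectory ('proj/Attic/foo.c', via
# leave_in_attic=True) and conversion proceeds.
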
def _resolve_tag_dependencies(self):
  """Resolve dependencies involving tags."""

  for (rev, tag_data_list) in self.sdc.tags_data.items():
    try:
      parent_data = self._rev_data[rev]
    except KeyError:
      logger.warn(
          'In %r:\n'
          '    the following tag(s) reference non-existing revision %s\n'
          '    and will be ignored:\n'
          '    %s' % (
              self.cvs_file.rcs_path, rev,
              ', '.join([
                  repr(tag_data.symbol.name)
                  for tag_data in tag_data_list
                  ]),
              ))
      del self.sdc.tags_data[rev]
    else:
      for tag_data in tag_data_list:
        assert tag_data.rev == rev
        # The tag_data's rev has the tag as a child:
        parent_data.tags_data.append(tag_data)

def __init__(self, configfilename, ignore_case=True):
  config = ConfigParser.ConfigParser()
  if ignore_case:
    self.transform_case = _squash_case
  else:
    config.optionxform = _preserve_case
    self.transform_case = _preserve_case

  configtext = open(configfilename).read()
  if self.comment_re.search(configtext):
    logger.warn(
        '%s: Please be aware that a space followed by a\n'
        'semicolon is sometimes treated as a comment in configuration\n'
        'files.  This pattern was seen in\n'
        '    %s\n'
        'Please make sure that you have not inadvertently commented\n'
        'out part of an important line.'
        % (warning_prefix, configfilename,))

  config.readfp(StringIO(configtext), configfilename)
  self.patterns = []
  sections = config.sections()
  sections.sort()
  for section in sections:
    if self.transform_case(section) == 'auto-props':
      patterns = config.options(section)
      patterns.sort()
      for pattern in patterns:
        value = config.get(section, pattern)
        if value:
          self._add_pattern(pattern, value)

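# A minimal auto-props section of the kind this parser reads (hedged
# sketch; SAMPLE_CONFIG and the rules in it are hypothetical).  It shows
# the pitfall the warning above describes: Python 2's ConfigParser treats
# a semicolon preceded by whitespace in a value as the start of an inline
# comment, so in the second rule ' ; svn:eol-style=native' can be silently
# dropped.
SAMPLE_CONFIG = """\
[auto-props]
*.png = svn:mime-type=image/png
*.txt = svn:mime-type=text/plain ; svn:eol-style=native
"""
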
def _add_branch(self, name, branch_number):
  """Record that BRANCH_NUMBER is the branch number for branch NAME,
  and derive and record the revision from which NAME sprouts.
  BRANCH_NUMBER is an RCS branch number with an odd number of
  components, for example '1.7.2' (never '1.7.0.2').  Return the
  _BranchData instance (which is usually newly-created)."""

  branch_data = self.branches_data.get(branch_number)
  if branch_data is not None:
    logger.warn(
        "%s: in '%s':\n"
        "   branch '%s' already has name '%s',\n"
        "   cannot also have name '%s', ignoring the latter\n"
        % (warning_prefix, self.cvs_file.rcs_path, branch_number,
           branch_data.symbol.name, name))
    return branch_data

  symbol = self.pdc.get_symbol(name)
  branch_data = _BranchData(
      self.collect_data.item_key_generator.gen_id(), symbol, branch_number
      )
  self.branches_data[branch_number] = branch_data

  return branch_data

def log_leftovers(self):
  """If any TextRecords still exist, log them."""

  if self.text_records:
    logger.warn(
        "%s: internal problem: leftover revisions in the checkout cache:"
        % warning_prefix)
    for text_record in self.itervalues():
      logger.warn('    %s' % (text_record,))

def purge_ghost_symbols(self):
  """Purge any symbols that don't have any activity.

  Such ghost symbols can arise if a symbol was defined in an RCS
  file but pointed at a non-existent revision."""

  for stats in self._stats.values():
    if stats.is_ghost():
      logger.warn('Deleting ghost symbol: %s' % (stats.lod,))
      del self._stats[stats.lod]

def set_properties(self, cvs_file):
  propdict = self.get_propdict(cvs_file)
  for (k, v) in propdict.items():
    if k in cvs_file.properties:
      if cvs_file.properties[k] != v:
        logger.warn(
            "Property '%s' already set to %r for file %s; "
            "auto-props value (%r) ignored."
            % (k, cvs_file.properties[k], cvs_file.cvs_path, v,))
    else:
      cvs_file.properties[k] = v

def get_propdict(self, cvs_file):
  basename = self.transform_case(cvs_file.rcs_basename)

  propdict = {}
  for pattern in self.patterns:
    if pattern.match(basename):
      for (key, value) in pattern.propdict.items():
        if key in propdict:
          if propdict[key] != value:
            logger.warn(
                "Contradictory values set for property '%s' for file %s."
                % (key, cvs_file,))
        else:
          propdict[key] = value

  return propdict

def _process_revision_changeset(self, changeset, timestamp):
  """Process CHANGESET, using TIMESTAMP as the commit time.

  Create and yield one or more SVNCommits in the process.

  CHANGESET must be an OrderedChangeset.

  TIMESTAMP is used as the timestamp for any resulting SVNCommits."""

  if not changeset.cvs_item_ids:
    logger.warn('Changeset has no items: %r' % changeset)
    return

  logger.verbose('-' * 60)
  logger.verbose('CVS Revision grouping:')
  logger.verbose('  Time: %s' % time.ctime(timestamp))

  # Generate an SVNCommit unconditionally.  Even if the only change in
  # this group of CVSRevisions is a deletion of an already-deleted
  # file (that is, a CVS revision in state 'dead' whose predecessor
  # was also in state 'dead'), the conversion will still generate a
  # Subversion revision containing the log message for the second dead
  # revision, because we don't want to lose that information.
  cvs_revs = list(changeset.iter_cvs_items())
  if cvs_revs:
    cvs_revs.sort(
        lambda a, b: cmp(a.cvs_file.rcs_path, b.cvs_file.rcs_path))
    svn_commit = SVNPrimaryCommit(
        cvs_revs, timestamp, self.revnum_generator.gen_id()
        )

    yield svn_commit

    for cvs_rev in cvs_revs:
      Ctx()._symbolings_logger.log_revision(cvs_rev, svn_commit.revnum)

    # Generate an SVNPostCommit if we have default branch revs.  If
    # some of the revisions in this commit happened on a non-trunk
    # default branch, then those files have to be copied into trunk
    # manually after being changed on the branch (because the RCS
    # "default branch" appears as head, i.e., trunk, in practice).
    # Unfortunately, Subversion doesn't support copies with sources
    # in the current txn.  All copies must be based in committed
    # revisions.  Therefore, we generate the copies in a new
    # revision.
    for svn_post_commit in self._post_commit(
          cvs_revs, svn_commit.revnum, timestamp
          ):
      yield svn_post_commit

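# Note (hedged): the cmp-based sort above is Python 2 idiom; an equivalent
# key-based form, also valid on Python 2.4+, would be
#
#   cvs_revs.sort(key=lambda cvs_rev: cvs_rev.cvs_file.rcs_path)
#
# Either way, the sort only serves to make the ordering of files within
# one Subversion commit deterministic.
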
class SymbolMapper(SymbolTransform):
  """A SymbolTransform that transforms specific symbol definitions.

  The user has to specify the exact CVS filename, symbol name, and
  revision number to be transformed, and the new name (or None if the
  symbol should be ignored).  The mappings can be set via a
  constructor argument or by calling __setitem__()."""

  def __init__(self, items=[]):
    """Initialize the mapper.

    ITEMS is a list of tuples (cvs_filename, symbol_name, revision,
    new_name) which will be set as mappings."""

    # A map {(cvs_filename, symbol_name, revision) : new_name}:
    self._map = {}

    for (cvs_filename, symbol_name, revision, new_name) in items:
      self[cvs_filename, symbol_name, revision] = new_name

  def __setitem__(self, (cvs_filename, symbol_name, revision), new_name):
    """Set a mapping for a particular file, symbol, and revision."""

    cvs_filename = os.path.normcase(os.path.normpath(cvs_filename))
    key = (cvs_filename, symbol_name, revision)
    if key in self._map:
      logger.warn(
          'Overwriting symbol transform for\n'
          '    filename=%r symbol=%s revision=%s'
          % (cvs_filename, symbol_name, revision,))
    self._map[key] = new_name

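# Usage sketch (hypothetical file and symbol names): rename one symbol
# definition, and ignore another by mapping it to None.
symbol_mapper = SymbolMapper([
    ('proj/foo.c,v', 'RELEASE_1_0', '1.7', 'release-1.0'),
    ('proj/foo.c,v', 'BOGUS_TAG', '1.3', None),
    ])
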
def check_clean(self):
  """All passes have been processed.

  Output a warning message if any artifacts have not been accounted
  for.  (This is mainly a consistency check, that no artifacts were
  registered under nonexistent passes.)"""

  unclean_artifacts = [
      str(artifact)
      for artifact in self._artifacts.values()
      if artifact._passes_needed
      ]

  if unclean_artifacts:
    logger.warn(
        'INTERNAL: The following artifacts were not cleaned up:\n    %s\n'
        % ('\n    '.join(unclean_artifacts)))

def _generate_attic_cvs_files(self, cvs_directory, exclude_paths):
  """Generate CVSFiles for the files in Attic directory CVS_DIRECTORY.

  Also yield CVS_DIRECTORY if any files are being retained in the
  Attic.

  Silently ignore subdirectories named '.svn' or 'CVS', but emit a
  warning if any other directories are found within the Attic
  directory."""

  retained_attic_files = []

  fnames = os.listdir(cvs_directory.rcs_path)
  fnames.sort()
  for fname in fnames:
    pathname = os.path.join(cvs_directory.rcs_path, fname)
    path_in_repository = path_join(cvs_directory.get_cvs_path(), fname)
    if path_in_repository in exclude_paths:
      logger.normal(
          "Excluding file from conversion: %s" % (path_in_repository,)
          )
    elif os.path.isdir(pathname):
      if fname == '.svn' or fname == 'CVS':
        logger.debug(
            "Directory %s found within Attic; ignoring" % (pathname,)
            )
      else:
        logger.warn(
            "Directory %s found within Attic; ignoring" % (pathname,)
            )
    elif fname.endswith(',v'):
      cvs_file = self._get_attic_file(cvs_directory, fname)
      if cvs_file.parent_directory == cvs_directory:
        # This file will be retained in the Attic directory.
        retained_attic_files.append(cvs_file)
      else:
        # This is a normal Attic file, which is treated as if it
        # were located one directory up:
        yield cvs_file

  if retained_attic_files:
    # There was at least one file in the attic that will be retained
    # in the attic.  First include the Attic directory itself in the
    # output, then the retained attic files:
    yield cvs_directory
    for cvs_file in retained_attic_files:
      yield cvs_file

def process_file(self, cvs_file):
  logger.normal(cvs_file.rcs_path)
  fdc = _FileDataCollector(self, cvs_file)
  try:
    cvs2svn_rcsparse.parse(open(cvs_file.rcs_path, 'rb'), fdc)
  except (cvs2svn_rcsparse.common.RCSParseError, RuntimeError):
    self.collect_data.record_fatal_error(
        "%r is not a valid ,v file" % (cvs_file.rcs_path,)
        )
    # Abort the processing of this file, but let the pass continue
    # with other files:
    return
  except ValueError, e:
    self.collect_data.record_fatal_error(
        "%r is not a valid ,v file (%s)" % (cvs_file.rcs_path, str(e),)
        )
    # Abort the processing of this file, but let the pass continue
    # with other files:
    return
  except:
    logger.warn("Exception occurred while parsing %s" % cvs_file.rcs_path)
    raise
  else:
    self.num_files += 1

  return fdc.get_cvs_file_items()

def get(self, timestamp, change_expected):
  """Return a reasonable timestamp derived from TIMESTAMP.

  Push TIMESTAMP into the future if necessary to ensure that it is at
  least one second later than every other timestamp that has been
  returned by previous calls to this method.

  If CHANGE_EXPECTED is not True, then log a message if the timestamp
  has to be changed."""

  if timestamp > self.max_timestamp:
    # If a timestamp is in the future, it is assumed that it is
    # bogus.  Shift it backwards in time to prevent it forcing other
    # timestamps to be pushed even further in the future.

    # Note that this is not nearly a complete solution to the bogus
    # timestamp problem.  A timestamp in the future still affects
    # the ordering of changesets, and a changeset having such a
    # timestamp will not be committed until all changesets with
    # earlier timestamps have been committed, even if other
    # changesets with even earlier timestamps depend on this one.
    self.timestamp = self.timestamp + 1.0
    if not change_expected:
      logger.warn(
          'Timestamp "%s" is in the future; changed to "%s".'
          % (time.asctime(time.gmtime(timestamp)),
             time.asctime(time.gmtime(self.timestamp)),))
  elif timestamp < self.timestamp + 1.0:
    self.timestamp = self.timestamp + 1.0
    if not change_expected and logger.is_on(logger.VERBOSE):
      logger.verbose(
          'Timestamp "%s" adjusted to "%s" to ensure monotonicity.'
          % (time.asctime(time.gmtime(timestamp)),
             time.asctime(time.gmtime(self.timestamp)),))
  else:
    self.timestamp = timestamp

  return self.timestamp

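# Behavior sketch (hedged; 'timestamps' is a hypothetical instance and the
# exact return values depend on the instance's prior state): each call
# returns a value at least one second later than the previous return, so
# repeated or backwards input timestamps are nudged forward:
#
#   ts1 = timestamps.get(1000000000, False)  # e.g. -> 1000000000
#   ts2 = timestamps.get(1000000000, False)  # -> ts1 + 1.0 (with a log)
#   ts3 = timestamps.get(999999999, False)   # -> ts2 + 1.0 (with a log)
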
def define_symbol(self, name, revision):
  """Record a symbol definition for later processing."""

  # Canonicalize the revision number:
  revision = _branch_revision_re.sub(r'\1\2', revision)

  # Apply any user-defined symbol transforms to the symbol name:
  name = self.transform_symbol(name, revision)

  if name is not None:
    # Verify that the revision number is valid:
    if _valid_revision_re.match(revision):
      # The revision number is valid; record it for later processing:
      self._symbol_defs.append((name, revision))
    else:
      logger.warn(
          'In %r:\n'
          '    branch %r references invalid revision %s\n'
          '    and will be ignored.'
          % (self.cvs_file.rcs_path, name, revision,))

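# Plausible definitions for the two regexes used above (a hedged sketch;
# the real patterns are defined elsewhere in this module).  RCS stores a
# branch symbol as a "magic" branch number with an extra '0' component,
# e.g. '1.7.0.2'; canonicalization strips it back to '1.7.2'.
import re

_branch_revision_re = re.compile(r'^((?:\d+\.\d+\.)+)(?:0\.)?(\d+)$')
_valid_revision_re = re.compile(r'^(?:\d+\.)+\d+$')

assert _branch_revision_re.sub(r'\1\2', '1.7.0.2') == '1.7.2'
assert _valid_revision_re.match('1.7.2')
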
def set_revision_info(self, revision, log, text):
  """This is a callback method declared in Sink."""

  rev_data = self._rev_data[revision]
  cvs_rev = self._cvs_file_items[rev_data.cvs_rev_id]

  if cvs_rev.metadata_id is not None:
    # Users have reported problems with repositories in which the
    # deltatext block for revision 1.1 appears twice.  It is not
    # known whether this results from a CVS/RCS bug, or from botched
    # hand-editing of the repository.  In any case, empirically, cvs
    # and rcs both use the first version when checking out data, so
    # that's what we will do.  (For the record: "cvs log" fails on
    # such a file; "rlog" prints the log message from the first
    # block and ignores the second one.)
    logger.warn(
        "%s: in '%s':\n"
        "   Deltatext block for revision %s appeared twice;\n"
        "   ignoring the second occurrence.\n"
        % (warning_prefix, self.cvs_file.rcs_path, revision,))
    return

  if is_trunk_revision(revision):
    branch_name = None
  else:
    branch_name = self.sdc.rev_to_branch_data(revision).symbol.name

  cvs_rev.metadata_id = self.collect_data.metadata_logger.store(
      self.project, branch_name, rev_data.author, log
      )
  cvs_rev.deltatext_exists = bool(text)

  # If this is revision 1.1, determine whether the file appears to
  # have been created via 'cvs add' instead of 'cvs import'.  The
  # test is that the log message CVS uses for 1.1 in imports is
  # "Initial revision\n" with no period.  (This fact helps determine
  # whether this file might have had a default branch in the past.)
  if revision == '1.1':
    self._file_imported = (log == 'Initial revision\n')

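# Hedged sketch of the is_trunk_revision() helper used above; the real
# definition lives elsewhere in cvs2svn.  Trunk revision numbers have
# exactly two components ('1.5'), while branch revisions have four or
# more ('1.5.2.3').
import re

def is_trunk_revision(rev):
  """Return True iff REV is a trunk revision number like '1.5'."""
  return re.match(r'^\d+\.\d+$', rev) is not None

assert is_trunk_revision('1.5')
assert not is_trunk_revision('1.5.2.3')
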
def _add_pattern(self, pattern, props):
  propdict = {}
  if self.quoted_re.match(pattern):
    logger.warn(
        '%s: Quoting is not supported in auto-props; please verify rule\n'
        'for %r.  (Using pattern including quotation marks.)\n'
        % (warning_prefix, pattern,))
  for prop in props.split(';'):
    prop = prop.strip()
    m = self.property_unset_re.match(prop)
    if m:
      name = m.group('name')
      logger.debug(
          'auto-props: For %r, leaving %r unset.' % (pattern, name,))
      propdict[name] = None
      continue

    m = self.property_set_re.match(prop)
    if m:
      name = m.group('name')
      value = m.group('value')
      if self.quoted_re.match(value):
        logger.warn(
            '%s: Quoting is not supported in auto-props; please verify\n'
            'rule %r for pattern %r.  (Using value\n'
            'including quotation marks.)\n'
            % (warning_prefix, prop, pattern,))
      logger.debug(
          'auto-props: For %r, setting %r to %r.' % (pattern, name, value,))
      propdict[name] = value
      continue

    m = self.property_novalue_re.match(prop)
    if m:
      name = m.group('name')
      logger.debug(
          'auto-props: For %r, setting %r to the empty string'
          % (pattern, name,))
      propdict[name] = ''
      continue

    logger.warn(
        '%s: in auto-props line for %r, value %r cannot be parsed (ignored)'
        % (warning_prefix, pattern, prop,))

  self.patterns.append(self.Pattern(self.transform_case(pattern), propdict))

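# Example of the value syntax this method parses (hypothetical rule).  One
# auto-props value is a semicolon-separated list of property settings:
#
#   *.sh = svn:executable;svn:eol-style=native
#
# A bare name (matched by property_novalue_re) sets the property to the
# empty string, and name=value (property_set_re) sets it to the given
# value, so the propdict for the rule above would be (sketch):
#
#   {'svn:executable': '', 'svn:eol-style': 'native'}
#
# property_unset_re marks a property to be left unset (propdict value
# None); its exact syntax is defined elsewhere in this class.
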
class SubtreeSymbolMapper(SymbolTransform):
  """A SymbolTransform that transforms symbols within a whole repo subtree.

  The user has to specify a CVS repository path (a filename or
  directory) and the original symbol name.  All symbols under that
  path will be renamed to the specified new name (which can be None
  if the symbol should be ignored).  The mappings can be set via a
  constructor argument or by calling __setitem__().  Only the most
  specific rule is applied."""

  def __init__(self, items=[]):
    """Initialize the mapper.

    ITEMS is a list of tuples (cvs_path, symbol_name, new_name)
    which will be set as mappings.  cvs_path is a string naming a
    directory within the CVS repository."""

    # A map {symbol_name : {cvs_path : new_name}}:
    self._map = {}

    for (cvs_path, symbol_name, new_name) in items:
      self[cvs_path, symbol_name] = new_name

  def __setitem__(self, (cvs_path, symbol_name), new_name):
    """Set a mapping for a particular file and symbol."""

    try:
      symbol_map = self._map[symbol_name]
    except KeyError:
      symbol_map = {}
      self._map[symbol_name] = symbol_map

    cvs_path = os.path.normcase(os.path.normpath(cvs_path))
    if cvs_path in symbol_map:
      logger.warn(
          'Overwriting symbol transform for\n'
          '    directory=%r symbol=%s'
          % (cvs_path, symbol_name,))
    symbol_map[cvs_path] = new_name

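# Usage sketch (hypothetical paths and symbol names): rename a symbol
# throughout one subtree while ignoring it in a more specific subtree
# beneath it; only the most specific matching rule is applied.
subtree_symbol_mapper = SubtreeSymbolMapper([
    ('proj', 'RELEASE_1_0', 'release-1.0'),
    ('proj/experimental', 'RELEASE_1_0', None),
    ])
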
def _process_ntdbrs(self):
  """Fix up any non-trunk default branch revisions (if present).

  If a non-trunk default branch is determined to have existed, yield
  the _RevisionData.ids for all revisions that were once non-trunk
  default revisions, in dependency order.

  There are two cases to handle:

  One case is simple.  The RCS file lists a default branch
  explicitly in its header, such as '1.1.1'.  In this case, we know
  that every revision on the vendor branch is to be treated as head
  of trunk at that point in time.

  But there's also a degenerate case.  The RCS file does not
  currently have a default branch, yet we can deduce that for some
  period in the past it probably *did* have one.  For example, the
  file has vendor revisions 1.1.1.1 -> 1.1.1.96, all of which are
  dated before 1.2, and then it has 1.1.1.97 -> 1.1.1.100 dated
  after 1.2.  In this case, we should record 1.1.1.96 as the last
  vendor revision to have been the head of the default branch.

  If any non-trunk default branch revisions are found:

  - Set their ntdbr members to True.

  - Connect the last one with revision 1.2.

  - Remove revision 1.1 if it is not needed."""

  try:
    if self.default_branch:
      try:
        vendor_cvs_branch_id = self.sdc.branches_data[self.default_branch].id
      except KeyError:
        logger.warn(
            '%s: In %s:\n'
            '    vendor branch %r is not present in file and will be ignored.'
            % (warning_prefix, self.cvs_file.rcs_path, self.default_branch,))
        self.default_branch = None
        return
      vendor_lod_items = self._cvs_file_items.get_lod_items(
          self._cvs_file_items[vendor_cvs_branch_id]
          )
      if not self._cvs_file_items.process_live_ntdb(vendor_lod_items):
        return
    elif self._file_imported:
      vendor_branch_data = self.sdc.branches_data.get('1.1.1')
      if vendor_branch_data is None:
        return
      else:
        vendor_lod_items = self._cvs_file_items.get_lod_items(
            self._cvs_file_items[vendor_branch_data.id]
            )
        if not self._cvs_file_items.process_historical_ntdb(
              vendor_lod_items
              ):
          return
    else:
      return
  except VendorBranchError, e:
    self.collect_data.record_fatal_error(str(e))
    return

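# Timeline sketch for the "degenerate case" described in the docstring
# above (illustrative revision numbers taken from that docstring):
#
#   1.1                       initial import
#   1.1.1.1 ... 1.1.1.96      vendor revs dated before 1.2 -> these were
#                             once the head of the default branch
#   1.2                       first real trunk commit
#   1.1.1.97 ... 1.1.1.100    vendor revs dated after 1.2 -> ordinary
#                             branch revisions
#
# 1.1.1.96 is therefore recorded as the last revision ever to have been
# the head of the (implicit) non-trunk default branch.
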
      'for more information.\n'
      % (error_prefix, anydbm._defaultmod.__name__,)
      )
  sys.exit(1)

# 3. If we are using the old bsddb185 module, then try to use gdbm instead.
#    Unfortunately, gdbm appears not to be trouble-free, either.
if hasattr(anydbm._defaultmod, 'bsddb') \
    and not hasattr(anydbm._defaultmod.bsddb, '__version__'):
  try:
    gdbm = __import__('gdbm')
  except ImportError:
    logger.warn(
        '%s: The version of the bsddb module found on your computer '
        'has been\n'
        'reported to malfunction on some datasets, causing KeyError '
        'exceptions.\n'
        % (warning_prefix,))
  else:
    anydbm._defaultmod = gdbm


class Database:
  """A database that uses a Serializer to store objects of a certain type.

  The serializer is stored in the database under the key
  self.serializer_key.  (This implies that self.serializer_key may
  not be used as a key for normal entries.)

  The backing database is an anydbm-based DBM.
class CollectData:
  """Repository for data collected by parsing the CVS repository files.

  This class manages the databases into which information collected
  from the CVS repository is stored.  The data are stored into this
  class by _FileDataCollector instances, one of which is created for
  each file to be parsed."""