Example #1
0
    def _resolve_branch_dependencies(self):
        """Resolve dependencies involving branches."""

        for branch_data in self.sdc.branches_data.values():
            # The branch_data's parent has the branch as a child regardless
            # of whether the branch had any subsequent commits:
            try:
                parent_data = self._rev_data[branch_data.parent]
            except KeyError:
                logger.warn(
                    'In %r:\n'
                    '    branch %r references non-existing revision %s\n'
                    '    and will be ignored.' % (
                        self.cvs_file.rcs_path,
                        branch_data.symbol.name,
                        branch_data.parent,
                    ))
                del self.sdc.branches_data[branch_data.branch_number]
            else:
                parent_data.branches_data.append(branch_data)

                # If the branch has a child (i.e., something was committed on
                # the branch), then we store a reference to the branch_data
                # there, define the child's parent to be the branch's parent,
                # and list the child in the branch parent's branches_revs_data:
                if branch_data.child is not None:
                    child_data = self._rev_data[branch_data.child]
                    assert child_data.parent_branch_data is None
                    child_data.parent_branch_data = branch_data
                    assert child_data.parent is None
                    child_data.parent = branch_data.parent
                    parent_data.branches_revs_data.append(branch_data.child)
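The comments above describe how revision records and branch records get cross-linked. Below is a minimal, runnable sketch of that wiring under stated assumptions: RevData and BranchData are simplified stand-ins (not the real _RevisionData/_BranchData classes), keeping only the fields the method above touches.

# Simplified stand-ins for illustration only; field names follow the excerpt.
class RevData(object):
    def __init__(self, rev):
        self.rev = rev
        self.parent = None               # previous revision on the same line of development
        self.parent_branch_data = None   # set iff this revision is the first one on a branch
        self.branches_data = []          # branches sprouting from this revision
        self.branches_revs_data = []     # first revisions of those branches
        self.tags_data = []              # tags attached to this revision

class BranchData(object):
    def __init__(self, branch_number, parent, child=None):
        self.branch_number = branch_number  # e.g. '1.7.2'
        self.parent = parent                # revision the branch sprouts from, e.g. '1.7'
        self.child = child                  # first commit on the branch, or None

# The wiring performed above, for a branch '1.7.2' sprouting from '1.7'
# with one commit '1.7.2.1':
rev_data = {'1.7': RevData('1.7'), '1.7.2.1': RevData('1.7.2.1')}
branch = BranchData('1.7.2', parent='1.7', child='1.7.2.1')

rev_data[branch.parent].branches_data.append(branch)             # parent knows the branch
rev_data[branch.child].parent_branch_data = branch               # first commit knows its branch
rev_data[branch.child].parent = branch.parent                    # ...and its predecessor revision
rev_data[branch.parent].branches_revs_data.append(branch.child)  # parent lists the first commit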
Example #2
0
  def _get_attic_file(self, parent_directory, basename):
    """Return a CVSFile object for the Attic file at BASENAME.

    PARENT_DIRECTORY is the CVSDirectory that physically contains the
    file on the filesystem (i.e., the Attic directory).  It is not
    necessarily the parent_directory of the CVSFile that will be
    returned.

    Return CVSFile, whose parent directory is usually
    PARENT_DIRECTORY.parent_directory, but might be PARENT_DIRECTORY
    iff CVSFile will remain in the Attic directory."""

    try:
      return self._get_cvs_file(
          parent_directory, basename, file_in_attic=True,
          )
    except FileInAndOutOfAtticException, e:
      if Ctx().retain_conflicting_attic_files:
        logger.warn(
            "%s: %s;\n"
            "   storing the latter into 'Attic' subdirectory.\n"
            % (warning_prefix, e)
            )
      else:
        self.error_handler(str(e))

      # Either way, return a CVSFile object so that the rest of the
      # file processing can proceed:
      return self._get_cvs_file(
          parent_directory, basename, file_in_attic=True, leave_in_attic=True,
          )
Example #3
0
    def _resolve_tag_dependencies(self):
        """Resolve dependencies involving tags."""

        for (rev, tag_data_list) in self.sdc.tags_data.items():
            try:
                parent_data = self._rev_data[rev]
            except KeyError:
                logger.warn(
                    'In %r:\n'
                    '    the following tag(s) reference non-existing revision %s\n'
                    '    and will be ignored:\n'
                    '    %s' % (
                        self.cvs_file.rcs_path,
                        rev,
                        ', '.join([
                            repr(tag_data.symbol.name)
                            for tag_data in tag_data_list
                        ]),
                    ))
                del self.sdc.tags_data[rev]
            else:
                for tag_data in tag_data_list:
                    assert tag_data.rev == rev
                    # The tag_data's rev has the tag as a child:
                    parent_data.tags_data.append(tag_data)
Example #4
0
  def _get_attic_file(self, parent_directory, basename):
    """Return a CVSFile object for the Attic file at BASENAME.

    PARENT_DIRECTORY is the CVSDirectory that physically contains the
    file on the filesystem (i.e., the Attic directory).  It is not
    necessarily the parent_directory of the CVSFile that will be
    returned.

    Return CVSFile, whose parent directory is usually
    PARENT_DIRECTORY.parent_directory, but might be PARENT_DIRECTORY
    iff CVSFile will remain in the Attic directory."""

    try:
      return self._get_cvs_file(
          parent_directory, basename, file_in_attic=True,
          )
    except FileInAndOutOfAtticException, e:
      if Ctx().retain_conflicting_attic_files:
        logger.warn(
            "%s: %s;\n"
            "   storing the latter into 'Attic' subdirectory.\n"
            % (warning_prefix, e)
            )
      else:
        self.error_handler(str(e))

      # Either way, return a CVSFile object so that the rest of the
      # file processing can proceed:
      return self._get_cvs_file(
          parent_directory, basename, file_in_attic=True, leave_in_attic=True,
          )
Example #5
0
    def __init__(self, configfilename, ignore_case=True):
        config = ConfigParser.ConfigParser()
        if ignore_case:
            self.transform_case = _squash_case
        else:
            config.optionxform = _preserve_case
            self.transform_case = _preserve_case

        configtext = open(configfilename).read()
        if self.comment_re.search(configtext):
            logger.warn(
                '%s: Please be aware that a space followed by a\n'
                'semicolon is sometimes treated as a comment in configuration\n'
                'files.  This pattern was seen in\n'
                '    %s\n'
                'Please make sure that you have not inadvertently commented\n'
                'out part of an important line.' % (
                    warning_prefix,
                    configfilename,
                ))

        config.readfp(StringIO(configtext), configfilename)
        self.patterns = []
        sections = config.sections()
        sections.sort()
        for section in sections:
            if self.transform_case(section) == 'auto-props':
                patterns = config.options(section)
                patterns.sort()
                for pattern in patterns:
                    value = config.get(section, pattern)
                    if value:
                        self._add_pattern(pattern, value)
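As a usage illustration for the constructor above, here is a small, self-contained sketch of the kind of [auto-props] configuration it reads; the section name comes from the code, while the patterns and property values are invented for the example. Each option name is a file-name pattern and each value is the semicolon-separated property list handed to self._add_pattern().

# Self-contained sketch; runs under Python 2 (as the excerpt assumes) or Python 3.
try:
    import ConfigParser                     # Python 2, as in the code above
    from StringIO import StringIO
except ImportError:
    import configparser as ConfigParser     # Python 3 fallback for the sketch
    from io import StringIO

SAMPLE = (
    '[auto-props]\n'
    '*.png = svn:mime-type=image/png\n'
    '*.sh = svn:eol-style=native;svn:executable\n'
)

config = ConfigParser.ConfigParser()
if hasattr(config, 'read_string'):
    config.read_string(SAMPLE)                      # Python 3
else:
    config.readfp(StringIO(SAMPLE), 'sample.cfg')   # Python 2
for pattern in sorted(config.options('auto-props')):
    # The constructor above passes each (pattern, value) pair to _add_pattern():
    print('%s -> %s' % (pattern, config.get('auto-props', pattern)))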
Example #6
0
  def _add_branch(self, name, branch_number):
    """Record that BRANCH_NUMBER is the branch number for branch NAME,
    and derive and record the revision from which NAME sprouts.
    BRANCH_NUMBER is an RCS branch number with an odd number of
    components, for example '1.7.2' (never '1.7.0.2').  Return the
    _BranchData instance (which is usually newly-created)."""

    branch_data = self.branches_data.get(branch_number)

    if branch_data is not None:
      logger.warn(
          "%s: in '%s':\n"
          "   branch '%s' already has name '%s',\n"
          "   cannot also have name '%s', ignoring the latter\n"
          % (warning_prefix,
             self.cvs_file.rcs_path, branch_number,
             branch_data.symbol.name, name)
          )
      return branch_data

    symbol = self.pdc.get_symbol(name)
    branch_data = _BranchData(
        self.collect_data.item_key_generator.gen_id(), symbol, branch_number
        )
    self.branches_data[branch_number] = branch_data
    return branch_data
Example #7
0
  def _resolve_branch_dependencies(self):
    """Resolve dependencies involving branches."""

    for branch_data in self.sdc.branches_data.values():
      # The branch_data's parent has the branch as a child regardless
      # of whether the branch had any subsequent commits:
      try:
        parent_data = self._rev_data[branch_data.parent]
      except KeyError:
        logger.warn(
            'In %r:\n'
            '    branch %r references non-existing revision %s\n'
            '    and will be ignored.'
            % (self.cvs_file.rcs_path, branch_data.symbol.name,
               branch_data.parent,))
        del self.sdc.branches_data[branch_data.branch_number]
      else:
        parent_data.branches_data.append(branch_data)

        # If the branch has a child (i.e., something was committed on
        # the branch), then we store a reference to the branch_data
        # there, define the child's parent to be the branch's parent,
        # and list the child in the branch parent's branches_revs_data:
        if branch_data.child is not None:
          child_data = self._rev_data[branch_data.child]
          assert child_data.parent_branch_data is None
          child_data.parent_branch_data = branch_data
          assert child_data.parent is None
          child_data.parent = branch_data.parent
          parent_data.branches_revs_data.append(branch_data.child)
Example #8
0
  def __init__(self, configfilename, ignore_case=True):
    config = ConfigParser.ConfigParser()
    if ignore_case:
      self.transform_case = _squash_case
    else:
      config.optionxform = _preserve_case
      self.transform_case = _preserve_case

    configtext = open(configfilename).read()
    if self.comment_re.search(configtext):
      logger.warn(
          '%s: Please be aware that a space followed by a\n'
          'semicolon is sometimes treated as a comment in configuration\n'
          'files.  This pattern was seen in\n'
          '    %s\n'
          'Please make sure that you have not inadvertently commented\n'
          'out part of an important line.'
          % (warning_prefix, configfilename,)
          )

    config.readfp(StringIO(configtext), configfilename)
    self.patterns = []
    sections = config.sections()
    sections.sort()
    for section in sections:
      if self.transform_case(section) == 'auto-props':
        patterns = config.options(section)
        patterns.sort()
        for pattern in patterns:
          value = config.get(section, pattern)
          if value:
            self._add_pattern(pattern, value)
Example #9
0
  def log_leftovers(self):
    """If any TextRecords still exist, log them."""

    if self.text_records:
      logger.warn(
          "%s: internal problem: leftover revisions in the checkout cache:"
          % warning_prefix)
      for text_record in self.itervalues():
        logger.warn('    %s' % (text_record,))
Example #10
0
    def log_leftovers(self):
        """If any TextRecords still exist, log them."""

        if self.text_records:
            logger.warn(
                "%s: internal problem: leftover revisions in the checkout cache:"
                % warning_prefix)
            for text_record in self.itervalues():
                logger.warn('    %s' % (text_record, ))
Example #11
0
  def purge_ghost_symbols(self):
    """Purge any symbols that don't have any activity.

    Such ghost symbols can arise if a symbol was defined in an RCS
    file but pointed at a non-existent revision."""

    for stats in self._stats.values():
      if stats.is_ghost():
        logger.warn('Deleting ghost symbol: %s' % (stats.lod,))
        del self._stats[stats.lod]
Example #12
0
    def purge_ghost_symbols(self):
        """Purge any symbols that don't have any activity.

    Such ghost symbols can arise if a symbol was defined in an RCS
    file but pointed at a non-existent revision."""

        for stats in self._stats.values():
            if stats.is_ghost():
                logger.warn('Deleting ghost symbol: %s' % (stats.lod, ))
                del self._stats[stats.lod]
Example #13
0
 def set_properties(self, cvs_file):
   propdict = self.get_propdict(cvs_file)
   for (k,v) in propdict.items():
     if k in cvs_file.properties:
       if cvs_file.properties[k] != v:
         logger.warn(
             "Property '%s' already set to %r for file %s; "
             "auto-props value (%r) ignored."
             % (k, cvs_file.properties[k], cvs_file.cvs_path, v,)
             )
     else:
       cvs_file.properties[k] = v
Example #14
0
 def set_properties(self, cvs_file):
     propdict = self.get_propdict(cvs_file)
     for (k, v) in propdict.items():
         if k in cvs_file.properties:
             if cvs_file.properties[k] != v:
                 logger.warn("Property '%s' already set to %r for file %s; "
                             "auto-props value (%r) ignored." % (
                                 k,
                                 cvs_file.properties[k],
                                 cvs_file.cvs_path,
                                 v,
                             ))
         else:
             cvs_file.properties[k] = v
Example #15
0
  def get_propdict(self, cvs_file):
    basename = self.transform_case(cvs_file.basename)
    propdict = {}
    for pattern in self.patterns:
      if pattern.match(basename):
        for (key,value) in pattern.propdict.items():
          if key in propdict:
            if propdict[key] != value:
              logger.warn(
                  "Contradictory values set for property '%s' for file %s."
                  % (key, cvs_file,))
          else:
            propdict[key] = value

    return propdict
Example #16
0
    def _process_revision_changeset(self, changeset, timestamp):
        """Process CHANGESET, using TIMESTAMP as the commit time.

    Create and yield one or more SVNCommits in the process.  CHANGESET
    must be an OrderedChangeset.  TIMESTAMP is used as the timestamp
    for any resulting SVNCommits."""

        if not changeset.cvs_item_ids:
            logger.warn('Changeset has no items: %r' % changeset)
            return

        logger.verbose('-' * 60)
        logger.verbose('CVS Revision grouping:')
        logger.verbose('  Time: %s' % time.ctime(timestamp))

        # Generate an SVNCommit unconditionally.  Even if the only change in
        # this group of CVSRevisions is a deletion of an already-deleted
        # file (that is, a CVS revision in state 'dead' whose predecessor
        # was also in state 'dead'), the conversion will still generate a
        # Subversion revision containing the log message for the second dead
        # revision, because we don't want to lose that information.

        cvs_revs = list(changeset.iter_cvs_items())
        if cvs_revs:
            cvs_revs.sort(
                lambda a, b: cmp(a.cvs_file.rcs_path, b.cvs_file.rcs_path))
            svn_commit = SVNPrimaryCommit(cvs_revs, timestamp,
                                          self.revnum_generator.gen_id())

            yield svn_commit

            for cvs_rev in cvs_revs:
                Ctx()._symbolings_logger.log_revision(cvs_rev,
                                                      svn_commit.revnum)

            # Generate an SVNPostCommit if we have default branch revs.  If
            # some of the revisions in this commit happened on a non-trunk
            # default branch, then those files have to be copied into trunk
            # manually after being changed on the branch (because the RCS
            # "default branch" appears as head, i.e., trunk, in practice).
            # Unfortunately, Subversion doesn't support copies with sources
            # in the current txn.  All copies must be based in committed
            # revisions.  Therefore, we generate the copies in a new
            # revision.
            for svn_post_commit in self._post_commit(cvs_revs,
                                                     svn_commit.revnum,
                                                     timestamp):
                yield svn_post_commit
Example #17
0
class SymbolMapper(SymbolTransform):
    """A SymbolTransform that transforms specific symbol definitions.

  The user has to specify the exact CVS filename, symbol name, and
  revision number to be transformed, and the new name (or None if the
  symbol should be ignored).  The mappings can be set via a
  constructor argument or by calling __setitem__()."""
    def __init__(self, items=[]):
        """Initialize the mapper.

    ITEMS is a list of tuples (cvs_filename, symbol_name, revision,
    new_name) which will be set as mappings."""

        # A map {(cvs_filename, symbol_name, revision) : new_name}:
        self._map = {}

        for (cvs_filename, symbol_name, revision, new_name) in items:
            self[cvs_filename, symbol_name, revision] = new_name

    def __setitem__(self, (cvs_filename, symbol_name, revision), new_name):
        """Set a mapping for a particular file, symbol, and revision."""

        cvs_filename = os.path.normcase(os.path.normpath(cvs_filename))
        key = (cvs_filename, symbol_name, revision)
        if key in self._map:
            logger.warn('Overwriting symbol transform for\n'
                        '    filename=%r symbol=%s revision=%s' % (
                            cvs_filename,
                            symbol_name,
                            revision,
                        ))
        self._map[key] = new_name
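A hedged usage sketch for SymbolMapper as defined above; the file names, symbol names, revisions, and new names are invented, and only the constructor and __setitem__() shown in the excerpt are exercised.

# Assumes the SymbolMapper class (and its logger/os imports) from the excerpt above.
mapper = SymbolMapper([
    # (cvs_filename, symbol_name, revision, new_name)
    ('proj/foo.c,v', 'RELEASE_1_0', '1.3', 'release-1.0'),   # rename the symbol
    ('proj/bar.c,v', 'BOGUS_TAG', '1.1', None),              # drop the symbol
])

# Mappings can also be added later; redefining an existing key logs the
# "Overwriting symbol transform" warning shown above:
mapper['proj/foo.c,v', 'RELEASE_1_0', '1.3'] = 'release-1.0-final'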
Example #18
0
    def check_clean(self):
        """All passes have been processed.

    Output a warning message if any artifacts have not been accounted
    for.  (This is mainly a consistency check that no artifacts were
    registered under nonexistent passes.)"""

        unclean_artifacts = [
            str(artifact) for artifact in self._artifacts.values()
            if artifact._passes_needed
        ]

        if unclean_artifacts:
            logger.warn(
                'INTERNAL: The following artifacts were not cleaned up:\n    %s\n'
                % ('\n    '.join(unclean_artifacts)))
Example #19
0
  def check_clean(self):
    """All passes have been processed.

    Output a warning message if any artifacts have not been accounted
    for.  (This is mainly a consistency check that no artifacts were
    registered under nonexistent passes.)"""

    unclean_artifacts = [
        str(artifact)
        for artifact in self._artifacts.values()
        if artifact._passes_needed]

    if unclean_artifacts:
      logger.warn(
          'INTERNAL: The following artifacts were not cleaned up:\n    %s\n'
          % ('\n    '.join(unclean_artifacts)))
Example #20
0
  def _generate_attic_cvs_files(self, cvs_directory, exclude_paths):
    """Generate CVSFiles for the files in Attic directory CVS_DIRECTORY.

    Also yield CVS_DIRECTORY if any files are being retained in the
    Attic.

    Silently ignore subdirectories named '.svn' or 'CVS', but emit a
    warning if any other directories are found within the Attic
    directory."""

    retained_attic_files = []

    fnames = os.listdir(cvs_directory.rcs_path)
    fnames.sort()
    for fname in fnames:
      pathname = os.path.join(cvs_directory.rcs_path, fname)
      path_in_repository = path_join(cvs_directory.get_cvs_path(), fname)
      if path_in_repository in exclude_paths:
        logger.normal(
            "Excluding file from conversion: %s" % (path_in_repository,)
            )
      elif os.path.isdir(pathname):
        if fname == '.svn' or fname == 'CVS':
          logger.debug(
              "Directory %s found within Attic; ignoring" % (pathname,)
              )
        else:
          logger.warn(
              "Directory %s found within Attic; ignoring" % (pathname,)
              )
      elif fname.endswith(',v'):
        cvs_file = self._get_attic_file(cvs_directory, fname)
        if cvs_file.parent_directory == cvs_directory:
          # This file will be retained in the Attic directory.
          retained_attic_files.append(cvs_file)
        else:
          # This is a normal Attic file, which is treated as if it
          # were located one directory up:
          yield cvs_file

    if retained_attic_files:
      # There was at least one file in the attic that will be retained
      # in the attic.  First include the Attic directory itself in the
      # output, then the retained attic files:
      yield cvs_directory
      for cvs_file in retained_attic_files:
        yield cvs_file
Example #21
0
  def _generate_attic_cvs_files(self, cvs_directory, exclude_paths):
    """Generate CVSFiles for the files in Attic directory CVS_DIRECTORY.

    Also yield CVS_DIRECTORY if any files are being retained in the
    Attic.

    Silently ignore subdirectories named '.svn' or 'CVS', but emit a
    warning if any other directories are found within the Attic
    directory."""

    retained_attic_files = []

    fnames = os.listdir(cvs_directory.rcs_path)
    fnames.sort()
    for fname in fnames:
      pathname = os.path.join(cvs_directory.rcs_path, fname)
      path_in_repository = path_join(cvs_directory.get_cvs_path(), fname)
      if path_in_repository in exclude_paths:
        logger.normal(
            "Excluding file from conversion: %s" % (path_in_repository,)
            )
      elif os.path.isdir(pathname):
        if fname == '.svn' or fname == 'CVS':
          logger.debug(
              "Directory %s found within Attic; ignoring" % (pathname,)
              )
        else:
          logger.warn(
              "Directory %s found within Attic; ignoring" % (pathname,)
              )
      elif fname.endswith(',v'):
        cvs_file = self._get_attic_file(cvs_directory, fname)
        if cvs_file.parent_directory == cvs_directory:
          # This file will be retained in the Attic directory.
          retained_attic_files.append(cvs_file)
        else:
          # This is a normal Attic file, which is treated as if it
          # were located one directory up:
          yield cvs_file

    if retained_attic_files:
      # There was at least one file in the attic that will be retained
      # in the attic.  First include the Attic directory itself in the
      # output, then the retained attic files:
      yield cvs_directory
      for cvs_file in retained_attic_files:
        yield cvs_file
Example #22
0
  def _process_revision_changeset(self, changeset, timestamp):
    """Process CHANGESET, using TIMESTAMP as the commit time.

    Create and yield one or more SVNCommits in the process.  CHANGESET
    must be an OrderedChangeset.  TIMESTAMP is used as the timestamp
    for any resulting SVNCommits."""

    if not changeset.cvs_item_ids:
      logger.warn('Changeset has no items: %r' % changeset)
      return

    logger.verbose('-' * 60)
    logger.verbose('CVS Revision grouping:')
    logger.verbose('  Time: %s' % time.ctime(timestamp))

    # Generate an SVNCommit unconditionally.  Even if the only change in
    # this group of CVSRevisions is a deletion of an already-deleted
    # file (that is, a CVS revision in state 'dead' whose predecessor
    # was also in state 'dead'), the conversion will still generate a
    # Subversion revision containing the log message for the second dead
    # revision, because we don't want to lose that information.

    cvs_revs = list(changeset.iter_cvs_items())
    if cvs_revs:
      cvs_revs.sort(lambda a, b: cmp(a.cvs_file.rcs_path, b.cvs_file.rcs_path))
      svn_commit = SVNPrimaryCommit(
          cvs_revs, timestamp, self.revnum_generator.gen_id()
          )

      yield svn_commit

      for cvs_rev in cvs_revs:
        Ctx()._symbolings_logger.log_revision(cvs_rev, svn_commit.revnum)

      # Generate an SVNPostCommit if we have default branch revs.  If
      # some of the revisions in this commit happened on a non-trunk
      # default branch, then those files have to be copied into trunk
      # manually after being changed on the branch (because the RCS
      # "default branch" appears as head, i.e., trunk, in practice).
      # Unfortunately, Subversion doesn't support copies with sources
      # in the current txn.  All copies must be based in committed
      # revisions.  Therefore, we generate the copies in a new
      # revision.
      for svn_post_commit in self._post_commit(
            cvs_revs, svn_commit.revnum, timestamp
            ):
        yield svn_post_commit
Example #23
0
    def get_propdict(self, cvs_file):
        basename = self.transform_case(cvs_file.rcs_basename)
        propdict = {}
        for pattern in self.patterns:
            if pattern.match(basename):
                for (key, value) in pattern.propdict.items():
                    if key in propdict:
                        if propdict[key] != value:
                            logger.warn(
                                "Contradictory values set for property '%s' for file %s."
                                % (
                                    key,
                                    cvs_file,
                                ))
                    else:
                        propdict[key] = value

        return propdict
Example #24
0
  def process_file(self, cvs_file):
    logger.normal(cvs_file.filename)
    fdc = _FileDataCollector(self, cvs_file)
    try:
      cvs2svn_rcsparse.parse(open(cvs_file.filename, 'rb'), fdc)
    except (cvs2svn_rcsparse.common.RCSParseError, ValueError, RuntimeError):
      self.collect_data.record_fatal_error(
          "%r is not a valid ,v file" % (cvs_file.filename,)
          )
      # Abort the processing of this file, but let the pass continue
      # with other files:
      return
    except:
      logger.warn("Exception occurred while parsing %s" % cvs_file.filename)
      raise
    else:
      self.num_files += 1

    return fdc.get_cvs_file_items()
Example #25
0
    def get(self, timestamp, change_expected):
        """Return a reasonable timestamp derived from TIMESTAMP.

    Push TIMESTAMP into the future if necessary to ensure that it is
    at least one second later than every other timestamp that has been
    returned by previous calls to this method.

    If CHANGE_EXPECTED is not True, then log a message if the
    timestamp has to be changed."""

        if timestamp > self.max_timestamp:
            # If a timestamp is in the future, it is assumed that it is
            # bogus.  Shift it backwards in time to prevent it forcing other
            # timestamps to be pushed even further in the future.

            # Note that this is not nearly a complete solution to the bogus
            # timestamp problem.  A timestamp in the future still affects
            # the ordering of changesets, and a changeset having such a
            # timestamp will not be committed until all changesets with
            # earlier timestamps have been committed, even if other
            # changesets with even earlier timestamps depend on this one.
            self.timestamp = self.timestamp + 1.0
            if not change_expected:
                logger.warn(
                    'Timestamp "%s" is in the future; changed to "%s".' % (
                        time.asctime(time.gmtime(timestamp)),
                        time.asctime(time.gmtime(self.timestamp)),
                    ))
        elif timestamp < self.timestamp + 1.0:
            self.timestamp = self.timestamp + 1.0
            if not change_expected and logger.is_on(logger.VERBOSE):
                logger.verbose(
                    'Timestamp "%s" adjusted to "%s" to ensure monotonicity.' %
                    (
                        time.asctime(time.gmtime(timestamp)),
                        time.asctime(time.gmtime(self.timestamp)),
                    ))
        else:
            self.timestamp = timestamp

        return self.timestamp
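The timestamp policy described in the docstring above can be condensed into the following stripped-down sketch; the class name and the choice of max_timestamp are assumptions made for the illustration, not part of the original code.

import time

class MonotonicTimestamper(object):     # hypothetical name, for the sketch only
    def __init__(self, max_timestamp=None):
        self.timestamp = 0.0
        # Timestamps beyond this cutoff are treated as bogus; "now plus one
        # day" is an arbitrary choice for this sketch:
        self.max_timestamp = max_timestamp or (time.time() + 24 * 60 * 60)

    def get(self, timestamp):
        if timestamp > self.max_timestamp or timestamp < self.timestamp + 1.0:
            # Bogus or non-monotonic: return the previous value plus one second.
            self.timestamp = self.timestamp + 1.0
        else:
            self.timestamp = timestamp
        return self.timestamp

ts = MonotonicTimestamper()
assert ts.get(100.0) == 100.0
assert ts.get(50.0) == 101.0    # pushed forward to stay at least one second later
assert ts.get(200.0) == 200.0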
Example #26
0
  def _resolve_tag_dependencies(self):
    """Resolve dependencies involving tags."""

    for (rev, tag_data_list) in self.sdc.tags_data.items():
      try:
        parent_data = self._rev_data[rev]
      except KeyError:
        logger.warn(
            'In %r:\n'
            '    the following tag(s) reference non-existing revision %s\n'
            '    and will be ignored:\n'
            '    %s' % (
                self.cvs_file.rcs_path, rev,
                ', '.join([repr(tag_data.symbol.name)
                           for tag_data in tag_data_list]),))
        del self.sdc.tags_data[rev]
      else:
        for tag_data in tag_data_list:
          assert tag_data.rev == rev
          # The tag_data's rev has the tag as a child:
          parent_data.tags_data.append(tag_data)
Example #27
0
  def define_symbol(self, name, revision):
    """Record a symbol definition for later processing."""

    # Canonicalize the revision number:
    revision = _branch_revision_re.sub(r'\1\2', revision)

    # Apply any user-defined symbol transforms to the symbol name:
    name = self.transform_symbol(name, revision)

    if name is not None:
      # Verify that the revision number is valid:
      if _valid_revision_re.match(revision):
        # The revision number is valid; record it for later processing:
        self._symbol_defs.append( (name, revision) )
      else:
        logger.warn(
            'In %r:\n'
            '    branch %r references invalid revision %s\n'
            '    and will be ignored.'
            % (self.cvs_file.rcs_path, name, revision,)
            )
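The '# Canonicalize the revision number' step above uses _branch_revision_re, which is defined elsewhere in the module and not shown in this excerpt. The pattern below is only an illustrative approximation of that step: CVS stores branch symbols with a magic '0' in the next-to-last position (e.g. '1.7.0.2' for branch '1.7.2'), and canonicalization drops that '0'.

import re

# Illustrative approximation; the real _branch_revision_re is not part of
# this excerpt and may differ in detail.
_branch_revision_re = re.compile(r'^((?:\d+\.\d+\.)+)(?:0\.)?(\d+)$')

assert _branch_revision_re.sub(r'\1\2', '1.7.0.2') == '1.7.2'   # magic branch number collapsed
assert _branch_revision_re.sub(r'\1\2', '1.7') == '1.7'         # plain tag revision left alone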
Example #28
0
  def set_revision_info(self, revision, log, text):
    """This is a callback method declared in Sink."""

    rev_data = self._rev_data[revision]
    cvs_rev = self._cvs_file_items[rev_data.cvs_rev_id]

    if cvs_rev.metadata_id is not None:
      # Users have reported problems with repositories in which the
      # deltatext block for revision 1.1 appears twice.  It is not
      # known whether this results from a CVS/RCS bug, or from botched
      # hand-editing of the repository.  In any case, empirically, cvs
      # and rcs both use the first version when checking out data, so
      # that's what we will do.  (For the record: "cvs log" fails on
      # such a file; "rlog" prints the log message from the first
      # block and ignores the second one.)
      logger.warn(
          "%s: in '%s':\n"
          "   Deltatext block for revision %s appeared twice;\n"
          "   ignoring the second occurrence.\n"
          % (warning_prefix, self.cvs_file.rcs_path, revision,)
          )
      return

    if is_trunk_revision(revision):
      branch_name = None
    else:
      branch_name = self.sdc.rev_to_branch_data(revision).symbol.name

    cvs_rev.metadata_id = self.collect_data.metadata_logger.store(
        self.project, branch_name, rev_data.author, log
        )
    cvs_rev.deltatext_exists = bool(text)

    # If this is revision 1.1, determine whether the file appears to
    # have been created via 'cvs add' instead of 'cvs import'.  The
    # test is that the log message CVS uses for 1.1 in imports is
    # "Initial revision\n" with no period.  (This fact helps determine
    # whether this file might have had a default branch in the past.)
    if revision == '1.1':
      self._file_imported = (log == 'Initial revision\n')
Example #29
0
  def get(self, timestamp, change_expected):
    """Return a reasonable timestamp derived from TIMESTAMP.

    Push TIMESTAMP into the future if necessary to ensure that it is
    at least one second later than every other timestamp that has been
    returned by previous calls to this method.

    If CHANGE_EXPECTED is not True, then log a message if the
    timestamp has to be changed."""

    if timestamp > self.max_timestamp:
      # If a timestamp is in the future, it is assumed that it is
      # bogus.  Shift it backwards in time to prevent it forcing other
      # timestamps to be pushed even further in the future.

      # Note that this is not nearly a complete solution to the bogus
      # timestamp problem.  A timestamp in the future still affects
      # the ordering of changesets, and a changeset having such a
      # timestamp will not be committed until all changesets with
      # earlier timestamps have been committed, even if other
      # changesets with even earlier timestamps depend on this one.
      self.timestamp = self.timestamp + 1.0
      if not change_expected:
        logger.warn(
            'Timestamp "%s" is in the future; changed to "%s".'
            % (time.asctime(time.gmtime(timestamp)),
               time.asctime(time.gmtime(self.timestamp)),)
            )
    elif timestamp < self.timestamp + 1.0:
      self.timestamp = self.timestamp + 1.0
      if not change_expected and logger.is_on(logger.VERBOSE):
        logger.verbose(
            'Timestamp "%s" adjusted to "%s" to ensure monotonicity.'
            % (time.asctime(time.gmtime(timestamp)),
               time.asctime(time.gmtime(self.timestamp)),)
            )
    else:
      self.timestamp = timestamp

    return self.timestamp
Example #30
0
    def set_revision_info(self, revision, log, text):
        """This is a callback method declared in Sink."""

        rev_data = self._rev_data[revision]
        cvs_rev = self._cvs_file_items[rev_data.cvs_rev_id]

        if cvs_rev.metadata_id is not None:
            # Users have reported problems with repositories in which the
            # deltatext block for revision 1.1 appears twice.  It is not
            # known whether this results from a CVS/RCS bug, or from botched
            # hand-editing of the repository.  In any case, empirically, cvs
            # and rcs both use the first version when checking out data, so
            # that's what we will do.  (For the record: "cvs log" fails on
            # such a file; "rlog" prints the log message from the first
            # block and ignores the second one.)
            logger.warn("%s: in '%s':\n"
                        "   Deltatext block for revision %s appeared twice;\n"
                        "   ignoring the second occurrence.\n" % (
                            warning_prefix,
                            self.cvs_file.rcs_path,
                            revision,
                        ))
            return

        if is_trunk_revision(revision):
            branch_name = None
        else:
            branch_name = self.sdc.rev_to_branch_data(revision).symbol.name

        cvs_rev.metadata_id = self.collect_data.metadata_logger.store(
            self.project, branch_name, rev_data.author, log)
        cvs_rev.deltatext_exists = bool(text)

        # If this is revision 1.1, determine whether the file appears to
        # have been created via 'cvs add' instead of 'cvs import'.  The
        # test is that the log message CVS uses for 1.1 in imports is
        # "Initial revision\n" with no period.  (This fact helps determine
        # whether this file might have had a default branch in the past.)
        if revision == '1.1':
            self._file_imported = (log == 'Initial revision\n')
Example #31
0
    def define_symbol(self, name, revision):
        """Record a symbol definition for later processing."""

        # Canonicalize the revision number:
        revision = _branch_revision_re.sub(r'\1\2', revision)

        # Apply any user-defined symbol transforms to the symbol name:
        name = self.transform_symbol(name, revision)

        if name is not None:
            # Verify that the revision number is valid:
            if _valid_revision_re.match(revision):
                # The revision number is valid; record it for later processing:
                self._symbol_defs.append((name, revision))
            else:
                logger.warn('In %r:\n'
                            '    branch %r references invalid revision %s\n'
                            '    and will be ignored.' % (
                                self.cvs_file.rcs_path,
                                name,
                                revision,
                            ))
Example #32
0
    def _add_branch(self, name, branch_number):
        """Record that BRANCH_NUMBER is the branch number for branch NAME,
    and derive and record the revision from which NAME sprouts.
    BRANCH_NUMBER is an RCS branch number with an odd number of
    components, for example '1.7.2' (never '1.7.0.2').  Return the
    _BranchData instance (which is usually newly-created)."""

        branch_data = self.branches_data.get(branch_number)

        if branch_data is not None:
            logger.warn(
                "%s: in '%s':\n"
                "   branch '%s' already has name '%s',\n"
                "   cannot also have name '%s', ignoring the latter\n" %
                (warning_prefix, self.cvs_file.rcs_path, branch_number,
                 branch_data.symbol.name, name))
            return branch_data

        symbol = self.pdc.get_symbol(name)
        branch_data = _BranchData(
            self.collect_data.item_key_generator.gen_id(), symbol,
            branch_number)
        self.branches_data[branch_number] = branch_data
        return branch_data
Example #33
0
  def _add_pattern(self, pattern, props):
    propdict = {}
    if self.quoted_re.match(pattern):
      logger.warn(
          '%s: Quoting is not supported in auto-props; please verify rule\n'
          'for %r.  (Using pattern including quotation marks.)\n'
          % (warning_prefix, pattern,)
          )
    for prop in props.split(';'):
      prop = prop.strip()
      m = self.property_unset_re.match(prop)
      if m:
        name = m.group('name')
        logger.debug(
            'auto-props: For %r, leaving %r unset.' % (pattern, name,)
            )
        propdict[name] = None
        continue

      m = self.property_set_re.match(prop)
      if m:
        name = m.group('name')
        value = m.group('value')
        if self.quoted_re.match(value):
          logger.warn(
              '%s: Quoting is not supported in auto-props; please verify\n'
              'rule %r for pattern %r.  (Using value\n'
              'including quotation marks.)\n'
              % (warning_prefix, prop, pattern,)
              )
        logger.debug(
            'auto-props: For %r, setting %r to %r.' % (pattern, name, value,)
            )
        propdict[name] = value
        continue

      m = self.property_novalue_re.match(prop)
      if m:
        name = m.group('name')
        logger.debug(
            'auto-props: For %r, setting %r to the empty string'
            % (pattern, name,)
            )
        propdict[name] = ''
        continue

      logger.warn(
          '%s: in auto-props line for %r, value %r cannot be parsed (ignored)'
          % (warning_prefix, pattern, prop,)
          )

    self.patterns.append(self.Pattern(self.transform_case(pattern), propdict))
Example #34
0
class SubtreeSymbolMapper(SymbolTransform):
    """A SymbolTransform that transforms symbols within a whole repo subtree.

  The user has to specify a CVS repository path (a filename or
  directory) and the original symbol name.  All symbols under that
  path will be renamed to the specified new name (which can be None if
  the symbol should be ignored).  The mappings can be set via a
  constructor argument or by calling __setitem__().  Only the most
  specific rule is applied."""
    def __init__(self, items=[]):
        """Initialize the mapper.

    ITEMS is a list of tuples (cvs_path, symbol_name, new_name)
    which will be set as mappings.  cvs_path is a string naming a
    directory within the CVS repository."""

        # A map {symbol_name : {cvs_path : new_name}}:
        self._map = {}

        for (cvs_path, symbol_name, new_name) in items:
            self[cvs_path, symbol_name] = new_name

    def __setitem__(self, (cvs_path, symbol_name), new_name):
        """Set a mapping for a particular file and symbol."""

        try:
            symbol_map = self._map[symbol_name]
        except KeyError:
            symbol_map = {}
            self._map[symbol_name] = symbol_map

        cvs_path = os.path.normcase(os.path.normpath(cvs_path))
        if cvs_path in symbol_map:
            logger.warn('Overwriting symbol transform for\n'
                        '    directory=%r symbol=%s' % (
                            cvs_path,
                            symbol_name,
                        ))
        symbol_map[cvs_path] = new_name
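A hedged usage sketch for SubtreeSymbolMapper as defined above; the repository paths and symbol names are invented, and only the constructor and __setitem__() shown in the excerpt are exercised. The "most specific rule" lookup happens elsewhere and is not part of this excerpt.

# Assumes the SubtreeSymbolMapper class (and its imports) from the excerpt above.
mapper = SubtreeSymbolMapper([
    # (cvs_path, symbol_name, new_name)
    ('repo/project', 'RELEASE', 'release'),      # rename under repo/project...
    ('repo/project/contrib', 'RELEASE', None),   # ...but discard it under contrib/
])

# Further rules can be added directly; re-adding an existing (path, symbol)
# pair would log the "Overwriting symbol transform" warning shown above:
mapper['repo/project/doc', 'RELEASE'] = 'docs-release'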
Example #35
0
    def _process_ntdbrs(self):
        """Fix up any non-trunk default branch revisions (if present).

    If a non-trunk default branch is determined to have existed, yield
    the _RevisionData.ids for all revisions that were once non-trunk
    default revisions, in dependency order.

    There are two cases to handle:

    One case is simple.  The RCS file lists a default branch
    explicitly in its header, such as '1.1.1'.  In this case, we know
    that every revision on the vendor branch is to be treated as head
    of trunk at that point in time.

    But there's also a degenerate case.  The RCS file does not
    currently have a default branch, yet we can deduce that for some
    period in the past it probably *did* have one.  For example, the
    file has vendor revisions 1.1.1.1 -> 1.1.1.96, all of which are
    dated before 1.2, and then it has 1.1.1.97 -> 1.1.1.100 dated
    after 1.2.  In this case, we should record 1.1.1.96 as the last
    vendor revision to have been the head of the default branch.

    If any non-trunk default branch revisions are found:

    - Set their ntdbr members to True.

    - Connect the last one with revision 1.2.

    - Remove revision 1.1 if it is not needed.

    """

        try:
            if self.default_branch:
                try:
                    vendor_cvs_branch_id = self.sdc.branches_data[
                        self.default_branch].id
                except KeyError:
                    logger.warn(
                        '%s: In %s:\n'
                        '    vendor branch %r is not present in file and will be ignored.'
                        % (
                            warning_prefix,
                            self.cvs_file.rcs_path,
                            self.default_branch,
                        ))
                    self.default_branch = None
                    return
                vendor_lod_items = self._cvs_file_items.get_lod_items(
                    self._cvs_file_items[vendor_cvs_branch_id])
                if not self._cvs_file_items.process_live_ntdb(
                        vendor_lod_items):
                    return
            elif self._file_imported:
                vendor_branch_data = self.sdc.branches_data.get('1.1.1')
                if vendor_branch_data is None:
                    return
                else:
                    vendor_lod_items = self._cvs_file_items.get_lod_items(
                        self._cvs_file_items[vendor_branch_data.id])
                    if not self._cvs_file_items.process_historical_ntdb(
                            vendor_lod_items):
                        return
            else:
                return
        except VendorBranchError, e:
            self.collect_data.record_fatal_error(str(e))
            return
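The "degenerate case" in the docstring above comes down to a timestamp comparison between vendor-branch revisions and revision 1.2. The helper below is a hypothetical illustration of that single idea, not cvs2svn's actual implementation (which is the process_historical_ntdb() call in the excerpt).

def last_default_branch_rev(vendor_revs, timestamp_1_2):
    """Hypothetical helper illustrating the docstring's degenerate case.

    VENDOR_REVS is a list of (revision, timestamp) pairs for the 1.1.1.*
    revisions, in order; TIMESTAMP_1_2 is the timestamp of revision 1.2.
    Return the last vendor revision committed before 1.2 (i.e. the last
    one that was ever the head of the former default branch), or None."""

    last = None
    for (rev, timestamp) in vendor_revs:
        if timestamp < timestamp_1_2:
            last = rev
    return last

# The docstring's example: 1.1.1.1 .. 1.1.1.96 dated before 1.2, and
# 1.1.1.97 .. 1.1.1.100 dated after it; the answer is 1.1.1.96.
revs = [('1.1.1.%d' % i, 1000.0 + i) for i in range(1, 101)]
assert last_default_branch_rev(revs, timestamp_1_2=1096.5) == '1.1.1.96'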
Example #36
0
      'for more information.\n'
      % (error_prefix, anydbm._defaultmod.__name__,)
      )
  sys.exit(1)

# 3. If we are using the old bsddb185 module, then try to use gdbm instead.
#    Unfortunately, gdbm appears not to be trouble free, either.
if hasattr(anydbm._defaultmod, 'bsddb') \
    and not hasattr(anydbm._defaultmod.bsddb, '__version__'):
  try:
    gdbm = __import__('gdbm')
  except ImportError:
    logger.warn(
        '%s: The version of the bsddb module found on your computer '
            'has been\n'
        'reported to malfunction on some datasets, causing KeyError '
            'exceptions.\n'
        % (warning_prefix,)
        )
  else:
    anydbm._defaultmod = gdbm


class Database:
  """A database that uses a Serializer to store objects of a certain type.

  The serializer is stored in the database under the key
  self.serializer_key.  (This implies that self.serializer_key may not
  be used as a key for normal entries.)

  The backing database is an anydbm-based DBM.
Example #37
0
    except (cvs2svn_rcsparse.common.RCSParseError, RuntimeError):
      self.collect_data.record_fatal_error(
          "%r is not a valid ,v file" % (cvs_file.rcs_path,)
          )
      # Abort the processing of this file, but let the pass continue
      # with other files:
      return
    except ValueError, e:
      self.collect_data.record_fatal_error(
          "%r is not a valid ,v file (%s)" % (cvs_file.rcs_path, str(e),)
          )
      # Abort the processing of this file, but let the pass continue
      # with other files:
      return
    except:
      logger.warn("Exception occurred while parsing %s" % cvs_file.rcs_path)
      raise
    else:
      self.num_files += 1

    return fdc.get_cvs_file_items()


class CollectData:
  """Repository for data collected by parsing the CVS repository files.

  This class manages the databases into which information collected
  from the CVS repository is stored.  The data are stored into this
  class by _FileDataCollector instances, one of which is created for
  each file to be parsed."""
Example #38
0
    def _add_pattern(self, pattern, props):
        propdict = {}
        if self.quoted_re.match(pattern):
            logger.warn(
                '%s: Quoting is not supported in auto-props; please verify rule\n'
                'for %r.  (Using pattern including quotation marks.)\n' % (
                    warning_prefix,
                    pattern,
                ))
        for prop in props.split(';'):
            prop = prop.strip()
            m = self.property_unset_re.match(prop)
            if m:
                name = m.group('name')
                logger.debug('auto-props: For %r, leaving %r unset.' % (
                    pattern,
                    name,
                ))
                propdict[name] = None
                continue

            m = self.property_set_re.match(prop)
            if m:
                name = m.group('name')
                value = m.group('value')
                if self.quoted_re.match(value):
                    logger.warn(
                        '%s: Quoting is not supported in auto-props; please verify\n'
                        'rule %r for pattern %r.  (Using value\n'
                        'including quotation marks.)\n' % (
                            warning_prefix,
                            prop,
                            pattern,
                        ))
                logger.debug('auto-props: For %r, setting %r to %r.' % (
                    pattern,
                    name,
                    value,
                ))
                propdict[name] = value
                continue

            m = self.property_novalue_re.match(prop)
            if m:
                name = m.group('name')
                logger.debug(
                    'auto-props: For %r, setting %r to the empty string' % (
                        pattern,
                        name,
                    ))
                propdict[name] = ''
                continue

            logger.warn(
                '%s: in auto-props line for %r, value %r cannot be parsed (ignored)'
                % (
                    warning_prefix,
                    pattern,
                    prop,
                ))

        self.patterns.append(
            self.Pattern(self.transform_case(pattern), propdict))
Example #39
0
        'for more information.\n' % (
            error_prefix,
            anydbm._defaultmod.__name__,
        ))
    sys.exit(1)

# 3. If we are using the old bsddb185 module, then try to use gdbm instead.
#    Unfortunately, gdbm appears not to be trouble free, either.
if hasattr(anydbm._defaultmod, 'bsddb') \
    and not hasattr(anydbm._defaultmod.bsddb, '__version__'):
    try:
        gdbm = __import__('gdbm')
    except ImportError:
        logger.warn(
            '%s: The version of the bsddb module found on your computer '
            'has been\n'
            'reported to malfunction on some datasets, causing KeyError '
            'exceptions.\n' % (warning_prefix, ))
    else:
        anydbm._defaultmod = gdbm


class Database:
    """A database that uses a Serializer to store objects of a certain type.

  The serializer is stored in the database under the key
  self.serializer_key.  (This implies that self.serializer_key may not
  be used as a key for normal entries.)

  The backing database is an anydbm-based DBM.
Example #40
0
  def _process_ntdbrs(self):
    """Fix up any non-trunk default branch revisions (if present).

    If a non-trunk default branch is determined to have existed, yield
    the _RevisionData.ids for all revisions that were once non-trunk
    default revisions, in dependency order.

    There are two cases to handle:

    One case is simple.  The RCS file lists a default branch
    explicitly in its header, such as '1.1.1'.  In this case, we know
    that every revision on the vendor branch is to be treated as head
    of trunk at that point in time.

    But there's also a degenerate case.  The RCS file does not
    currently have a default branch, yet we can deduce that for some
    period in the past it probably *did* have one.  For example, the
    file has vendor revisions 1.1.1.1 -> 1.1.1.96, all of which are
    dated before 1.2, and then it has 1.1.1.97 -> 1.1.1.100 dated
    after 1.2.  In this case, we should record 1.1.1.96 as the last
    vendor revision to have been the head of the default branch.

    If any non-trunk default branch revisions are found:

    - Set their ntdbr members to True.

    - Connect the last one with revision 1.2.

    - Remove revision 1.1 if it is not needed.

    """

    try:
      if self.default_branch:
        try:
          vendor_cvs_branch_id = self.sdc.branches_data[self.default_branch].id
        except KeyError:
          logger.warn(
              '%s: In %s:\n'
              '    vendor branch %r is not present in file and will be ignored.'
              % (warning_prefix, self.cvs_file.rcs_path, self.default_branch,)
              )
          self.default_branch = None
          return
        vendor_lod_items = self._cvs_file_items.get_lod_items(
            self._cvs_file_items[vendor_cvs_branch_id]
            )
        if not self._cvs_file_items.process_live_ntdb(vendor_lod_items):
          return
      elif self._file_imported:
        vendor_branch_data = self.sdc.branches_data.get('1.1.1')
        if vendor_branch_data is None:
          return
        else:
          vendor_lod_items = self._cvs_file_items.get_lod_items(
              self._cvs_file_items[vendor_branch_data.id]
              )
          if not self._cvs_file_items.process_historical_ntdb(
                vendor_lod_items
                ):
            return
      else:
        return
    except VendorBranchError, e:
      self.collect_data.record_fatal_error(str(e))
      return
Example #41
0
            self.collect_data.record_fatal_error("%r is not a valid ,v file" %
                                                 (cvs_file.rcs_path, ))
            # Abort the processing of this file, but let the pass continue
            # with other files:
            return
        except ValueError, e:
            self.collect_data.record_fatal_error(
                "%r is not a valid ,v file (%s)" % (
                    cvs_file.rcs_path,
                    str(e),
                ))
            # Abort the processing of this file, but let the pass continue
            # with other files:
            return
        except:
            logger.warn("Exception occurred while parsing %s" %
                        cvs_file.rcs_path)
            raise
        else:
            self.num_files += 1

        return fdc.get_cvs_file_items()


class CollectData:
    """Repository for data collected by parsing the CVS repository files.

  This class manages the databases into which information collected
  from the CVS repository is stored.  The data are stored into this
  class by _FileDataCollector instances, one of which is created for
  each file to be parsed."""
    def __init__(self, stats_keeper):