Example #1
0
  def tree_completed(self):
    """Finish up once the whole revision tree has been delivered.

    The dependencies gathered during parsing are resolved, the
    preliminary CVSFileItems for the file is built and checked for
    link consistency, and the revision recorder is told about it.

    This is a callback method declared in Sink."""

    self._resolve_primary_dependencies()
    self._resolve_branch_dependencies()
    self._sort_branches()
    self._resolve_tag_dependencies()

    # Gather the preliminary items: revisions first, then branches,
    # then tags.
    cvs_items = list(self._get_cvs_revisions())
    cvs_items += self._get_cvs_branches()
    cvs_items += self._get_cvs_tags()

    file_items = CVSFileItems(self.cvs_file, self.pdc.trunk, cvs_items)
    self._cvs_file_items = file_items

    file_items.check_link_consistency()

    # Hand the file dependency tree to the revision recorder.
    self.collect_data.revision_recorder.start_file(file_items)
Example #2
0
  def tree_completed(self):
    """Finish up once the whole revision tree has been delivered.

    The dependencies gathered during parsing are resolved, then the
    preliminary CVSFileItems for the file is built and checked for
    link consistency.

    This is a callback method declared in Sink."""

    self._resolve_primary_dependencies()
    self._resolve_branch_dependencies()
    self._sort_branches()
    self._resolve_tag_dependencies()

    # Gather the preliminary items: revisions first, then branches,
    # then tags.
    cvs_items = list(self._get_cvs_revisions())
    cvs_items += self._get_cvs_branches()
    cvs_items += self._get_cvs_tags()

    file_items = CVSFileItems(self.cvs_file, self.pdc.trunk, cvs_items)
    self._cvs_file_items = file_items

    file_items.check_link_consistency()
Example #3
0
class _FileDataCollector(cvs2svn_rcsparse.Sink):
  """Class responsible for collecting RCS data for a particular file.

  Any collected data that need to be remembered are stored into the
  referenced CollectData instance."""

  def __init__(self, pdc, cvs_file):
    """Prepare to receive the parsed contents of a single RCS file.

    PDC supplies the CollectData instance (PDC.collect_data) in which
    information collected about the file is stored.  CVS_FILE is the
    CVSFile instance describing the file."""

    self.pdc = pdc
    self.cvs_file = cvs_file

    # Shortcuts to objects reachable through the two arguments:
    self.collect_data = pdc.collect_data
    self.project = cvs_file.project

    # Bookkeeping for the symbols (branches and tags) in this file.
    # The helper is handed a reference to this object.
    self.sdc = _SymbolDataCollector(self, cvs_file)

    # Map { revision : _RevisionData instance }
    self._rev_data = {}

    # List [ (parent, child) ] of revision number pairs, each saying
    # that revision child depends on revision parent along the main
    # line of development.
    self._primary_dependencies = []

    # An RCS branch number, if one has been set.  rcsparse speaks of
    # the "principal branch" (hence the name of the callback,
    # 'set_principal_branch'), whereas CVS and RCS call it the
    # "default branch"; the latter term is used here.
    self.default_branch = None

    # True iff revision 1.1 of the file appears to have been imported
    # (as opposed to added normally).
    self._file_imported = False

  def _get_rev_id(self, revision):
    """Return the cvs_rev_id recorded for REVISION.

    None is passed through unchanged.  KeyError is raised if REVISION
    has not been defined."""

    if revision is not None:
      return self._rev_data[revision].cvs_rev_id
    return None

  def set_principal_branch(self, branch):
    """Remember BRANCH as the default branch of the file, if usable.

    A BRANCH without any dot is ignored.  A BRANCH that does not match
    _branch_revision_re, or that is not a top-level branch, is
    recorded as a fatal error and otherwise ignored.

    This is a callback method declared in Sink."""

    if '.' not in branch:
      # The default branch is being set to trunk.  That should not
      # normally happen, but it has been seen in at least one CVS
      # repository.  Nothing needs to be remembered.
      return

    match = _branch_revision_re.match(branch)
    if not match:
      self.collect_data.record_fatal_error(
          'The default branch %s in file %r is not a valid branch number'
          % (branch, self.cvs_file.rcs_path,)
          )
      return

    normalized = match.group(1) + match.group(2)
    if normalized.count('.') != 2:
      # It is not known how a default branch that is not top-level
      # should be treated (what does CVS do?), so give up:
      self.collect_data.record_fatal_error(
          'The default branch %s in file %r is not a top-level branch'
          % (normalized, self.cvs_file.rcs_path,)
          )
      return

    self.default_branch = normalized

  def define_tag(self, name, revision):
    """Pass symbol NAME and its REVISION on to the symbol collector.

    The symbol is only remembered at this point, not processed.

    This is a callback method declared in Sink."""

    symbol_collector = self.sdc
    symbol_collector.define_symbol(name, revision)

  def set_expansion(self, mode):
    """Store MODE as the mode attribute of the CVSFile.

    This is a callback method declared in Sink."""

    cvs_file = self.cvs_file
    cvs_file.mode = mode

  def admin_completed(self):
    """Have the symbol collector process the symbols seen so far.

    This is a callback method declared in Sink."""

    symbol_collector = self.sdc
    symbol_collector.process_symbols()

  def define_revision(self, revision, timestamp, author, state,
                      branches, next):
    """Record the definition of REVISION.

    Each entry of BRANCHES is stored as the child of the branch that
    it belongs to.  A _RevisionData built from REVISION, TIMESTAMP
    (converted with int()), AUTHOR and STATE is stored for REVISION,
    and if NEXT is set, a primary dependency between REVISION and NEXT
    is queued.  RuntimeError is raised if REVISION was already defined.

    This is a callback method declared in Sink."""

    sdc = self.sdc
    for branch_rev in branches:
      try:
        branch_data = sdc.rev_to_branch_data(branch_rev)
      except KeyError:
        # Branches are normally learned from the names and numbers in
        # the symbolic name header, so this must be an unlabeled
        # branch that slipped through the net.  Generate a name for it
        # and create its _BranchData record now.
        branch_data = sdc._add_unlabeled_branch(
            sdc.rev_to_branch_number(branch_rev))

      assert branch_data.child is None
      branch_data.child = branch_rev

    if revision in self._rev_data:
      # This revision has already been seen.
      logger.error('File %r contains duplicate definitions of revision %s.'
                  % (self.cvs_file.rcs_path, revision,))
      raise RuntimeError()

    # Record basic information about the revision:
    self._rev_data[revision] = _RevisionData(
        self.collect_data.item_key_generator.gen_id(),
        revision, int(timestamp), author, state)

    if not next:
      return

    # In RCS, 'next' always means "where to find the next deltatext
    # needed to retrieve this revision":
    #
    # - On trunk it names what humans would call the *previous*
    #   revision; for example, 1.3's RCS 'next' is 1.2.
    #
    # - On a branch it names what humans would call the *next*
    #   revision; for example, 1.1.2.1's RCS 'next' would be 1.1.2.2.
    #
    # That is not the orientation wanted here, so the (parent, child)
    # pair is ordered according to whether REVISION is on trunk.
    if is_trunk_revision(revision):
      dependency = (next, revision,)
    else:
      dependency = (revision, next,)
    self._primary_dependencies.append(dependency)

  def tree_completed(self):
    """Finish up once the whole revision tree has been delivered.

    The dependencies gathered during parsing are resolved, then the
    preliminary CVSFileItems for the file is built and checked for
    link consistency.

    This is a callback method declared in Sink."""

    self._resolve_primary_dependencies()
    self._resolve_branch_dependencies()
    self._sort_branches()
    self._resolve_tag_dependencies()

    # Gather the preliminary items: revisions first, then branches,
    # then tags.
    cvs_items = list(self._get_cvs_revisions())
    cvs_items += self._get_cvs_branches()
    cvs_items += self._get_cvs_tags()

    file_items = CVSFileItems(self.cvs_file, self.pdc.trunk, cvs_items)
    self._cvs_file_items = file_items

    file_items.check_link_consistency()

  def _resolve_primary_dependencies(self):
    """Turn the queued (parent, child) pairs into parent/child links.

    For every pair in self._primary_dependencies, the child is stored
    on the parent's _RevisionData and the parent on the child's.  Each
    link is asserted to be unset beforehand."""

    revs = self._rev_data
    for parent_rev, child_rev in self._primary_dependencies:
      parent_entry = revs[parent_rev]
      assert parent_entry.child is None
      parent_entry.child = child_rev

      child_entry = revs[child_rev]
      assert child_entry.parent is None
      child_entry.parent = parent_rev

  def _resolve_branch_dependencies(self):
    """Resolve dependencies involving branches.

    Every branch in self.sdc.branches_data is appended to the
    branches_data list of the revision that it sprouts from.  If
    something was committed on the branch, the first revision on the
    branch is also linked to the branch and to that parent revision.

    A branch whose parent revision was never defined is reported with
    a warning and removed from self.sdc.branches_data."""

    # Iterate over a snapshot: entries may be deleted from the
    # dictionary inside the loop, which is not allowed while iterating
    # over the dictionary (or a view of it) directly.
    for branch_data in list(self.sdc.branches_data.values()):
      try:
        parent_data = self._rev_data[branch_data.parent]
      except KeyError:
        logger.warn(
            'In %r:\n'
            '    branch %r references non-existing revision %s\n'
            '    and will be ignored.'
            % (self.cvs_file.rcs_path, branch_data.symbol.name,
               branch_data.parent,))
        del self.sdc.branches_data[branch_data.branch_number]
        continue

      # The branch_data's parent has the branch as a child regardless
      # of whether the branch had any subsequent commits:
      parent_data.branches_data.append(branch_data)

      if branch_data.child is None:
        # Nothing was committed on the branch.
        continue

      # Something was committed on the branch: store a reference to
      # the branch_data in the child, define the child's parent to be
      # the branch's parent, and list the child in the branch parent's
      # branches_revs_data.
      child_data = self._rev_data[branch_data.child]
      assert child_data.parent_branch_data is None
      child_data.parent_branch_data = branch_data
      assert child_data.parent is None
      child_data.parent = branch_data.parent
      parent_data.branches_revs_data.append(branch_data.child)

  def _sort_branches(self):
    """Sort the branches sprouting from each revision in creation order.

    Creation order is taken to be the reverse of the order that they
    are listed in the symbols part of the RCS file.  (If a branch is
    created then deleted, a later branch can be assigned the recycled
    branch number; therefore branch numbers are not an indication of
    creation order.)

    Concretely, each revision's branches_data list is sorted in place
    by descending id."""

    for rev_data in self._rev_data.values():
      # A key function with reverse=True gives the same (stable)
      # ordering as a negated cmp function, without calling back into
      # Python for every comparison.
      rev_data.branches_data.sort(
          key=lambda branch_data: branch_data.id, reverse=True)

  def _resolve_tag_dependencies(self):
    """Resolve dependencies involving tags.

    The tags listed in self.sdc.tags_data for a revision are appended
    to the tags_data list of that revision.  Tags that refer to a
    revision that was never defined are reported with a warning and
    their entry is removed from self.sdc.tags_data."""

    # Iterate over a snapshot: entries may be deleted from the
    # dictionary inside the loop, which is not allowed while iterating
    # over the dictionary (or a view of it) directly.
    for (rev, tag_data_list) in list(self.sdc.tags_data.items()):
      try:
        parent_data = self._rev_data[rev]
      except KeyError:
        logger.warn(
            'In %r:\n'
            '    the following tag(s) reference non-existing revision %s\n'
            '    and will be ignored:\n'
            '    %s' % (
                self.cvs_file.rcs_path, rev,
                ', '.join([repr(tag_data.symbol.name)
                           for tag_data in tag_data_list]),))
        del self.sdc.tags_data[rev]
        continue

      for tag_data in tag_data_list:
        assert tag_data.rev == rev
        # The tag_data's rev has the tag as a child:
        parent_data.tags_data.append(tag_data)

  def _get_cvs_branches(self):
    """Generate one CVSBranch per entry of self.sdc.branches_data."""

    sdc = self.sdc
    for data in sdc.branches_data.values():
      parent_rev = data.parent
      yield CVSBranch(
          data.id, self.cvs_file, data.symbol,
          data.branch_number,
          sdc.rev_to_lod(parent_rev),
          self._get_rev_id(parent_rev),
          self._get_rev_id(data.child),
          None,
          )

  def _get_cvs_tags(self):
    """Generate one CVSTag per tag listed in self.sdc.tags_data."""

    sdc = self.sdc
    for tag_list in sdc.tags_data.values():
      for tag in tag_list:
        tagged_rev = tag.rev
        yield CVSTag(
            tag.id, self.cvs_file, tag.symbol,
            sdc.rev_to_lod(tagged_rev),
            self._get_rev_id(tagged_rev),
            None,
            )

  def set_description(self, description):
    """Store DESCRIPTION on the CVSFile, then determine its properties.

    The properties are determined using the file property setters of
    the current Ctx().

    This is a callback method declared in Sink."""

    cvs_file = self.cvs_file
    cvs_file.description = description
    cvs_file.determine_file_properties(Ctx().file_property_setters)

  def set_revision_info(self, revision, log, text):
    """Record the log message and deltatext information of REVISION.

    The metadata for LOG is stored via the metadata logger and its id
    is remembered in the corresponding CVSRevision, together with
    whether TEXT is non-empty.  A second block for a revision that
    already has metadata is ignored with a warning.

    This is a callback method declared in Sink."""

    rev_data = self._rev_data[revision]
    cvs_rev = self._cvs_file_items[rev_data.cvs_rev_id]

    if cvs_rev.metadata_id is not None:
      # Repositories have been reported in which the deltatext block
      # for revision 1.1 appears twice.  Whether that comes from a
      # CVS/RCS bug or from botched hand-editing of the repository is
      # not known.  Empirically, cvs and rcs both use the first
      # version when checking out data, so the same is done here.
      # (For the record: "cvs log" fails on such a file; "rlog" prints
      # the log message from the first block and ignores the second
      # one.)
      logger.warn(
          "%s: in '%s':\n"
          "   Deltatext block for revision %s appeared twice;\n"
          "   ignoring the second occurrence.\n"
          % (warning_prefix, self.cvs_file.rcs_path, revision,)
          )
      return

    # Trunk revisions have no branch name.
    branch_name = None
    if not is_trunk_revision(revision):
      branch_name = self.sdc.rev_to_branch_data(revision).symbol.name

    cvs_rev.metadata_id = self.collect_data.metadata_logger.store(
        self.project, branch_name, rev_data.author, log
        )
    cvs_rev.deltatext_exists = bool(text)

    # For revision 1.1, determine whether the file appears to have
    # been created via 'cvs import' rather than 'cvs add'.  The test
    # is that the log message CVS uses for 1.1 in imports is
    # "Initial revision\n" with no period.  (This fact helps determine
    # whether this file might have had a default branch in the past.)
    if revision == '1.1':
      self._file_imported = (log == 'Initial revision\n')

  def parse_completed(self):
    """Check that every revision of the file received its info section.

    A fatal error is recorded for each CVSRevision whose metadata_id
    is still None.

    This is a callback method declared in Sink."""

    for item in self._cvs_file_items.values():
      if not isinstance(item, CVSRevision):
        continue
      if item.metadata_id is None:
        self.collect_data.record_fatal_error(
            '%r has no deltatext section for revision %s'
            % (self.cvs_file.rcs_path, item.rev,)
            )

  def _determine_operation(self, rev_data):
    """Return the entry of cvs_revision_type_map that fits REV_DATA.

    The lookup key is a pair of booleans: whether REV_DATA's state is
    something other than 'dead', and whether REV_DATA has a known
    parent whose state is something other than 'dead'."""

    parent_data = self._rev_data.get(rev_data.parent)
    is_live = rev_data.state != 'dead'
    parent_is_live = parent_data is not None and parent_data.state != 'dead'
    return cvs_revision_type_map[(is_live, parent_is_live)]

  def _get_cvs_revisions(self):
    """Generate the CVSRevisions present in this file.

    One item is produced, via _get_cvs_revision(), for every
    _RevisionData in self._rev_data."""

    # values() is what every other loop over self._rev_data uses;
    # unlike itervalues(), it is not specific to Python 2 dicts.
    for rev_data in self._rev_data.values():
      yield self._get_cvs_revision(rev_data)

  def _get_cvs_revision(self, rev_data):
    """Build and return the CVSRevision that corresponds to REV_DATA.

    The class to instantiate is chosen by _determine_operation()."""

    rev = rev_data.rev

    # Ids of the branches, first branch commits and tags that are
    # attached to this revision:
    branch_ids = [branch.id for branch in rev_data.branches_data]
    branch_commit_ids = [
        self._get_rev_id(branch_rev)
        for branch_rev in rev_data.branches_revs_data
        ]
    tag_ids = [tag.id for tag in rev_data.tags_data]

    revision_type = self._determine_operation(rev_data)

    return revision_type(
        self._get_rev_id(rev), self.cvs_file,
        rev_data.timestamp, None,
        self._get_rev_id(rev_data.parent),
        self._get_rev_id(rev_data.child),
        rev,
        True,
        self.sdc.rev_to_lod(rev),
        rev_data.get_first_on_branch_id(),
        False, None, None,
        tag_ids, branch_ids, branch_commit_ids,
        rev_data.revision_reader_token
        )

  def get_cvs_file_items(self):
    """Do the final fix-ups and return the CVSFileItems for this file.

    This method must only be called once, because it deletes self.sdc."""

    self._process_ntdbrs()

    file_items = self._cvs_file_items

    # Dropping sdc breaks a circular reference loop, so that the
    # memory for self and sdc can be freed.
    del self.sdc

    return file_items

  def _process_ntdbrs(self):
    """Fix up any non-trunk default branch revisions (if present).

    The work is done in place on self._cvs_file_items; nothing is
    returned.

    There are two cases to handle:

    One case is simple.  The RCS file lists a default branch
    explicitly in its header, such as '1.1.1'.  In this case, we know
    that every revision on the vendor branch is to be treated as head
    of trunk at that point in time.

    But there's also a degenerate case.  The RCS file does not
    currently have a default branch, yet we can deduce that for some
    period in the past it probably *did* have one.  For example, the
    file has vendor revisions 1.1.1.1 -> 1.1.1.96, all of which are
    dated before 1.2, and then it has 1.1.1.97 -> 1.1.1.100 dated
    after 1.2.  In this case, we should record 1.1.1.96 as the last
    vendor revision to have been the head of the default branch.

    If any non-trunk default branch revisions are found:

    - Set their ntdbr members to True.

    - Connect the last one with revision 1.2.

    - Remove revision 1.1 if it is not needed.

    A VendorBranchError raised along the way is recorded as a fatal
    error.

    """

    try:
      if self.default_branch:
        # The default branch is listed explicitly in the RCS header.
        vendor_cvs_branch_id = self.sdc.branches_data[self.default_branch].id
        vendor_lod_items = self._cvs_file_items.get_lod_items(
            self._cvs_file_items[vendor_cvs_branch_id]
            )
        if not self._cvs_file_items.process_live_ntdb(vendor_lod_items):
          return
      elif self._file_imported:
        # No default branch now, but the file looks imported, so
        # branch 1.1.1 (if it exists) may have been one in the past.
        vendor_branch_data = self.sdc.branches_data.get('1.1.1')
        if vendor_branch_data is None:
          return
        vendor_lod_items = self._cvs_file_items.get_lod_items(
            self._cvs_file_items[vendor_branch_data.id]
            )
        if not self._cvs_file_items.process_historical_ntdb(
              vendor_lod_items
              ):
          return
      else:
        return
    except VendorBranchError as e:
      # "except ... as" is accepted by Python 2.6+ and by Python 3.
      self.collect_data.record_fatal_error(str(e))
      return

    if self._file_imported:
      self._cvs_file_items.imported_remove_1_1(vendor_lod_items)

    self._cvs_file_items.check_link_consistency()
Example #4
0
class _FileDataCollector(Sink):
    """Class responsible for collecting RCS data for a particular file.

  Any collected data that need to be remembered are stored into the
  referenced CollectData instance."""
    def __init__(self, pdc, cvs_file):
        """Prepare to receive the parsed contents of a single RCS file.

        PDC supplies the CollectData instance (PDC.collect_data) in
        which information collected about the file is stored.  CVS_FILE
        is the CVSFile instance describing the file."""

        self.pdc = pdc
        self.cvs_file = cvs_file

        # Shortcuts to objects reachable through the two arguments:
        self.collect_data = pdc.collect_data
        self.project = cvs_file.project

        # Bookkeeping for the symbols (branches and tags) in this file.
        # The helper is handed a reference to this object.
        self.sdc = _SymbolDataCollector(self, cvs_file)

        # Map { revision : _RevisionData instance }
        self._rev_data = {}

        # List [ (parent, child) ] of revision number pairs, each saying
        # that revision child depends on revision parent along the main
        # line of development.
        self._primary_dependencies = []

        # An RCS branch number, if one has been set.  rcsparse speaks of
        # the "principal branch" (hence the name of the callback,
        # 'set_principal_branch'), whereas CVS and RCS call it the
        # "default branch"; the latter term is used here.
        self.default_branch = None

        # True iff revision 1.1 of the file appears to have been imported
        # (as opposed to added normally).
        self._file_imported = False

    def _get_rev_id(self, revision):
        """Return the cvs_rev_id recorded for REVISION.

        None is passed through unchanged.  KeyError is raised if
        REVISION has not been defined."""

        if revision is not None:
            return self._rev_data[revision].cvs_rev_id
        return None

    def set_principal_branch(self, branch):
        """Remember BRANCH as the default branch of the file, if usable.

        A BRANCH without any dot is ignored.  A BRANCH that does not
        match _branch_revision_re, or that is not a top-level branch,
        is recorded as a fatal error and otherwise ignored.

        This is a callback method declared in Sink."""

        if '.' not in branch:
            # The default branch is being set to trunk.  That should not
            # normally happen, but it has been seen in at least one CVS
            # repository.  Nothing needs to be remembered.
            return

        match = _branch_revision_re.match(branch)
        if not match:
            self.collect_data.record_fatal_error(
                'The default branch %s in file %r is not a valid branch number'
                % (branch, self.cvs_file.rcs_path))
            return

        normalized = match.group(1) + match.group(2)
        if normalized.count('.') != 2:
            # It is not known how a default branch that is not top-level
            # should be treated (what does CVS do?), so give up:
            self.collect_data.record_fatal_error(
                'The default branch %s in file %r is not a top-level branch'
                % (normalized, self.cvs_file.rcs_path))
            return

        self.default_branch = normalized

    def define_tag(self, name, revision):
        """Pass symbol NAME and its REVISION on to the symbol collector.

        The symbol is only remembered at this point, not processed.

        This is a callback method declared in Sink."""

        symbol_collector = self.sdc
        symbol_collector.define_symbol(name, revision)

    def set_expansion(self, mode):
        """Store MODE as the mode attribute of the CVSFile.

        This is a callback method declared in Sink."""

        cvs_file = self.cvs_file
        cvs_file.mode = mode

    def admin_completed(self):
        """Have the symbol collector process the symbols seen so far.

        This is a callback method declared in Sink."""

        symbol_collector = self.sdc
        symbol_collector.process_symbols()

    def define_revision(self, revision, timestamp, author, state, branches,
                        next):
        """Record the definition of REVISION.

        Each entry of BRANCHES is stored as the child of the branch that
        it belongs to.  A _RevisionData built from REVISION, TIMESTAMP
        (converted with int()), AUTHOR and STATE is stored for REVISION,
        and if NEXT is set, a primary dependency between REVISION and
        NEXT is queued.  RuntimeError is raised if REVISION was already
        defined.

        This is a callback method declared in Sink."""

        sdc = self.sdc
        for branch_rev in branches:
            try:
                branch_data = sdc.rev_to_branch_data(branch_rev)
            except KeyError:
                # Branches are normally learned from the names and
                # numbers in the symbolic name header, so this must be
                # an unlabeled branch that slipped through the net.
                # Generate a name for it and create its _BranchData
                # record now.
                branch_data = sdc._add_unlabeled_branch(
                    sdc.rev_to_branch_number(branch_rev))

            assert branch_data.child is None
            branch_data.child = branch_rev

        if revision in self._rev_data:
            # This revision has already been seen.
            logger.error(
                'File %r contains duplicate definitions of revision %s.'
                % (self.cvs_file.rcs_path, revision))
            raise RuntimeError()

        # Record basic information about the revision:
        self._rev_data[revision] = _RevisionData(
            self.collect_data.item_key_generator.gen_id(),
            revision, int(timestamp), author, state)

        if not next:
            return

        # In RCS, 'next' always means "where to find the next deltatext
        # needed to retrieve this revision":
        #
        # - On trunk it names what humans would call the *previous*
        #   revision; for example, 1.3's RCS 'next' is 1.2.
        #
        # - On a branch it names what humans would call the *next*
        #   revision; for example, 1.1.2.1's RCS 'next' would be
        #   1.1.2.2.
        #
        # That is not the orientation wanted here, so the (parent,
        # child) pair is ordered according to whether REVISION is on
        # trunk.
        if is_trunk_revision(revision):
            dependency = (next, revision)
        else:
            dependency = (revision, next)
        self._primary_dependencies.append(dependency)

    def tree_completed(self):
        """Finish up once the whole revision tree has been delivered.

        The dependencies gathered during parsing are resolved, then the
        preliminary CVSFileItems for the file is built and checked for
        link consistency.

        This is a callback method declared in Sink."""

        self._resolve_primary_dependencies()
        self._resolve_branch_dependencies()
        self._sort_branches()
        self._resolve_tag_dependencies()

        # Gather the preliminary items: revisions first, then branches,
        # then tags.
        cvs_items = list(self._get_cvs_revisions())
        cvs_items += self._get_cvs_branches()
        cvs_items += self._get_cvs_tags()

        file_items = CVSFileItems(self.cvs_file, self.pdc.trunk, cvs_items)
        self._cvs_file_items = file_items

        file_items.check_link_consistency()

    def _resolve_primary_dependencies(self):
        """Turn the queued (parent, child) pairs into parent/child links.

        For every pair in self._primary_dependencies, the child is
        stored on the parent's _RevisionData and the parent on the
        child's.  Each link is asserted to be unset beforehand."""

        revs = self._rev_data
        for parent_rev, child_rev in self._primary_dependencies:
            parent_entry = revs[parent_rev]
            assert parent_entry.child is None
            parent_entry.child = child_rev

            child_entry = revs[child_rev]
            assert child_entry.parent is None
            child_entry.parent = parent_rev

    def _resolve_branch_dependencies(self):
        """Resolve dependencies involving branches.

        Every branch in self.sdc.branches_data is appended to the
        branches_data list of the revision that it sprouts from.  If
        something was committed on the branch, the first revision on
        the branch is also linked to the branch and to that parent
        revision.

        A branch whose parent revision was never defined is reported
        with a warning and removed from self.sdc.branches_data."""

        # Iterate over a snapshot: entries may be deleted from the
        # dictionary inside the loop, which is not allowed while
        # iterating over the dictionary (or a view of it) directly.
        for branch_data in list(self.sdc.branches_data.values()):
            try:
                parent_data = self._rev_data[branch_data.parent]
            except KeyError:
                logger.warn(
                    'In %r:\n'
                    '    branch %r references non-existing revision %s\n'
                    '    and will be ignored.' % (
                        self.cvs_file.rcs_path,
                        branch_data.symbol.name,
                        branch_data.parent,
                    ))
                del self.sdc.branches_data[branch_data.branch_number]
                continue

            # The branch_data's parent has the branch as a child
            # regardless of whether the branch had any subsequent
            # commits:
            parent_data.branches_data.append(branch_data)

            if branch_data.child is None:
                # Nothing was committed on the branch.
                continue

            # Something was committed on the branch: store a reference
            # to the branch_data in the child, define the child's parent
            # to be the branch's parent, and list the child in the
            # branch parent's branches_revs_data.
            child_data = self._rev_data[branch_data.child]
            assert child_data.parent_branch_data is None
            child_data.parent_branch_data = branch_data
            assert child_data.parent is None
            child_data.parent = branch_data.parent
            parent_data.branches_revs_data.append(branch_data.child)

    def _sort_branches(self):
        """Sort the branches sprouting from each revision in creation order.

        Creation order is taken to be the reverse of the order that they
        are listed in the symbols part of the RCS file.  (If a branch is
        created then deleted, a later branch can be assigned the
        recycled branch number; therefore branch numbers are not an
        indication of creation order.)

        Concretely, each revision's branches_data list is sorted in
        place by descending id."""

        for rev_data in self._rev_data.values():
            # A key function with reverse=True gives the same (stable)
            # ordering as a negated cmp function, without calling back
            # into Python for every comparison.
            rev_data.branches_data.sort(
                key=lambda branch_data: branch_data.id, reverse=True)

    def _resolve_tag_dependencies(self):
        """Resolve dependencies involving tags.

        The tags listed in self.sdc.tags_data for a revision are
        appended to the tags_data list of that revision.  Tags that
        refer to a revision that was never defined are reported with a
        warning and their entry is removed from self.sdc.tags_data."""

        # Iterate over a snapshot: entries may be deleted from the
        # dictionary inside the loop, which is not allowed while
        # iterating over the dictionary (or a view of it) directly.
        for (rev, tag_data_list) in list(self.sdc.tags_data.items()):
            try:
                parent_data = self._rev_data[rev]
            except KeyError:
                logger.warn(
                    'In %r:\n'
                    '    the following tag(s) reference non-existing revision %s\n'
                    '    and will be ignored:\n'
                    '    %s' % (
                        self.cvs_file.rcs_path,
                        rev,
                        ', '.join([
                            repr(tag_data.symbol.name)
                            for tag_data in tag_data_list
                        ]),
                    ))
                del self.sdc.tags_data[rev]
                continue

            for tag_data in tag_data_list:
                assert tag_data.rev == rev
                # The tag_data's rev has the tag as a child:
                parent_data.tags_data.append(tag_data)

    def _get_cvs_branches(self):
        """Generate one CVSBranch per entry of self.sdc.branches_data."""

        sdc = self.sdc
        for data in sdc.branches_data.values():
            parent_rev = data.parent
            yield CVSBranch(
                data.id, self.cvs_file, data.symbol,
                data.branch_number,
                sdc.rev_to_lod(parent_rev),
                self._get_rev_id(parent_rev),
                self._get_rev_id(data.child),
                None,
            )

    def _get_cvs_tags(self):
        """Generate one CVSTag per tag listed in self.sdc.tags_data."""

        sdc = self.sdc
        for tag_list in sdc.tags_data.values():
            for tag in tag_list:
                tagged_rev = tag.rev
                yield CVSTag(
                    tag.id, self.cvs_file, tag.symbol,
                    sdc.rev_to_lod(tagged_rev),
                    self._get_rev_id(tagged_rev),
                    None,
                )

    def set_description(self, description):
        """Store DESCRIPTION on the CVSFile, then determine its properties.

        The properties are determined using the file property setters
        of the current Ctx().

        This is a callback method declared in Sink."""

        cvs_file = self.cvs_file
        cvs_file.description = description
        cvs_file.determine_file_properties(Ctx().file_property_setters)

    def set_revision_info(self, revision, log, text):
        """Record the log message and deltatext information of REVISION.

        The metadata for LOG is stored via the metadata logger and its
        id is remembered in the corresponding CVSRevision, together with
        whether TEXT is non-empty.  A second block for a revision that
        already has metadata is ignored with a warning.

        This is a callback method declared in Sink."""

        rev_data = self._rev_data[revision]
        cvs_rev = self._cvs_file_items[rev_data.cvs_rev_id]

        if cvs_rev.metadata_id is not None:
            # Repositories have been reported in which the deltatext
            # block for revision 1.1 appears twice.  Whether that comes
            # from a CVS/RCS bug or from botched hand-editing of the
            # repository is not known.  Empirically, cvs and rcs both
            # use the first version when checking out data, so the same
            # is done here.  (For the record: "cvs log" fails on such a
            # file; "rlog" prints the log message from the first block
            # and ignores the second one.)
            logger.warn(
                "%s: in '%s':\n"
                "   Deltatext block for revision %s appeared twice;\n"
                "   ignoring the second occurrence.\n"
                % (warning_prefix, self.cvs_file.rcs_path, revision))
            return

        # Trunk revisions have no branch name.
        branch_name = None
        if not is_trunk_revision(revision):
            branch_name = self.sdc.rev_to_branch_data(revision).symbol.name

        cvs_rev.metadata_id = self.collect_data.metadata_logger.store(
            self.project, branch_name, rev_data.author, log)
        cvs_rev.deltatext_exists = bool(text)

        # For revision 1.1, determine whether the file appears to have
        # been created via 'cvs import' rather than 'cvs add'.  The test
        # is that the log message CVS uses for 1.1 in imports is
        # "Initial revision\n" with no period.  (This fact helps
        # determine whether this file might have had a default branch in
        # the past.)
        if revision == '1.1':
            self._file_imported = (log == 'Initial revision\n')

    def parse_completed(self):
        """Check that every revision of the file received its info section.

        A fatal error is recorded for each CVSRevision whose metadata_id
        is still None.

        This is a callback method declared in Sink."""

        for item in self._cvs_file_items.values():
            if not isinstance(item, CVSRevision):
                continue
            if item.metadata_id is None:
                self.collect_data.record_fatal_error(
                    '%r has no deltatext section for revision %s'
                    % (self.cvs_file.rcs_path, item.rev))

    def _determine_operation(self, rev_data):
        """Return the entry of cvs_revision_type_map that fits REV_DATA.

        The lookup key is a pair of booleans: whether REV_DATA's state
        is something other than 'dead', and whether REV_DATA has a known
        parent whose state is something other than 'dead'."""

        parent_data = self._rev_data.get(rev_data.parent)
        is_live = rev_data.state != 'dead'
        parent_is_live = (
            parent_data is not None and parent_data.state != 'dead')
        return cvs_revision_type_map[(is_live, parent_is_live)]

    def _get_cvs_revisions(self):
        """Yield a CVSRevision for each entry stored in self._rev_data.

        This is a generator: each revision object is only created (via
        _get_cvs_revision()) when the caller asks for the next item."""

        make_revision = self._get_cvs_revision
        for data in self._rev_data.itervalues():
            yield make_revision(data)

    def _get_cvs_revision(self, rev_data):
        """Build and return the CVSRevision that corresponds to REV_DATA.

        The concrete revision class is whatever _determine_operation()
        selects for REV_DATA.  Revision numbers are translated to ids
        with self._get_rev_id(), and the line of development is looked
        up through self.sdc."""

        # Ids of the symbols and branch commits that sprout from this
        # revision.
        branch_ids = [data.id for data in rev_data.branches_data]
        branch_commit_ids = [
            self._get_rev_id(branch_rev)
            for branch_rev in rev_data.branches_revs_data
        ]
        tag_ids = [data.id for data in rev_data.tags_data]

        revision_class = self._determine_operation(rev_data)

        # Resolve the computed arguments up front, in the order in which
        # they are passed to the constructor.
        rev_id = self._get_rev_id(rev_data.rev)
        parent_id = self._get_rev_id(rev_data.parent)
        child_id = self._get_rev_id(rev_data.child)
        lod = self.sdc.rev_to_lod(rev_data.rev)
        first_on_branch_id = rev_data.get_first_on_branch_id()

        # NOTE(review): the literal None/True/False arguments below are
        # positional; their meaning is defined by the revision class
        # constructor, which is not visible here.
        return revision_class(
            rev_id,
            self.cvs_file,
            rev_data.timestamp,
            None,
            parent_id,
            child_id,
            rev_data.rev,
            True,
            lod,
            first_on_branch_id,
            False,
            None,
            None,
            tag_ids,
            branch_ids,
            branch_commit_ids,
            rev_data.revision_reader_token,
        )

    def get_cvs_file_items(self):
        """Finish up and return the CVSFileItems instance for this file.

        Non-trunk default branch revisions are fixed up first (see
        _process_ntdbrs()).  Afterwards the reference to self.sdc is
        deleted, so this method must only be called once."""

        self._process_ntdbrs()
        cvs_file_items = self._cvs_file_items

        # self and sdc refer to each other; dropping our reference breaks
        # the loop so that the memory for both can be freed.
        del self.sdc

        return cvs_file_items

    def _process_ntdbrs(self):
        """Fix up any non-trunk default branch revisions (if present).

    If a non-trunk default branch is determined to have existed, yield
    the _RevisionData.ids for all revisions that were once non-trunk
    default revisions, in dependency order.

    There are two cases to handle:

    One case is simple.  The RCS file lists a default branch
    explicitly in its header, such as '1.1.1'.  In this case, we know
    that every revision on the vendor branch is to be treated as head
    of trunk at that point in time.

    But there's also a degenerate case.  The RCS file does not
    currently have a default branch, yet we can deduce that for some
    period in the past it probably *did* have one.  For example, the
    file has vendor revisions 1.1.1.1 -> 1.1.1.96, all of which are
    dated before 1.2, and then it has 1.1.1.97 -> 1.1.1.100 dated
    after 1.2.  In this case, we should record 1.1.1.96 as the last
    vendor revision to have been the head of the default branch.

    If any non-trunk default branch revisions are found:

    - Set their ntdbr members to True.

    - Connect the last one with revision 1.2.

    - Remove revision 1.1 if it is not needed.

    """

        try:
            if self.default_branch:
                try:
                    vendor_cvs_branch_id = self.sdc.branches_data[
                        self.default_branch].id
                except KeyError:
                    logger.warn(
                        '%s: In %s:\n'
                        '    vendor branch %r is not present in file and will be ignored.'
                        % (
                            warning_prefix,
                            self.cvs_file.rcs_path,
                            self.default_branch,
                        ))
                    self.default_branch = None
                    return
                vendor_lod_items = self._cvs_file_items.get_lod_items(
                    self._cvs_file_items[vendor_cvs_branch_id])
                if not self._cvs_file_items.process_live_ntdb(
                        vendor_lod_items):
                    return
            elif self._file_imported:
                vendor_branch_data = self.sdc.branches_data.get('1.1.1')
                if vendor_branch_data is None:
                    return
                else:
                    vendor_lod_items = self._cvs_file_items.get_lod_items(
                        self._cvs_file_items[vendor_branch_data.id])
                    if not self._cvs_file_items.process_historical_ntdb(
                            vendor_lod_items):
                        return
            else:
                return
        except VendorBranchError, e:
            self.collect_data.record_fatal_error(str(e))
            return

        if self._file_imported:
            self._cvs_file_items.imported_remove_1_1(vendor_lod_items)

        self._cvs_file_items.check_link_consistency()