def __init__(self):
    """Create the node database and size the in-memory node cache.

    A new IndexedDatabase is created on the MIRROR_NODES_STORE and
    MIRROR_NODES_INDEX_TABLE temporary files.  The cache limit is
    derived from the number of CVSDirectory entries found in the CVS
    file database, but is never smaller than MIN_CACHE_LIMIT."""

    self.cvs_file_db = Ctx()._cvs_file_db

    store_path = artifact_manager.get_temp_file(config.MIRROR_NODES_STORE)
    index_path = artifact_manager.get_temp_file(
        config.MIRROR_NODES_INDEX_TABLE
        )
    self.db = IndexedDatabase(
        store_path, index_path, DB_OPEN_NEW, serializer=MarshalSerializer(),
        )

    # The largest node_id stored by each call to write_new_nodes(),
    # one list entry per call:
    self._max_node_ids = [0]

    # A map {node_id : {cvs_path : node_id}}:
    self._cache = {}

    # Count the directories in the repository:
    num_dirs = 0
    for cvs_path in self.cvs_file_db.itervalues():
        if isinstance(cvs_path, CVSDirectory):
            num_dirs += 1

    # Scale the cache limit with the directory count, subject to the
    # configured minimum:
    scaled_limit = int(self.CACHE_SIZE_MULTIPLIER * num_dirs)
    self._cache_max_size = max(scaled_limit, self.MIN_CACHE_LIMIT)
Example #2
0
  def start(self):
    """Open the databases used while reading revision contents.

    The RCS delta and tree databases are opened with DB_OPEN_READ and
    a new checkout database is created.  The checkout database uses a
    MarshalSerializer, wrapped in a CompressingSerializer when
    self._compress is true."""

    get_temp_file = artifact_manager.get_temp_file

    deltas_store = get_temp_file(config.RCS_DELTAS_STORE)
    deltas_index = get_temp_file(config.RCS_DELTAS_INDEX_TABLE)
    self._delta_db = IndexedDatabase(deltas_store, deltas_index, DB_OPEN_READ)
    # Install a do-nothing __delitem__ on this one database instance.
    # NOTE(review): "del db[key]" only consults an instance-level
    # __delitem__ if IndexedDatabase is an old-style class -- confirm.
    self._delta_db.__delitem__ = lambda id: None

    trees_store = get_temp_file(config.RCS_TREES_STORE)
    trees_index = get_temp_file(config.RCS_TREES_INDEX_TABLE)
    self._tree_db = IndexedDatabase(trees_store, trees_index, DB_OPEN_READ)

    if self._compress:
      serializer = CompressingSerializer(MarshalSerializer())
    else:
      serializer = MarshalSerializer()
    checkout_path = get_temp_file(config.CVS_CHECKOUT_DB)
    self._co_db = Database(checkout_path, DB_OPEN_NEW, serializer)

    # The CVSFile instances whose TextRecords have been read so far:
    self._loaded_files = set()

    # The text record database, constructed from the delta database
    # and the checkout database:
    self._text_record_db = TextRecordDatabase(self._delta_db, self._co_db)
Example #3
0
class InternalRevisionCollector(RevisionCollector):
  """The RevisionCollector used by InternalRevisionReader."""

  def __init__(self, compress):
    """Remember whether the stored deltas should be compressed.

    COMPRESS is tested for truth in start(); if it is true, the delta
    serializer is wrapped in a CompressingSerializer."""

    RevisionCollector.__init__(self)
    self._compress = compress

  def register_artifacts(self, which_pass):
    """Register the delta and tree temporary files for WHICH_PASS."""

    artifact_manager.register_temp_file(
        config.RCS_DELTAS_INDEX_TABLE, which_pass
        )
    artifact_manager.register_temp_file(config.RCS_DELTAS_STORE, which_pass)
    artifact_manager.register_temp_file(
        config.RCS_TREES_INDEX_TABLE, which_pass
        )
    artifact_manager.register_temp_file(config.RCS_TREES_STORE, which_pass)

  def start(self):
    """Create the delta database and the per-file tree database."""

    serializer = MarshalSerializer()
    if self._compress:
      serializer = CompressingSerializer(serializer)
    self._delta_db = IndexedDatabase(
        artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
        artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
        DB_OPEN_NEW, serializer,
        )
    # The record classes used to prime the pickle serializer of the
    # tree database:
    primer = (FullTextRecord, DeltaTextRecord)
    self._rcs_trees = IndexedDatabase(
        artifact_manager.get_temp_file(config.RCS_TREES_STORE),
        artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
        DB_OPEN_NEW, PrimedPickleSerializer(primer),
        )

  def _writeout(self, text_record, text):
    """Record TEXT_RECORD and store TEXT under its id.

    TEXT_RECORD is added to the text record database of the file that
    is currently being processed; TEXT is written to the delta
    database keyed by TEXT_RECORD.id."""

    self.text_record_db.add(text_record)
    self._delta_db[text_record.id] = text

  def process_file(self, cvs_file_items):
    """Read revision information for the file described by CVS_FILE_ITEMS.

    Compute the text record refcounts, discard any records that are
    unneeded, and store the text records for the file to the
    _rcs_trees database."""

    # A map from cvs_rev_id to TextRecord instance:
    self.text_record_db = TextRecordDatabase(self._delta_db, NullDatabase())

    # Close the RCS file explicitly once parsing is over (or has
    # failed) instead of leaving the handle to the garbage collector:
    rcs_file = open(cvs_file_items.cvs_file.rcs_path, 'rb')
    try:
      cvs2svn_rcsparse.parse(rcs_file, _Sink(self, cvs_file_items))
    finally:
      rcs_file.close()

    self.text_record_db.recompute_refcounts(cvs_file_items)
    self.text_record_db.free_unused()
    self._rcs_trees[cvs_file_items.cvs_file.id] = self.text_record_db
    # The per-file record database only lives for the duration of
    # this call:
    del self.text_record_db

  def finish(self):
    """Close the delta and tree databases."""

    self._delta_db.close()
    self._rcs_trees.close()
Example #4
0
class InternalRevisionExcluder(RevisionExcluder):
    """The RevisionExcluder used by InternalRevisionReader."""

    def register_artifacts(self, which_pass):
        """Register the temporary files used during WHICH_PASS.

        The RCS_TREES store and index table are registered with
        register_temp_file_needed(); the RCS_TREES_FILTERED store and
        index table are registered with register_temp_file()."""

        for needed_file in (
                config.RCS_TREES_STORE,
                config.RCS_TREES_INDEX_TABLE):
            artifact_manager.register_temp_file_needed(
                needed_file, which_pass)
        for filtered_file in (
                config.RCS_TREES_FILTERED_STORE,
                config.RCS_TREES_FILTERED_INDEX_TABLE):
            artifact_manager.register_temp_file(filtered_file, which_pass)

    def start(self):
        """Open the tree database for reading and create the filtered one."""

        get_temp_file = artifact_manager.get_temp_file

        trees_store = get_temp_file(config.RCS_TREES_STORE)
        trees_index = get_temp_file(config.RCS_TREES_INDEX_TABLE)
        self._tree_db = IndexedDatabase(
            trees_store, trees_index, DB_OPEN_READ)

        # The record classes used to prime the pickle serializer:
        record_classes = (FullTextRecord, DeltaTextRecord)
        filtered_store = get_temp_file(config.RCS_TREES_FILTERED_STORE)
        filtered_index = get_temp_file(
            config.RCS_TREES_FILTERED_INDEX_TABLE)
        self._new_tree_db = IndexedDatabase(
            filtered_store, filtered_index, DB_OPEN_NEW,
            PrimedPickleSerializer(record_classes))

    def process_file(self, cvs_file_items):
        """Copy the records of one file into the filtered database.

        The entry stored under the id of CVS_FILE_ITEMS.cvs_file is
        read from the tree database, its refcounts are recomputed
        against CVS_FILE_ITEMS, unused records are freed, and the
        result is stored under the same id in the new database."""

        file_id = cvs_file_items.cvs_file.id
        records = self._tree_db[file_id]
        records.recompute_refcounts(cvs_file_items)
        records.free_unused()
        self._new_tree_db[file_id] = records

    def finish(self):
        """Close both tree databases."""

        for database in (self._tree_db, self._new_tree_db):
            database.close()
Example #5
0
 def start(self):
   """Open the tree database for reading and create the filtered one."""

   get_temp_file = artifact_manager.get_temp_file

   # The existing RCS_TREES store/index pair, opened with DB_OPEN_READ:
   trees_store = get_temp_file(config.RCS_TREES_STORE)
   trees_index = get_temp_file(config.RCS_TREES_INDEX_TABLE)
   self._tree_db = IndexedDatabase(trees_store, trees_index, DB_OPEN_READ)

   # The RCS_TREES_FILTERED store/index pair, created anew with a
   # pickle serializer primed with the two text record classes:
   record_classes = (FullTextRecord, DeltaTextRecord)
   filtered_store = get_temp_file(config.RCS_TREES_FILTERED_STORE)
   filtered_index = get_temp_file(config.RCS_TREES_FILTERED_INDEX_TABLE)
   self._new_tree_db = IndexedDatabase(
       filtered_store, filtered_index,
       DB_OPEN_NEW, PrimedPickleSerializer(record_classes))
Example #6
0
 def start(self):
     """Open the source tree database and create the filtered tree database.

     self._tree_db is opened with DB_OPEN_READ on the RCS_TREES files;
     self._new_tree_db is created with DB_OPEN_NEW on the
     RCS_TREES_FILTERED files."""

     temp_file = artifact_manager.get_temp_file

     source_store = temp_file(config.RCS_TREES_STORE)
     source_index = temp_file(config.RCS_TREES_INDEX_TABLE)
     self._tree_db = IndexedDatabase(source_store, source_index, DB_OPEN_READ)

     # Classes handed to the primed pickle serializer:
     primed_classes = (FullTextRecord, DeltaTextRecord)
     target_store = temp_file(config.RCS_TREES_FILTERED_STORE)
     target_index = temp_file(config.RCS_TREES_FILTERED_INDEX_TABLE)
     self._new_tree_db = IndexedDatabase(
         target_store, target_index, DB_OPEN_NEW,
         PrimedPickleSerializer(primed_classes))
Example #7
0
 def start(self):
     """Create the delta database and the tree database.

     Deltas are serialized with a MarshalSerializer, wrapped in a
     CompressingSerializer when self._compress is true.  The tree
     database uses a pickle serializer primed with the text record
     classes."""

     if self._compress:
         delta_serializer = CompressingSerializer(MarshalSerializer())
     else:
         delta_serializer = MarshalSerializer()

     temp_file = artifact_manager.get_temp_file

     deltas_store = temp_file(config.RCS_DELTAS_STORE)
     deltas_index = temp_file(config.RCS_DELTAS_INDEX_TABLE)
     self._rcs_deltas = IndexedDatabase(
         deltas_store, deltas_index, DB_OPEN_NEW, delta_serializer)

     record_classes = (FullTextRecord, DeltaTextRecord)
     trees_store = temp_file(config.RCS_TREES_STORE)
     trees_index = temp_file(config.RCS_TREES_INDEX_TABLE)
     self._rcs_trees = IndexedDatabase(
         trees_store, trees_index, DB_OPEN_NEW,
         PrimedPickleSerializer(record_classes))
Example #8
0
 def start(self):
   """Create the delta database and the per-file tree database.

   Both databases are opened with DB_OPEN_NEW.  The delta serializer
   is a MarshalSerializer that is additionally wrapped in a
   CompressingSerializer when self._compress is true."""

   if self._compress:
     delta_serializer = CompressingSerializer(MarshalSerializer())
   else:
     delta_serializer = MarshalSerializer()

   get_temp_file = artifact_manager.get_temp_file

   deltas_store = get_temp_file(config.RCS_DELTAS_STORE)
   deltas_index = get_temp_file(config.RCS_DELTAS_INDEX_TABLE)
   self._delta_db = IndexedDatabase(
       deltas_store, deltas_index, DB_OPEN_NEW, delta_serializer,
       )

   # The record classes used to prime the pickle serializer:
   record_classes = (FullTextRecord, DeltaTextRecord)
   trees_store = get_temp_file(config.RCS_TREES_STORE)
   trees_index = get_temp_file(config.RCS_TREES_INDEX_TABLE)
   self._rcs_trees = IndexedDatabase(
       trees_store, trees_index,
       DB_OPEN_NEW, PrimedPickleSerializer(record_classes),
       )
Example #9
0
 def __init__(self, mode):
   """Open the SVN commit database and the CVS-to-SVN revision table.

   MODE must be DB_OPEN_NEW or DB_OPEN_READ and is passed on to both
   databases.  Raise RuntimeError for any other value."""

   self.mode = mode
   if mode not in (DB_OPEN_NEW, DB_OPEN_READ):
     # The call form of raise is valid in both Python 2 and Python 3;
     # the "raise Class, message" form is a syntax error in Python 3.
     raise RuntimeError("Invalid 'mode' argument to PersistenceManager")
   # The commit classes used to prime the pickle serializer:
   primer = (
       SVNInitialProjectCommit,
       SVNPrimaryCommit,
       SVNPostCommit,
       SVNBranchCommit,
       SVNTagCommit,
       )
   serializer = PrimedPickleSerializer(primer)
   # NOTE(review): the index table path is passed before the store
   # path here, the reverse of other IndexedDatabase callers --
   # confirm the intended argument order before changing it.
   self.svn_commit_db = IndexedDatabase(
       artifact_manager.get_temp_file(config.SVN_COMMITS_INDEX_TABLE),
       artifact_manager.get_temp_file(config.SVN_COMMITS_STORE),
       mode, serializer)
   # Packed table of integers whose packer is constructed with
   # SVN_INVALID_REVNUM:
   self.cvs2svn_db = RecordTable(
       artifact_manager.get_temp_file(config.CVS_REVS_TO_SVN_REVNUMS),
       mode, SignedIntegerPacker(SVN_INVALID_REVNUM))
Example #10
0
def MetadataDatabase(store_filename, index_table_filename, mode):
  """Return an IndexedDatabase that stores Metadata instances.

  The database manages a map

      id -> Metadata instance

  where id is a unique identifier for the metadata.  The values
  describe CVSRevisions.  STORE_FILENAME, INDEX_TABLE_FILENAME and
  MODE are passed through to IndexedDatabase unchanged."""

  # Prime the pickle serializer with the only class that is stored:
  metadata_serializer = PrimedPickleSerializer((Metadata,))
  return IndexedDatabase(
      store_filename, index_table_filename, mode, metadata_serializer,
      )
Example #11
0
class InternalRevisionExcluder(RevisionExcluder):
  """The RevisionExcluder used by InternalRevisionReader."""

  def register_artifacts(self, which_pass):
    """Register the temporary files used during WHICH_PASS.

    The RCS_TREES store and index table are registered with
    register_temp_file_needed(); the RCS_TREES_FILTERED store and
    index table are registered with register_temp_file()."""

    needed_files = (config.RCS_TREES_STORE, config.RCS_TREES_INDEX_TABLE)
    for needed_file in needed_files:
      artifact_manager.register_temp_file_needed(needed_file, which_pass)

    filtered_files = (
        config.RCS_TREES_FILTERED_STORE,
        config.RCS_TREES_FILTERED_INDEX_TABLE,
        )
    for filtered_file in filtered_files:
      artifact_manager.register_temp_file(filtered_file, which_pass)

  def start(self):
    """Open the tree database for reading and create the filtered one."""

    get_temp_file = artifact_manager.get_temp_file

    trees_store = get_temp_file(config.RCS_TREES_STORE)
    trees_index = get_temp_file(config.RCS_TREES_INDEX_TABLE)
    self._tree_db = IndexedDatabase(trees_store, trees_index, DB_OPEN_READ)

    # The record classes used to prime the pickle serializer:
    record_classes = (FullTextRecord, DeltaTextRecord)
    filtered_store = get_temp_file(config.RCS_TREES_FILTERED_STORE)
    filtered_index = get_temp_file(config.RCS_TREES_FILTERED_INDEX_TABLE)
    self._new_tree_db = IndexedDatabase(
        filtered_store, filtered_index,
        DB_OPEN_NEW, PrimedPickleSerializer(record_classes))

  def process_file(self, cvs_file_items):
    """Copy the records of one file into the filtered database.

    The entry stored under the id of CVS_FILE_ITEMS.cvs_file is read
    from the tree database, its refcounts are recomputed against
    CVS_FILE_ITEMS, unused records are freed, and the result is stored
    under the same id in the new database."""

    file_id = cvs_file_items.cvs_file.id
    records = self._tree_db[file_id]
    records.recompute_refcounts(cvs_file_items)
    records.free_unused()
    self._new_tree_db[file_id] = records

  def finish(self):
    """Close both tree databases."""

    for database in (self._tree_db, self._new_tree_db):
      database.close()
Example #12
0
    def open(self):
        """Initialize the mirror's state so that SVNCommits can be processed."""

        self._key_generator = KeyGenerator()

        # No delegates are registered initially:
        self._delegates = []

        # Maps each LOD that has been defined so far to its LODHistory
        # instance:
        self._lod_histories = {}

        # The counterpart of the 'nodes' table in a Subversion fs.
        # ('representations' and 'strings' tables are not needed
        # because only metadata is tracked, not file contents.)
        nodes_store = artifact_manager.get_temp_file(
            config.SVN_MIRROR_NODES_STORE)
        nodes_index = artifact_manager.get_temp_file(
            config.SVN_MIRROR_NODES_INDEX_TABLE)
        self._nodes_db = IndexedDatabase(
            nodes_store, nodes_index, DB_OPEN_NEW,
            serializer=_NodeSerializer())

        # Begin at revision 0 with no root node; the root node is
        # created by _open_writable_root_node.
        self._youngest = 0
Example #13
0
 def __init__(self, mode):
     """Open the SVN commit database and the CVS-to-SVN revision table.

     MODE must be DB_OPEN_NEW or DB_OPEN_READ and is passed on to both
     databases.  Raise RuntimeError for any other value."""

     self.mode = mode
     valid_modes = (DB_OPEN_NEW, DB_OPEN_READ)
     if mode not in valid_modes:
         raise RuntimeError("Invalid 'mode' argument to PersistenceManager")

     # The commit classes used to prime the pickle serializer:
     commit_classes = (
         SVNInitialProjectCommit,
         SVNPrimaryCommit,
         SVNPostCommit,
         SVNBranchCommit,
         SVNTagCommit,
     )
     commit_serializer = PrimedPickleSerializer(commit_classes)

     temp_file = artifact_manager.get_temp_file

     # NOTE(review): the index table path is passed before the store
     # path here, the reverse of other IndexedDatabase callers --
     # confirm the intended argument order.
     commits_index = temp_file(config.SVN_COMMITS_INDEX_TABLE)
     commits_store = temp_file(config.SVN_COMMITS_STORE)
     self.svn_commit_db = IndexedDatabase(
         commits_index, commits_store, mode, commit_serializer)

     revnums_path = temp_file(config.CVS_REVS_TO_SVN_REVNUMS)
     self.cvs2svn_db = RecordTable(
         revnums_path, mode, SignedIntegerPacker(SVN_INVALID_REVNUM))
Example #14
0
  def open(self):
    """Initialize the mirror's state so that SVNCommits can be processed."""

    self._key_generator = KeyGenerator()

    # No delegates are registered initially:
    self._delegates = []

    # Maps each LOD that has been defined so far to its LODHistory
    # instance:
    self._lod_histories = {}

    # The counterpart of the 'nodes' table in a Subversion fs.
    # ('representations' and 'strings' tables are not needed because
    # only metadata is tracked, not file contents.)
    nodes_store = artifact_manager.get_temp_file(
        config.SVN_MIRROR_NODES_STORE
        )
    nodes_index = artifact_manager.get_temp_file(
        config.SVN_MIRROR_NODES_INDEX_TABLE
        )
    self._nodes_db = IndexedDatabase(
        nodes_store, nodes_index, DB_OPEN_NEW, serializer=_NodeSerializer()
        )

    # Begin at revision 0 with no root node; the root node is created
    # by _open_writable_root_node.
    self._youngest = 0
Example #15
0
class SVNRepositoryMirror:
  """Mirror a Subversion repository and its history.

  Mirror a Subversion repository as it is constructed, one SVNCommit
  at a time.  For each LineOfDevelopment we store a skeleton of the
  directory structure within that LOD for each SVN revision number in
  which it changed.  The creation of a dumpfile or Subversion
  repository is handled by delegates.  See the add_delegate() method
  for how to set delegates.

  For each LOD that has been seen so far, an LODHistory instance is
  stored in self._lod_histories.  An LODHistory keeps track of each
  SVNRevision in which files were added to or deleted from that LOD,
  as well as the node id of the node tree describing the LOD contents
  at that SVN revision.

  The LOD trees themselves are stored in the _nodes_db database, which
  maps node ids to nodes.  A node is a map from CVSPath.id to ids of
  the corresponding subnodes.  The _nodes_db is stored on disk and
  each access is expensive.

  The _nodes_db database only holds the nodes for old revisions.  The
  revision that is being constructed is kept in memory in the
  _new_nodes map, which is cheap to access.

  You must invoke start_commit() before each SVNCommit and
  end_commit() afterwards.

  *** WARNING *** Path arguments to methods in this class MUST NOT
      have leading or trailing slashes."""

  class ParentMissingError(Exception):
    """Raised when a path is added whose parent directory is missing.

    Signals an attempt to add a path to the repository mirror although
    the path's parent does not exist in the youngest revision of the
    repository."""

  class PathExistsError(Exception):
    """Raised when a path is added that is already in the mirror.

    Signals an attempt to add a path to the repository mirror although
    that path already exists in the youngest revision of the
    repository."""

  def register_artifacts(self, which_pass):
    """Register the temporary files that this object uses in WHICH_PASS.

    The index table and the store of the mirror nodes database are
    both registered with the artifact manager."""

    node_files = (
        config.SVN_MIRROR_NODES_INDEX_TABLE,
        config.SVN_MIRROR_NODES_STORE,
        )
    for node_file in node_files:
      artifact_manager.register_temp_file(node_file, which_pass)

  def open(self):
    """Initialize the mirror's state so that SVNCommits can be processed."""

    self._key_generator = KeyGenerator()

    # No delegates are registered initially; see add_delegate():
    self._delegates = []

    # Maps each LOD that has been defined so far to its LODHistory
    # instance:
    self._lod_histories = {}

    # The counterpart of the 'nodes' table in a Subversion fs.
    # ('representations' and 'strings' tables are not needed because
    # only metadata is tracked, not file contents.)
    nodes_store = artifact_manager.get_temp_file(
        config.SVN_MIRROR_NODES_STORE
        )
    nodes_index = artifact_manager.get_temp_file(
        config.SVN_MIRROR_NODES_INDEX_TABLE
        )
    self._nodes_db = IndexedDatabase(
        nodes_store, nodes_index, DB_OPEN_NEW, serializer=_NodeSerializer()
        )

    # Begin at revision 0 with no root node; the root node is created
    # by _open_writable_root_node.
    self._youngest = 0

  def start_commit(self, revnum, revprops):
    """Start a new commit."""

    self._youngest = revnum

    # A map {node_id : {CVSPath : node_id}}.
    self._new_nodes = {}

    self._invoke_delegates('start_commit', revnum, revprops)

  def end_commit(self):
    """Called at the end of each commit.

    This method copies the newly created nodes to the on-disk nodes
    db."""

    # Copy the new nodes to the _nodes_db
    for id, value in self._new_nodes.items():
      self._nodes_db[id] = value

    del self._new_nodes

    self._invoke_delegates('end_commit')

  def _get_lod_history(self, lod):
    """Return the LODHistory instance describing LOD.

    Create a new (empty) LODHistory if it doesn't yet exist."""

    try:
      return self._lod_histories[lod]
    except KeyError:
      lod_history = LODHistory()
      self._lod_histories[lod] = lod_history
      return lod_history

  def _create_empty_node(self):
    """Return a new writable node that has no entries.

    The node gets a freshly generated id and its entries are
    registered in self._new_nodes."""

    node_id = self._key_generator.gen_id()
    empty_node = _WritableMirrorNode(self, node_id, {})
    self._new_nodes[empty_node.id] = empty_node.entries
    return empty_node

  def _copy_node(self, old_node):
    """Return a new writable node whose entries are copied from OLD_NODE.

    The copy gets a freshly generated id and a shallow copy of
    OLD_NODE's entries, which is registered in self._new_nodes."""

    node_id = self._key_generator.gen_id()
    copied_entries = old_node.entries.copy()
    writable_copy = _WritableMirrorNode(self, node_id, copied_entries)

    self._new_nodes[writable_copy.id] = writable_copy.entries
    return writable_copy

  def _get_node(self, id):
    """Return the node that has id ID as an instance of _MirrorNode.

    A node found in self._new_nodes is returned as a
    _WritableMirrorNode; otherwise the node is read from
    self._nodes_db and returned as a _ReadOnlyMirrorNode."""

    try:
      new_entries = self._new_nodes[id]
      return _WritableMirrorNode(self, id, new_entries)
    except KeyError:
      # Not created in the current commit, so read it from the db:
      stored_entries = self._nodes_db[id]
      return _ReadOnlyMirrorNode(self, id, stored_entries)

  def _open_readonly_lod_node(self, lod, revnum):
    """Return the node for the root path of LOD as of revision REVNUM.

    The result is an instance of _MirrorNode.  Raise KeyError if the
    path does not exist."""

    node_id = self._get_lod_history(lod).get_id(revnum)
    return self._get_node(node_id)

  def _open_readonly_node(self, cvs_path, lod, revnum):
    """Return the node for CVS_PATH in LOD as of revision REVNUM.

    Return None if CVS_PATH refers to a leaf node.  Raise KeyError if
    the node does not exist."""

    parent_directory = cvs_path.parent_directory
    if parent_directory is None:
      # CVS_PATH is the root of the LOD:
      return self._open_readonly_lod_node(lod, revnum)

    # Resolve the parent first, then look CVS_PATH up inside it:
    parent_node = self._open_readonly_node(parent_directory, lod, revnum)
    return parent_node[cvs_path]

  def _open_writable_lod_node(self, lod, create, invoke_delegates=True):
    """Return a _WritableMirrorNode for the root path of LOD.

    If the path does not exist yet, it is created when CREATE is true;
    otherwise KeyError is raised.  When the path is created, the
    delegates' initialize_lod methods are invoked unless
    INVOKE_DELEGATES is false."""

    lod_history = self._get_lod_history(lod)
    try:
      node_id = lod_history.get_id()
    except KeyError:
      if not create:
        raise
      fresh_node = self._create_empty_node()
      lod_history.update(self._youngest, fresh_node.id)
      if invoke_delegates:
        self._invoke_delegates('initialize_lod', lod)
      return fresh_node

    existing_node = self._get_node(node_id)
    if isinstance(existing_node, _WritableMirrorNode):
      return existing_node

    # The node was created in an earlier revision, so it has to be
    # copied to make it writable:
    writable_copy = self._copy_node(existing_node)
    lod_history.update(self._youngest, writable_copy.id)
    return writable_copy

  def _open_writable_node(self, cvs_directory, lod, create):
    """Return a _WritableMirrorNode for CVS_DIRECTORY in LOD.

    If CREATE is true, the directory node and any missing parent
    directories are created, and the delegates' mkdir methods are
    invoked for each directory created below the LOD root.  Raise
    KeyError if CVS_DIRECTORY does not exist and CREATE is false.
    Raise InternalError if the entry found for CVS_DIRECTORY is
    neither a writable nor a read-only node."""

    parent_directory = cvs_directory.parent_directory
    if parent_directory is None:
      return self._open_writable_lod_node(lod, create)

    parent_node = self._open_writable_node(parent_directory, lod, create)

    try:
      node = parent_node[cvs_directory]
    except KeyError:
      if not create:
        raise
      # The component does not exist, so create it:
      created_node = self._create_empty_node()
      parent_node[cvs_directory] = created_node
      self._invoke_delegates('mkdir', lod, cvs_directory)
      return created_node

    if isinstance(node, _WritableMirrorNode):
      return node

    if isinstance(node, _ReadOnlyMirrorNode):
      # Replace the read-only node by a writable copy in the parent:
      writable_copy = self._copy_node(node)
      parent_node[cvs_directory] = writable_copy
      return writable_copy

    raise InternalError(
        'Attempt to modify file at %s in mirror' % (cvs_directory,)
        )

  def delete_lod(self, lod):
    """Delete the main path of LOD from the tree.

    Trunk paths are never deleted; a request to delete one is silently
    ignored.  Raise KeyError if the path of any other LOD does not
    currently exist."""

    if isinstance(lod, Trunk):
      # Trunk paths are never deleted.
      return

    lod_history = self._get_lod_history(lod)
    if lod_history.exists():
      # Record the deletion as of the youngest revision:
      lod_history.update(self._youngest, None)
      self._invoke_delegates('delete_lod', lod)
    else:
      raise KeyError()

  def delete_path(self, cvs_path, lod, should_prune=False):
    """Delete CVS_PATH from LOD.

    If CVS_PATH has no parent directory, the whole LOD is deleted via
    delete_lod().  If SHOULD_PRUNE is true, parent directories that
    become empty are deleted as well."""

    parent_directory = cvs_path.parent_directory
    if parent_directory is None:
      self.delete_lod(lod)
      return

    parent_node = self._open_writable_node(parent_directory, lod, False)
    del parent_node[cvs_path]
    self._invoke_delegates('delete_path', lod, cvs_path)

    if not should_prune:
      return

    # This recursion makes pruning an O(n^2) operation in the worst
    # case (where n is the depth of the path), but the worst case is
    # probably rare and the constant cost is pretty low.  Another
    # drawback is that a delete is issued for each pruned path rather
    # than a single delete for the topmost pruned directory.
    if len(parent_node) == 0:
      self.delete_path(parent_directory, lod, True)

  def initialize_project(self, project):
    """Set up the basic structure for PROJECT.

    The delegates' initialize_project methods are invoked, then the
    root node of the project's trunk is created without notifying the
    delegates a second time."""

    self._invoke_delegates('initialize_project', project)

    trunk = Ctx()._symbol_db.get_symbol(project.trunk_id)
    self._open_writable_lod_node(
        trunk, create=True, invoke_delegates=False
        )

  def change_path(self, cvs_rev):
    """Report a content change of CVS_REV's path to the delegates."""

    # The nodes need no update: the mirror only tracks whether paths
    # are present or absent, and a change of file contents does not
    # change any path.
    commit_item = SVNCommitItem(cvs_rev, False)
    self._invoke_delegates('change_path', commit_item)

  def add_path(self, cvs_rev):
    """Add the file of CVS_REV to the mirror in CVS_REV's LOD.

    The parent directory node is opened writable (missing directories
    are created), the file is recorded in it, and the delegates'
    add_path methods are invoked.  Raise PathExistsError if the file
    is already present in the parent node."""

    cvs_file = cvs_rev.cvs_file
    parent_node = self._open_writable_node(
        cvs_file.parent_directory, cvs_rev.lod, True
        )

    if cvs_file in parent_node:
      message = (
          'Attempt to add path \'%s\' to repository mirror '
          'when it already exists in the mirror.'
          % (cvs_rev.get_svn_path(),)
          )
      raise self.PathExistsError(message)

    # A file is recorded with None in place of a subnode id:
    parent_node[cvs_file] = None

    commit_item = SVNCommitItem(cvs_rev, True)
    self._invoke_delegates('add_path', commit_item)

  def copy_lod(self, src_lod, dest_lod, src_revnum):
    """Copy all of SRC_LOD at SRC_REVNUM to DEST_LOD.

    DEST_LOD *must not* already exist in the youngest revision of the
    repository; PathExistsError is raised if it does.

    Return the new node at DEST_LOD.  Note that this node is not
    necessarily writable, though its parent node necessarily is."""

    dest_path = dest_lod.get_path()

    # Look up the node that is to be copied:
    source_node = self._open_readonly_lod_node(src_lod, src_revnum)

    dest_history = self._get_lod_history(dest_lod)
    if dest_history.exists():
      message = (
          "Attempt to add path '%s' to repository mirror "
          "when it already exists in the mirror." % dest_path
          )
      raise self.PathExistsError(message)

    # The copy is cheap: the destination refers to the id of the
    # source node, so both have the same contents.
    dest_history.update(self._youngest, source_node.id)

    self._invoke_delegates('copy_lod', src_lod, dest_lod, src_revnum)

    return source_node

  def copy_path(
        self, cvs_path, src_lod, dest_lod, src_revnum, create_parent=False
        ):
    """Copy CVS_PATH from SRC_LOD at SRC_REVNUM to DEST_LOD.

    In the youngest revision of the repository, the parent of the
    destination *must* exist unless CREATE_PARENT is specified;
    otherwise ParentMissingError is raised.  The destination itself
    *must not* exist; otherwise PathExistsError is raised.

    Return the new node at (CVS_PATH, DEST_LOD).  Note that this node
    is not necessarily writable, though its parent node necessarily
    is."""

    parent_directory = cvs_path.parent_directory
    if parent_directory is None:
      # CVS_PATH is the root, so the whole LOD is copied:
      return self.copy_lod(src_lod, dest_lod, src_revnum)

    # The source node, or None if the source is a file:
    source_node = self._open_readonly_node(cvs_path, src_lod, src_revnum)

    # The writable parent node of the destination:
    try:
      dest_parent_node = self._open_writable_node(
          parent_directory, dest_lod, create_parent
          )
    except KeyError:
      raise self.ParentMissingError(
          'Attempt to add path \'%s\' to repository mirror, '
          'but its parent directory doesn\'t exist in the mirror.'
          % (dest_lod.get_path(cvs_path.cvs_path),)
          )

    if cvs_path in dest_parent_node:
      raise self.PathExistsError(
          'Attempt to add path \'%s\' to repository mirror '
          'when it already exists in the mirror.'
          % (dest_lod.get_path(cvs_path.cvs_path),)
          )

    # The copy is cheap: the destination entry refers to the source
    # node, so both have the same contents.
    dest_parent_node[cvs_path] = source_node

    source_svn_path = src_lod.get_path(cvs_path.cvs_path)
    dest_svn_path = dest_lod.get_path(cvs_path.cvs_path)
    self._invoke_delegates(
        'copy_path', source_svn_path, dest_svn_path, src_revnum
        )

    return source_node

  def fill_symbol(self, svn_symbol_commit, fill_source):
    """Perform all copies for the CVSSymbols in SVN_SYMBOL_COMMIT.

    By the end of this call the symbolic name is guaranteed to exist
    in the Subversion repository, even if there are no paths under
    it."""

    symbol = svn_symbol_commit.symbol

    # The existing root node of the symbol, or None if the symbol
    # does not exist yet:
    dest_node = None
    try:
      dest_node = self._open_writable_lod_node(symbol, False)
    except KeyError:
      pass

    self._fill_directory(symbol, dest_node, fill_source, None)

  def _prune_extra_entries(
        self, dest_cvs_path, symbol, dest_node, src_entries
        ):
    """Delete any entries in DEST_NODE that are not in SRC_ENTRIES.

    This might require creating a new writable node, so return a
    possibly-modified dest_node."""

    delete_list = [
        cvs_path
        for cvs_path in dest_node
        if cvs_path not in src_entries
        ]
    if delete_list:
      if not isinstance(dest_node, _WritableMirrorNode):
        dest_node = self._open_writable_node(dest_cvs_path, symbol, False)
      # Sort the delete list so that the output is in a consistent
      # order:
      delete_list.sort()
      for cvs_path in delete_list:
        del dest_node[cvs_path]
        self._invoke_delegates('delete_path', symbol, cvs_path)

    return dest_node

  def _fill_directory(self, symbol, dest_node, fill_source, parent_source):
    """Fill the tag or branch SYMBOL at the path indicated by FILL_SOURCE.

    Use items from FILL_SOURCE, and recurse into the child items.

    Fill SYMBOL starting at the path FILL_SOURCE.cvs_path.  DEST_NODE
    is the node of this destination path, or None if the destination
    does not yet exist.  All directories above this path have already
    been filled.  FILL_SOURCE is a FillSource instance describing the
    items within a subtree of the repository that still need to be
    copied to the destination.

    PARENT_SOURCE is the SVNRevisionRange that was used to copy the
    parent directory, if it was copied in this commit.  We prefer to
    copy from the same source as was used for the parent, since it
    typically requires less touching-up.  If PARENT_SOURCE is None,
    then the parent directory was not copied in this commit, so no
    revision is preferable to any other."""

    copy_source = fill_source.compute_best_source(parent_source)

    # Figure out if we shall copy to this destination and delete any
    # destination path that is in the way.
    if dest_node is None:
      # The destination does not exist at all, so it definitely has to
      # be copied:
      dest_node = self.copy_path(
          fill_source.cvs_path, copy_source.source_lod,
          symbol, copy_source.opening_revnum
          )
    elif (parent_source is not None) and (
          copy_source.source_lod != parent_source.source_lod
          or copy_source.opening_revnum != parent_source.opening_revnum
          ):
      # The parent path was copied from a different source than we
      # need to use, so we have to delete the version that was copied
      # with the parent then re-copy from the correct source:
      self.delete_path(fill_source.cvs_path, symbol)
      dest_node = self.copy_path(
          fill_source.cvs_path, copy_source.source_lod,
          symbol, copy_source.opening_revnum
          )
    else:
      copy_source = parent_source

    # Get the map {entry : FillSource} for entries within this
    # directory that need filling.
    src_entries = {}
    for (cvs_path, fill_subsource) in fill_source.get_subsources():
      src_entries[cvs_path] = fill_subsource

    if copy_source is not None:
      dest_node = self._prune_extra_entries(
          fill_source.cvs_path, symbol, dest_node, src_entries
          )

    # Recurse into the SRC_ENTRIES ids sorted in alphabetical order.
    cvs_paths = src_entries.keys()
    cvs_paths.sort()
    for cvs_path in cvs_paths:
      if isinstance(cvs_path, CVSDirectory):
        # Path is a CVSDirectory:
        try:
          dest_subnode = dest_node[cvs_path]
        except KeyError:
          # Path didn't exist at all; it has to be created:
          dest_subnode = None
        self._fill_directory(
            symbol, dest_subnode, src_entries[cvs_path], copy_source
            )
      else:
        # Path is a CVSFile:
        self._fill_file(
            symbol, cvs_path in dest_node, src_entries[cvs_path], copy_source
            )

  def _fill_file(self, symbol, dest_existed, fill_source, parent_source):
    """Fill the tag or branch SYMBOL at the path indicated by FILL_SOURCE.

    Use items from FILL_SOURCE.

    Fill SYMBOL at path FILL_SOURCE.cvs_path.  DEST_NODE is the node
    of this destination path, or None if the destination does not yet
    exist.  All directories above this path have already been filled
    as needed.  FILL_SOURCE is a FillSource instance describing the
    item that needs to be copied to the destination.

    PARENT_SOURCE is the source from which the parent directory was
    copied, or None if the parent directory was not copied during this
    commit.  We prefer to copy from PARENT_SOURCE, since it typically
    requires less touching-up.  If PARENT_SOURCE is None, then the
    parent directory was not copied in this commit, so no revision is
    preferable to any other."""

    copy_source = fill_source.compute_best_source(parent_source)

    # Figure out if we shall copy to this destination and delete any
    # destination path that is in the way.
    if not dest_existed:
      # The destination does not exist at all, so it definitely has to
      # be copied:
      self.copy_path(
          fill_source.cvs_path, copy_source.source_lod,
          symbol, copy_source.opening_revnum
          )
    elif (parent_source is not None) and (
          copy_source.source_lod != parent_source.source_lod
          or copy_source.opening_revnum != parent_source.opening_revnum
          ):
      # The parent path was copied from a different source than we
      # need to use, so we have to delete the version that was copied
      # with the parent and then re-copy from the correct source:
      self.delete_path(fill_source.cvs_path, symbol)
      self.copy_path(
          fill_source.cvs_path, copy_source.source_lod,
          symbol, copy_source.opening_revnum
          )

  def add_delegate(self, delegate):
    """Adds DELEGATE to self._delegates.

    For every delegate you add, as soon as SVNRepositoryMirror
    performs a repository action method, SVNRepositoryMirror will call
    the delegate's corresponding repository action method.  Multiple
    delegates will be called in the order that they are added.  See
    SVNRepositoryMirrorDelegate for more information."""

    self._delegates.append(delegate)

  def _invoke_delegates(self, method, *args):
    """Invoke a method on each delegate.

    Iterate through each of our delegates, in the order that they were
    added, and call the delegate's method named METHOD with the
    arguments in ARGS."""

    for delegate in self._delegates:
      getattr(delegate, method)(*args)

  def close(self):
    """Call the delegate finish methods and close databases."""

    self._invoke_delegates('finish')
    self._lod_histories = None
    self._nodes_db.close()
    self._nodes_db = None
Example #16
0
class PersistenceManager:
  """The PersistenceManager allows us to effectively store SVNCommits
  to disk and retrieve them later using only their subversion revision
  number as the key.  It also returns the subversion revision number
  for a given CVSRevision's unique key.

  All information pertinent to each SVNCommit is stored in a series of
  on-disk databases so that SVNCommits can be retrieved on-demand.

  MODE is one of the constants DB_OPEN_NEW or DB_OPEN_READ.
  In 'new' mode, PersistenceManager will initialize a new set of on-disk
  databases and be fully-featured.
  In 'read' mode, PersistenceManager will open existing on-disk databases
  and put_svn_commit() will raise RuntimeError."""

  def __init__(self, mode):
    """Open the commit databases in MODE.

    MODE must be DB_OPEN_NEW or DB_OPEN_READ; any other value raises
    RuntimeError."""

    self.mode = mode
    if mode not in (DB_OPEN_NEW, DB_OPEN_READ):
      # The call form of raise is used because the old
      # "raise Class, message" statement is a syntax error on Python 3.
      raise RuntimeError("Invalid 'mode' argument to PersistenceManager")

    # The SVNCommit classes with which the pickle serializer is primed:
    primer = (
        SVNInitialProjectCommit,
        SVNPrimaryCommit,
        SVNPostCommit,
        SVNBranchCommit,
        SVNTagCommit,
        )
    serializer = PrimedPickleSerializer(primer)

    # A database mapping SVN revision numbers to SVNCommits.
    #
    # NOTE(review): the index-table file is passed before the store
    # file here, whereas the other IndexedDatabase call sites in this
    # file pass the store file first.  The order is deliberately left
    # unchanged, because swapping it would change which file holds
    # which data -- confirm against IndexedDatabase's signature.
    self.svn_commit_db = IndexedDatabase(
        artifact_manager.get_temp_file(config.SVN_COMMITS_INDEX_TABLE),
        artifact_manager.get_temp_file(config.SVN_COMMITS_STORE),
        mode, serializer)

    # A table mapping CVSRevision ids to SVN revision numbers:
    self.cvs2svn_db = RecordTable(
        artifact_manager.get_temp_file(config.CVS_REVS_TO_SVN_REVNUMS),
        mode, SignedIntegerPacker(SVN_INVALID_REVNUM))

  def get_svn_revnum(self, cvs_rev_id):
    """Return the Subversion revision number in which CVS_REV_ID was
    committed, or SVN_INVALID_REVNUM if there is no mapping for
    CVS_REV_ID."""

    return self.cvs2svn_db.get(cvs_rev_id, SVN_INVALID_REVNUM)

  def get_svn_commit(self, svn_revnum):
    """Return an SVNCommit that corresponds to SVN_REVNUM.

    If no SVNCommit exists for revnum SVN_REVNUM, then return None."""

    return self.svn_commit_db.get(svn_revnum, None)

  def put_svn_commit(self, svn_commit):
    """Store SVN_COMMIT under its revision number.

    If SVN_COMMIT is an SVNRevisionCommit, also record its revision
    number under the id of each of its cvs_revs, so that
    get_svn_revnum() can find it later.

    Raise RuntimeError if this PersistenceManager was opened in
    DB_OPEN_READ mode."""

    if self.mode == DB_OPEN_READ:
      raise RuntimeError(
          'Write operation attempted on read-only PersistenceManager')

    self.svn_commit_db[svn_commit.revnum] = svn_commit

    if isinstance(svn_commit, SVNRevisionCommit):
      for cvs_rev in svn_commit.cvs_revs:
        self.cvs2svn_db[cvs_rev.id] = svn_commit.revnum

  def close(self):
    """Close both databases and drop the references to them."""

    self.cvs2svn_db.close()
    self.cvs2svn_db = None
    self.svn_commit_db.close()
    self.svn_commit_db = None
Example #17
0
class SVNRepositoryMirror:
    """Mirror a Subversion repository and its history.

    The mirror is built up as the repository is constructed, one
    SVNCommit at a time.  For each LineOfDevelopment (LOD) we store a
    skeleton of the directory structure within that LOD for each SVN
    revision number in which it changed.  Creating a dumpfile or a
    Subversion repository is left to delegates; see add_delegate()
    for how to set them.

    For each LOD that has been seen so far, an LODHistory instance is
    stored in self._lod_histories.  An LODHistory keeps track of each
    SVNRevision in which files were added to or deleted from that LOD,
    as well as the node id of the node tree describing the LOD
    contents at that SVN revision.

    The LOD trees themselves are stored in the _nodes_db database,
    which maps node ids to nodes.  A node is a map from CVSPath.id to
    ids of the corresponding subnodes.  The _nodes_db is stored on
    disk and each access is expensive.

    The _nodes_db database only holds the nodes for old revisions.
    The revision that is being constructed is kept in memory in the
    _new_nodes map, which is cheap to access.

    You must invoke start_commit() before each SVNCommit and
    end_commit() afterwards.

    *** WARNING *** Path arguments to methods in this class MUST NOT
        have leading or trailing slashes."""

    class ParentMissingError(Exception):
        """The parent of a path is missing.

        Raised if an attempt is made to add a path to the repository
        mirror but the parent's path doesn't exist in the youngest
        revision of the repository."""

    class PathExistsError(Exception):
        """The path already exists in the repository.

        Raised if an attempt is made to add a path to the repository
        mirror and that path already exists in the youngest revision
        of the repository."""

    def register_artifacts(self, which_pass):
        """Register the temporary files that this object needs.

        Both the nodes index table and the nodes store are registered
        with the artifact manager for WHICH_PASS."""

        register = artifact_manager.register_temp_file
        register(config.SVN_MIRROR_NODES_INDEX_TABLE, which_pass)
        register(config.SVN_MIRROR_NODES_STORE, which_pass)

    def open(self):
        """Set up the SVNRepositoryMirror and prepare it for SVNCommits."""

        self._key_generator = KeyGenerator()

        self._delegates = []

        # A map from LOD to LODHistory instance for all LODs that have
        # been defined so far:
        self._lod_histories = {}

        # This corresponds to the 'nodes' table in a Subversion fs.
        # (No 'representations' or 'strings' table is needed because
        # only metadata is tracked, not file contents.)
        store_file = artifact_manager.get_temp_file(
            config.SVN_MIRROR_NODES_STORE)
        index_file = artifact_manager.get_temp_file(
            config.SVN_MIRROR_NODES_INDEX_TABLE)
        self._nodes_db = IndexedDatabase(
            store_file, index_file, DB_OPEN_NEW,
            serializer=_NodeSerializer())

        # Start at revision 0 without a root node.  The LOD root nodes
        # are created on demand by _open_writable_lod_node().
        self._youngest = 0

    def start_commit(self, revnum, revprops):
        """Start a new commit with revision number REVNUM.

        REVNUM and REVPROPS are passed on to the 'start_commit' method
        of each delegate."""

        self._youngest = revnum

        # A map {node_id : {CVSPath : node_id}} of the nodes created
        # during this commit:
        self._new_nodes = {}

        self._invoke_delegates('start_commit', revnum, revprops)

    def end_commit(self):
        """Finish the current commit.

        The nodes created during the commit are copied to the on-disk
        nodes db, the in-memory map is discarded, and the 'end_commit'
        method of each delegate is invoked."""

        for node_id, entries in self._new_nodes.items():
            self._nodes_db[node_id] = entries

        del self._new_nodes

        self._invoke_delegates('end_commit')

    def _get_lod_history(self, lod):
        """Return the LODHistory instance describing LOD.

        If LOD has no history yet, a new LODHistory is created,
        remembered, and returned."""

        histories = self._lod_histories
        if lod not in histories:
            histories[lod] = LODHistory()
        return histories[lod]

    def _create_empty_node(self):
        """Create and return a new, empty, writable node.

        The entries of the new node are recorded in self._new_nodes
        under a freshly generated id."""

        node_id = self._key_generator.gen_id()
        created = _WritableMirrorNode(self, node_id, {})
        self._new_nodes[created.id] = created.entries
        return created

    def _copy_node(self, old_node):
        """Create and return a new, writable copy of OLD_NODE.

        The copy gets a freshly generated id and a shallow copy of
        OLD_NODE's entries, which are recorded in self._new_nodes."""

        node_id = self._key_generator.gen_id()
        duplicate = _WritableMirrorNode(self, node_id,
                                        old_node.entries.copy())
        self._new_nodes[duplicate.id] = duplicate.entries
        return duplicate

    def _get_node(self, id):
        """Return the node for id ID.

        If ID is found in self._new_nodes, a _WritableMirrorNode is
        returned; otherwise the entries are read from self._nodes_db
        and a _ReadOnlyMirrorNode is returned."""

        try:
            return _WritableMirrorNode(self, id, self._new_nodes[id])
        except KeyError:
            # Not created during the current commit; use the nodes db:
            return _ReadOnlyMirrorNode(self, id, self._nodes_db[id])

    def _open_readonly_lod_node(self, lod, revnum):
        """Open a node for the root path of LOD at revision REVNUM.

        The node id is looked up in the LODHistory of LOD, and the
        node is obtained via _get_node().  Raise KeyError if the path
        does not exist."""

        return self._get_node(self._get_lod_history(lod).get_id(revnum))

    def _open_readonly_node(self, cvs_path, lod, revnum):
        """Open a readonly node for CVS_PATH from LOD at REVNUM.

        If CVS_PATH refers to a leaf node, return None.

        Raise KeyError if the node does not exist."""

        parent_directory = cvs_path.parent_directory
        if parent_directory is None:
            # CVS_PATH is the root directory of the LOD:
            return self._open_readonly_lod_node(lod, revnum)

        parent_node = self._open_readonly_node(parent_directory, lod, revnum)
        return parent_node[cvs_path]

    def _open_writable_lod_node(self, lod, create, invoke_delegates=True):
        """Open a writable node for the root path in LOD.

        Iff CREATE is True, create the root node if it does not exist
        yet; in that case the 'initialize_lod' delegate method is
        invoked unless INVOKE_DELEGATES is false.  Return an instance
        of _WritableMirrorNode.  Raise KeyError if the path doesn't
        already exist and CREATE is not set."""

        history = self._get_lod_history(lod)
        try:
            node_id = history.get_id()
        except KeyError:
            if not create:
                raise
            node = self._create_empty_node()
            history.update(self._youngest, node.id)
            if invoke_delegates:
                self._invoke_delegates('initialize_lod', lod)
            return node

        node = self._get_node(node_id)
        if isinstance(node, _WritableMirrorNode):
            return node

        # Node was created in an earlier revision, so it has to be
        # copied to make it writable:
        node = self._copy_node(node)
        history.update(self._youngest, node.id)
        return node

    def _open_writable_node(self, cvs_directory, lod, create):
        """Open a writable node for CVS_DIRECTORY in LOD.

        Iff CREATE is True, create a directory node for CVS_DIRECTORY
        and for any missing parent directories, invoking the 'mkdir'
        delegate method for each directory created below the LOD
        root.  Return an instance of _WritableMirrorNode.

        Raise KeyError if CVS_DIRECTORY doesn't exist and CREATE is
        not set.  Raise InternalError if the existing entry is not a
        directory node."""

        parent_directory = cvs_directory.parent_directory
        if parent_directory is None:
            return self._open_writable_lod_node(lod, create)

        parent_node = self._open_writable_node(parent_directory, lod, create)

        try:
            node = parent_node[cvs_directory]
        except KeyError:
            if not create:
                raise
            # The component does not exist, so we create it.
            created = self._create_empty_node()
            parent_node[cvs_directory] = created
            self._invoke_delegates('mkdir', lod, cvs_directory)
            return created

        if isinstance(node, _WritableMirrorNode):
            return node

        if isinstance(node, _ReadOnlyMirrorNode):
            # Replace the read-only node by a writable copy:
            duplicate = self._copy_node(node)
            parent_node[cvs_directory] = duplicate
            return duplicate

        raise InternalError(
            'Attempt to modify file at %s in mirror' % (cvs_directory, ))

    def delete_lod(self, lod):
        """Delete the main path for LOD from the tree.

        The path must currently exist; otherwise KeyError is raised.
        Requests to delete a Trunk are silently ignored."""

        if isinstance(lod, Trunk):
            # Never delete a Trunk path.
            return

        history = self._get_lod_history(lod)
        if not history.exists():
            raise KeyError()
        history.update(self._youngest, None)
        self._invoke_delegates('delete_lod', lod)

    def delete_path(self, cvs_path, lod, should_prune=False):
        """Delete CVS_PATH from LOD.

        If CVS_PATH is the root directory, the whole LOD is deleted
        via delete_lod().  If SHOULD_PRUNE is set, parent directories
        that become empty are deleted as well."""

        parent_directory = cvs_path.parent_directory
        if parent_directory is None:
            self.delete_lod(lod)
            return

        parent_node = self._open_writable_node(parent_directory, lod, False)
        del parent_node[cvs_path]
        self._invoke_delegates('delete_path', lod, cvs_path)

        # The following recursion makes pruning an O(n^2) operation in
        # the worst case (where n is the depth of CVS_PATH), but the
        # worst case is probably rare, and the constant cost is pretty
        # low.  Another drawback is that a delete is issued for each
        # path and not just a single delete for the topmost directory
        # pruned.
        if should_prune and len(parent_node) == 0:
            self.delete_path(parent_directory, lod, True)

    def initialize_project(self, project):
        """Create the basic structure for PROJECT.

        The delegates are notified via 'initialize_project', and then
        the root node of the project's trunk is created without
        invoking 'initialize_lod'."""

        self._invoke_delegates('initialize_project', project)

        trunk = Ctx()._symbol_db.get_symbol(project.trunk_id)
        self._open_writable_lod_node(
            trunk, create=True, invoke_delegates=False)

    def change_path(self, cvs_rev):
        """Register a change in self._youngest for the CVS_REV's svn_path."""

        # The nodes do not have to be updated, because the mirror is
        # only concerned with the presence or absence of paths, and a
        # file content change does not cause any path changes.
        commit_item = SVNCommitItem(cvs_rev, False)
        self._invoke_delegates('change_path', commit_item)

    def add_path(self, cvs_rev):
        """Add the CVS_REV's svn_path to the repository mirror.

        Missing parent directories are created.  Raise PathExistsError
        if the file is already present in its parent directory."""

        cvs_file = cvs_rev.cvs_file
        parent_node = self._open_writable_node(
            cvs_file.parent_directory, cvs_rev.lod, True)

        if cvs_file in parent_node:
            raise self.PathExistsError(
                "Attempt to add path '%s' to repository mirror "
                "when it already exists in the mirror."
                % (cvs_rev.get_svn_path(), ))

        # Files are leaf entries, which are recorded as None:
        parent_node[cvs_file] = None

        self._invoke_delegates('add_path', SVNCommitItem(cvs_rev, True))

    def copy_lod(self, src_lod, dest_lod, src_revnum):
        """Copy all of SRC_LOD at SRC_REVNUM to DEST_LOD.

        In the youngest revision of the repository, the destination
        LOD *must not* already exist; otherwise PathExistsError is
        raised.

        Return the new node at DEST_LOD.  Note that this node is not
        necessarily writable, though its parent node necessarily is."""

        dest_path = dest_lod.get_path()

        # Get the root node of the source LOD:
        src_node = self._open_readonly_lod_node(src_lod, src_revnum)

        dest_history = self._get_lod_history(dest_lod)
        if dest_history.exists():
            raise self.PathExistsError(
                "Attempt to add path '%s' to repository mirror "
                "when it already exists in the mirror." % dest_path)

        dest_history.update(self._youngest, src_node.id)

        self._invoke_delegates('copy_lod', src_lod, dest_lod, src_revnum)

        # This is a cheap copy, so src_node has the same contents as
        # the new destination node.
        return src_node

    def copy_path(self,
                  cvs_path,
                  src_lod,
                  dest_lod,
                  src_revnum,
                  create_parent=False):
        """Copy CVS_PATH from SRC_LOD at SRC_REVNUM to DEST_LOD.

        In the youngest revision of the repository, the destination's
        parent *must* exist unless CREATE_PARENT is specified;
        otherwise ParentMissingError is raised.  But the destination
        itself *must not* exist; otherwise PathExistsError is raised.

        Return the new node at (CVS_PATH, DEST_LOD).  Note that this
        node is not necessarily writable, though its parent node
        necessarily is."""

        parent_directory = cvs_path.parent_directory
        if parent_directory is None:
            return self.copy_lod(src_lod, dest_lod, src_revnum)

        # Get the node of our source, or None if it is a file:
        src_node = self._open_readonly_node(cvs_path, src_lod, src_revnum)

        # Get the parent node of the destination:
        try:
            dest_parent_node = self._open_writable_node(
                parent_directory, dest_lod, create_parent)
        except KeyError:
            raise self.ParentMissingError(
                "Attempt to add path '%s' to repository mirror, "
                "but its parent directory doesn't exist in the mirror."
                % (dest_lod.get_path(cvs_path.cvs_path), ))

        if cvs_path in dest_parent_node:
            raise self.PathExistsError(
                "Attempt to add path '%s' to repository mirror "
                "when it already exists in the mirror."
                % (dest_lod.get_path(cvs_path.cvs_path), ))

        dest_parent_node[cvs_path] = src_node
        self._invoke_delegates(
            'copy_path',
            src_lod.get_path(cvs_path.cvs_path),
            dest_lod.get_path(cvs_path.cvs_path),
            src_revnum)

        # This is a cheap copy, so src_node has the same contents as
        # the new destination node.
        return src_node

    def fill_symbol(self, svn_symbol_commit, fill_source):
        """Perform all copies for the CVSSymbols in SVN_SYMBOL_COMMIT.

        The symbolic name is guaranteed to exist in the Subversion
        repository by the end of this call, even if there are no paths
        under it."""

        symbol = svn_symbol_commit.symbol

        try:
            dest_node = self._open_writable_lod_node(symbol, False)
        except KeyError:
            # The symbol does not exist yet:
            dest_node = None

        self._fill_directory(symbol, dest_node, fill_source, None)

    def _prune_extra_entries(self, dest_cvs_path, symbol, dest_node,
                             src_entries):
        """Delete any entries in DEST_NODE that are not in SRC_ENTRIES.

        The 'delete_path' delegate method is invoked for each entry
        deleted.  This might require creating a new writable node, so
        return a possibly-modified DEST_NODE."""

        extras = [
            entry for entry in dest_node if entry not in src_entries
        ]
        if not extras:
            return dest_node

        if not isinstance(dest_node, _WritableMirrorNode):
            dest_node = self._open_writable_node(dest_cvs_path, symbol, False)

        # Process the entries in sorted order so that the output is in
        # a consistent order:
        for entry in sorted(extras):
            del dest_node[entry]
            self._invoke_delegates('delete_path', symbol, entry)

        return dest_node

    def _source_differs(self, copy_source, parent_source):
        """Return a true value iff COPY_SOURCE differs from PARENT_SOURCE.

        The two sources differ if their source_lod or their
        opening_revnum attributes differ.  If PARENT_SOURCE is None,
        there is nothing to differ from and False is returned."""

        if parent_source is None:
            return False
        return (copy_source.source_lod != parent_source.source_lod
                or copy_source.opening_revnum != parent_source.opening_revnum)

    def _fill_directory(self, symbol, dest_node, fill_source, parent_source):
        """Fill the tag or branch SYMBOL at the path indicated by FILL_SOURCE.

        Use items from FILL_SOURCE, and recurse into the child items.

        Fill SYMBOL starting at the path FILL_SOURCE.cvs_path.
        DEST_NODE is the node of this destination path, or None if the
        destination does not yet exist.  All directories above this
        path have already been filled.  FILL_SOURCE is a FillSource
        instance describing the items within a subtree of the
        repository that still need to be copied to the destination.

        PARENT_SOURCE is the SVNRevisionRange that was used to copy
        the parent directory, if it was copied in this commit.  We
        prefer to copy from the same source as was used for the
        parent, since it typically requires less touching-up.  If
        PARENT_SOURCE is None, then the parent directory was not
        copied in this commit, so no revision is preferable to any
        other."""

        copy_source = fill_source.compute_best_source(parent_source)
        target_path = fill_source.cvs_path

        # Figure out whether to copy to this destination, and delete
        # any destination path that is in the way.
        if dest_node is None:
            # The destination does not exist at all, so it definitely
            # has to be copied:
            dest_node = self.copy_path(
                target_path, copy_source.source_lod,
                symbol, copy_source.opening_revnum)
        elif self._source_differs(copy_source, parent_source):
            # The parent path was copied from a different source than
            # the one needed here, so delete the version that was
            # copied with the parent and re-copy from the correct
            # source:
            self.delete_path(target_path, symbol)
            dest_node = self.copy_path(
                target_path, copy_source.source_lod,
                symbol, copy_source.opening_revnum)
        else:
            # Nothing was copied here; the children inherit the source
            # (possibly None) that was used for the parent:
            copy_source = parent_source

        # The map {entry : FillSource} for entries within this
        # directory that need filling:
        src_entries = dict(
            (entry, subsource)
            for (entry, subsource) in fill_source.get_subsources())

        if copy_source is not None:
            dest_node = self._prune_extra_entries(
                target_path, symbol, dest_node, src_entries)

        # Recurse into the entries in sorted order:
        for entry in sorted(src_entries):
            subsource = src_entries[entry]
            if not isinstance(entry, CVSDirectory):
                # Entry is a CVSFile:
                self._fill_file(
                    symbol, entry in dest_node, subsource, copy_source)
                continue

            # Entry is a CVSDirectory:
            try:
                dest_subnode = dest_node[entry]
            except KeyError:
                # Path didn't exist at all; it has to be created:
                dest_subnode = None
            self._fill_directory(symbol, dest_subnode, subsource, copy_source)

    def _fill_file(self, symbol, dest_existed, fill_source, parent_source):
        """Fill the tag or branch SYMBOL at the path indicated by FILL_SOURCE.

        Use items from FILL_SOURCE.

        Fill SYMBOL at path FILL_SOURCE.cvs_path.  DEST_EXISTED tells
        whether the destination path already exists.  All directories
        above this path have already been filled as needed.
        FILL_SOURCE is a FillSource instance describing the item that
        needs to be copied to the destination.

        PARENT_SOURCE is the source from which the parent directory
        was copied, or None if the parent directory was not copied
        during this commit.  We prefer to copy from PARENT_SOURCE,
        since it typically requires less touching-up.  If
        PARENT_SOURCE is None, then the parent directory was not
        copied in this commit, so no revision is preferable to any
        other."""

        copy_source = fill_source.compute_best_source(parent_source)

        # Figure out whether to copy to this destination, and delete
        # any destination path that is in the way.
        if not dest_existed:
            # The destination does not exist at all, so it definitely
            # has to be copied:
            pass
        elif self._source_differs(copy_source, parent_source):
            # The parent path was copied from a different source than
            # the one needed here, so delete the version that was
            # copied with the parent before re-copying from the
            # correct source:
            self.delete_path(fill_source.cvs_path, symbol)
        else:
            # The existing destination is already what we need.
            return

        self.copy_path(
            fill_source.cvs_path, copy_source.source_lod,
            symbol, copy_source.opening_revnum)

    def add_delegate(self, delegate):
        """Append DELEGATE to self._delegates.

        Whenever SVNRepositoryMirror performs a repository action
        method, it calls the method of the same name on every
        registered delegate.  Delegates are called in the order in
        which they were added.  See SVNRepositoryMirrorDelegate for
        more information."""

        self._delegates.append(delegate)

    def _invoke_delegates(self, method, *args):
        """Call the method named METHOD on every delegate.

        The delegates are visited in the order in which they were
        added, and each one is called with the positional arguments
        ARGS."""

        for receiver in self._delegates:
            callback = getattr(receiver, method)
            callback(*args)

    def close(self):
        """Call the delegate finish methods and close databases."""

        self._invoke_delegates('finish')
        self._lod_histories = None

        # Only forget the database once it has been closed:
        nodes_db = self._nodes_db
        nodes_db.close()
        self._nodes_db = None
class _NodeDatabase(object):
  """A database storing all of the directory nodes.

  Nodes are written in groups, one group per call to
  write_new_nodes().  Each group is stored in the database as a
  dictionary {node_id : [(cvs_path.id, node_id),...]} whose keys are
  the node_ids of the new nodes.  When a node is read, its whole
  group is read and cached, on the assumption that the other nodes in
  the group are likely to be needed soon.  The cache is retained
  across revisions and cleared when _cache_max_size is exceeded.

  The dictionaries of nodes that have been read from the database are
  cached by node_id in the _cache member variable.  These
  dictionaries are *not* copied when read.  To avoid cross-talk
  between distinct MirrorDirectory instances that have the same
  node_id, users of these dictionaries have to copy them before
  modification."""

  # How many entries should be allowed in the cache for each
  # CVSDirectory in the repository.  (This number is very roughly the
  # number of complete lines of development that can be stored in the
  # cache at one time.)
  CACHE_SIZE_MULTIPLIER = 5

  # But the cache will never be limited to less than this number:
  MIN_CACHE_LIMIT = 5000

  def __init__(self):
    """Create a new, empty node database and size its cache."""

    self.cvs_file_db = Ctx()._cvs_file_db
    self.db = IndexedDatabase(
        artifact_manager.get_temp_file(config.MIRROR_NODES_STORE),
        artifact_manager.get_temp_file(config.MIRROR_NODES_INDEX_TABLE),
        DB_OPEN_NEW, serializer=MarshalSerializer(),
        )

    # A list of the maximum node_id stored by each call to
    # write_new_nodes():
    self._max_node_ids = [0]

    # A map {node_id : {cvs_path : node_id}}:
    self._cache = {}

    # Count the directories in the repository:
    num_dirs = 0
    for cvs_path in self.cvs_file_db.itervalues():
      if isinstance(cvs_path, CVSDirectory):
        num_dirs += 1

    scaled_limit = int(self.CACHE_SIZE_MULTIPLIER * num_dirs)
    self._cache_max_size = max(scaled_limit, self.MIN_CACHE_LIMIT)

  def _load(self, items):
    """Convert stored ITEMS into a node dictionary.

    ITEMS is a sequence of (cvs_path_id, value) pairs.  Return a
    dictionary in which each id has been replaced by the object that
    self.cvs_file_db.get_file() returns for it."""

    return dict(
        (self.cvs_file_db.get_file(id), value)
        for (id, value) in items
        )

  def _dump(self, node):
    """Convert the node dictionary NODE into its stored form.

    Return a list of (cvs_path.id, value) pairs, one for each item of
    NODE."""

    pairs = []
    for (cvs_path, value) in node.iteritems():
      pairs.append((cvs_path.id, value))
    return pairs

  def _determine_index(self, id):
    """Return the index of the record holding the node with ID.

    The index is found by bisecting the list of maximum node ids
    written by each call to write_new_nodes()."""

    return bisect.bisect_left(self._max_node_ids, id)

  def __getitem__(self, id):
    """Return the node dictionary for node ID.

    On a cache miss, the whole record containing the node is read
    from the database and all of its nodes are added to the cache."""

    try:
      return self._cache[id]
    except KeyError:
      pass

    record = self.db[self._determine_index(id)]
    for (node_id, raw_items) in record.items():
      self._cache[node_id] = self._load(raw_items)

    return self._cache[id]

  def write_new_nodes(self, nodes):
    """Write NODES to the database.

    NODES is an iterable of writable CurrentMirrorDirectory instances.
    All of them are written as a single record, and their entries are
    also stored in the cache."""

    if len(self._cache) > self._cache_max_size:
      # The size of the cache has exceeded the threshold.  Discard the
      # old cache values (but still store the new nodes into the
      # cache):
      Log().debug('Clearing node cache')
      self._cache.clear()

    record = {}
    highest_id = 0
    for node in nodes:
      if node.id > highest_id:
        highest_id = node.id
      record[node.id] = self._dump(node._entries)
      self._cache[node.id] = node._entries

    self.db[len(self._max_node_ids)] = record

    if highest_id != 0:
      self._max_node_ids.append(highest_id)
    else:
      # No node id was seen, so repeat the last value:
      self._max_node_ids.append(self._max_node_ids[-1])

  def close(self):
    """Empty the cache, then close and release the database."""

    self._cache.clear()
    self.db.close()
    self.db = None