Example #1
0
    def start(self):
        self._delta_db = IndexedDatabase(
            artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
            artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
            DB_OPEN_READ,
        )
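        # Deletions from the read-only delta database are made no-ops
        # by replacing __delitem__ with a do-nothing lambda: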
        self._delta_db.__delitem__ = lambda id: None
        self._tree_db = IndexedDatabase(
            artifact_manager.get_temp_file(config.RCS_TREES_STORE),
            artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
            DB_OPEN_READ,
        )
        serializer = MarshalSerializer()
        if self._compress:
            serializer = CompressingSerializer(serializer)
        self._co_db = self._Database(
            artifact_manager.get_temp_file(config.CVS_CHECKOUT_DB),
            DB_OPEN_NEW,
            serializer,
        )

        # The set of CVSFile instances whose TextRecords have already been
        # read:
        self._loaded_files = set()

        # A map { CVSFILE : _FileTree } for files that currently have live
        # revisions:
        self._text_record_db = TextRecordDatabase(self._delta_db, self._co_db)
Example #2
0
class InternalRevisionCollector(RevisionCollector):
  """The RevisionCollector used by InternalRevisionReader."""

  def __init__(self, compress):
    RevisionCollector.__init__(self)
    self._compress = compress

  def register_artifacts(self, which_pass):
    artifact_manager.register_temp_file(
        config.RCS_DELTAS_INDEX_TABLE, which_pass
        )
    artifact_manager.register_temp_file(config.RCS_DELTAS_STORE, which_pass)
    artifact_manager.register_temp_file(
        config.RCS_TREES_INDEX_TABLE, which_pass
        )
    artifact_manager.register_temp_file(config.RCS_TREES_STORE, which_pass)

  def start(self):
    serializer = MarshalSerializer()
    if self._compress:
      serializer = CompressingSerializer(serializer)
    self._delta_db = IndexedDatabase(
        artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
        artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
        DB_OPEN_NEW, serializer,
        )
    primer = (FullTextRecord, DeltaTextRecord)
    self._rcs_trees = IndexedDatabase(
        artifact_manager.get_temp_file(config.RCS_TREES_STORE),
        artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
        DB_OPEN_NEW, PrimedPickleSerializer(primer),
        )

  def _writeout(self, text_record, text):
    self.text_record_db.add(text_record)
    self._delta_db[text_record.id] = text

  def process_file(self, cvs_file_items):
    """Read revision information for the file described by CVS_FILE_ITEMS.

    Compute the text record refcounts, discard any records that are
    unneeded, and store the text records for the file to the
    _rcs_trees database."""

    # A map from cvs_rev_id to TextRecord instance:
    self.text_record_db = TextRecordDatabase(self._delta_db, NullDatabase())

    parse(
        open(cvs_file_items.cvs_file.rcs_path, 'rb'),
        _Sink(self, cvs_file_items),
        )

    self.text_record_db.recompute_refcounts(cvs_file_items)
    self.text_record_db.free_unused()
    self._rcs_trees[cvs_file_items.cvs_file.id] = self.text_record_db
    del self.text_record_db

  def finish(self):
    self._delta_db.close()
    self._rcs_trees.close()
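Taken together, these methods define the collector's lifecycle: artifacts are registered for the pass, start() opens the delta and tree stores, process_file() is invoked once per RCS file, and finish() closes both databases. A minimal driving sketch, assuming the surrounding pass machinery supplies a pass object and the per-file CVSFileItems (the names which_pass and all_cvs_file_items are placeholders, not part of the code above):

collector = InternalRevisionCollector(compress=True)
collector.register_artifacts(which_pass)    # declare the temp files this pass writes
collector.start()                           # open the RCS_DELTAS_* and RCS_TREES_* stores
for cvs_file_items in all_cvs_file_items:   # one CVSFileItems per RCS file
  collector.process_file(cvs_file_items)    # parse the file, prune unused text records
collector.finish()                          # close the delta and tree databases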
Example #3
0
    def __init__(self):
        self.cvs_path_db = Ctx()._cvs_path_db
        self.db = IndexedDatabase(
            artifact_manager.get_temp_file(config.MIRROR_NODES_STORE),
            artifact_manager.get_temp_file(config.MIRROR_NODES_INDEX_TABLE),
            DB_OPEN_NEW,
            serializer=MarshalSerializer(),
        )

        # A list of the maximum node_id stored by each call to
        # write_new_nodes():
        self._max_node_ids = [0]

        # A map {node_id : {cvs_path : node_id}}:
        self._cache = {}

        # The number of directories in the repository:
        num_dirs = len([
            cvs_path for cvs_path in self.cvs_path_db.itervalues()
            if isinstance(cvs_path, CVSDirectory)
        ])

        self._cache_max_size = max(
            int(self.CACHE_SIZE_MULTIPLIER * num_dirs),
            self.MIN_CACHE_LIMIT,
        )
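The final statement scales the node cache with the repository size. A small worked example, using the class constants that appear later in this document (CACHE_SIZE_MULTIPLIER = 5, MIN_CACHE_LIMIT = 5000):

# 800 directories: 5 * 800 = 4000, so the MIN_CACHE_LIMIT floor applies.
max(int(5 * 800), 5000)      # == 5000
# 20000 directories: the proportional limit 5 * 20000 = 100000 wins.
max(int(5 * 20000), 5000)    # == 100000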
Example #4
0
  def start(self):
    serializer = MarshalSerializer()
    if self._compress:
      serializer = CompressingSerializer(serializer)
    self._delta_db = IndexedDatabase(
        artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
        artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
        DB_OPEN_NEW, serializer,
        )
    primer = (FullTextRecord, DeltaTextRecord)
    self._rcs_trees = IndexedDatabase(
        artifact_manager.get_temp_file(config.RCS_TREES_STORE),
        artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
        DB_OPEN_NEW, PrimedPickleSerializer(primer),
        )
Example #6
0
  def __init__(self):
    self.cvs_path_db = Ctx()._cvs_path_db
    self.db = IndexedDatabase(
        artifact_manager.get_temp_file(config.MIRROR_NODES_STORE),
        artifact_manager.get_temp_file(config.MIRROR_NODES_INDEX_TABLE),
        DB_OPEN_NEW, serializer=MarshalSerializer(),
        )

    # A list of the maximum node_id stored by each call to
    # write_new_nodes():
    self._max_node_ids = [0]

    # A map {node_id : {cvs_path : node_id}}:
    self._cache = {}

    # The number of directories in the repository:
    num_dirs = len([
        cvs_path
        for cvs_path in self.cvs_path_db.itervalues()
        if isinstance(cvs_path, CVSDirectory)
        ])

    self._cache_max_size = max(
        int(self.CACHE_SIZE_MULTIPLIER * num_dirs),
        self.MIN_CACHE_LIMIT,
        )
Example #7
0
    def __init__(self, mode):
        self.mode = mode
        if mode not in (DB_OPEN_NEW, DB_OPEN_READ):
            raise RuntimeError("Invalid 'mode' argument to PersistenceManager")
        primer = (
            SVNInitialProjectCommit,
            SVNPrimaryCommit,
            SVNPostCommit,
            SVNBranchCommit,
            SVNTagCommit,
        )
        serializer = PrimedPickleSerializer(primer)
        self.svn_commit_db = IndexedDatabase(
            artifact_manager.get_temp_file(config.SVN_COMMITS_INDEX_TABLE),
            artifact_manager.get_temp_file(config.SVN_COMMITS_STORE), mode,
            serializer)
        self.cvs2svn_db = RecordTable(
            artifact_manager.get_temp_file(config.CVS_REVS_TO_SVN_REVNUMS),
            mode, SignedIntegerPacker(SVN_INVALID_REVNUM))
Example #8
0
def MetadataDatabase(store_filename, index_table_filename, mode):
    """A database to store Metadata instances that describe CVSRevisions.

  This database manages a map

      id -> Metadata instance

  where id is a unique identifier for the metadata."""

    return IndexedDatabase(
        store_filename,
        index_table_filename,
        mode,
        PrimedPickleSerializer((Metadata, )),
    )
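MetadataDatabase is just a factory that wires two temp files and a PrimedPickleSerializer into an IndexedDatabase. A hedged usage sketch; the temp-file names METADATA_STORE and METADATA_INDEX_TABLE are assumed here by analogy with the other stores in this document, and metadata stands for a Metadata instance created elsewhere:

db = MetadataDatabase(
    artifact_manager.get_temp_file(config.METADATA_STORE),
    artifact_manager.get_temp_file(config.METADATA_INDEX_TABLE),
    DB_OPEN_NEW,
    )
db[metadata.id] = metadata   # write a Metadata record under its unique id
db.close()
# A later pass would reopen the same files with DB_OPEN_READ and fetch
# records back with db[metadata_id].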
Example #9
0
  def __init__(self, mode):
    self.mode = mode
    if mode not in (DB_OPEN_NEW, DB_OPEN_READ):
      raise RuntimeError("Invalid 'mode' argument to PersistenceManager")
    primer = (
        SVNInitialProjectCommit,
        SVNPrimaryCommit,
        SVNPostCommit,
        SVNBranchCommit,
        SVNTagCommit,
        )
    serializer = PrimedPickleSerializer(primer)
    self.svn_commit_db = IndexedDatabase(
        artifact_manager.get_temp_file(config.SVN_COMMITS_INDEX_TABLE),
        artifact_manager.get_temp_file(config.SVN_COMMITS_STORE),
        mode, serializer)
    self.cvs2svn_db = RecordTable(
        artifact_manager.get_temp_file(config.CVS_REVS_TO_SVN_REVNUMS),
        mode, SignedIntegerPacker(SVN_INVALID_REVNUM))
Example #10
0
class InternalRevisionCollector(RevisionCollector):
    """The RevisionCollector used by InternalRevisionReader."""
    def __init__(self, compress):
        RevisionCollector.__init__(self)
        self._compress = compress

    def register_artifacts(self, which_pass):
        artifact_manager.register_temp_file(config.RCS_DELTAS_INDEX_TABLE,
                                            which_pass)
        artifact_manager.register_temp_file(config.RCS_DELTAS_STORE,
                                            which_pass)
        artifact_manager.register_temp_file(config.RCS_TREES_INDEX_TABLE,
                                            which_pass)
        artifact_manager.register_temp_file(config.RCS_TREES_STORE, which_pass)

    def start(self):
        serializer = MarshalSerializer()
        if self._compress:
            serializer = CompressingSerializer(serializer)
        self._delta_db = IndexedDatabase(
            artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
            artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
            DB_OPEN_NEW,
            serializer,
        )
        primer = (FullTextRecord, DeltaTextRecord)
        self._rcs_trees = IndexedDatabase(
            artifact_manager.get_temp_file(config.RCS_TREES_STORE),
            artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
            DB_OPEN_NEW,
            PrimedPickleSerializer(primer),
        )

    def _writeout(self, text_record, text):
        self.text_record_db.add(text_record)
        self._delta_db[text_record.id] = text

    def process_file(self, cvs_file_items):
        """Read revision information for the file described by CVS_FILE_ITEMS.

    Compute the text record refcounts, discard any records that are
    unneeded, and store the text records for the file to the
    _rcs_trees database."""

        # A map from cvs_rev_id to TextRecord instance:
        self.text_record_db = TextRecordDatabase(self._delta_db,
                                                 NullDatabase())

        parse(
            open(cvs_file_items.cvs_file.rcs_path, 'rb'),
            _Sink(self, cvs_file_items),
        )

        self.text_record_db.recompute_refcounts(cvs_file_items)
        self.text_record_db.free_unused()
        self._rcs_trees[cvs_file_items.cvs_file.id] = self.text_record_db
        del self.text_record_db

    def finish(self):
        self._delta_db.close()
        self._rcs_trees.close()
Example #11
0
class PersistenceManager:
  """The PersistenceManager allows us to effectively store SVNCommits
  to disk and retrieve them later using only their subversion revision
  number as the key.  It also returns the subversion revision number
  for a given CVSRevision's unique key.

  All information pertinent to each SVNCommit is stored in a series of
  on-disk databases so that SVNCommits can be retrieved on-demand.

  MODE is one of the constants DB_OPEN_NEW or DB_OPEN_READ.
  In 'new' mode, PersistenceManager will initialize a new set of on-disk
  databases and be fully-featured.
  In 'read' mode, PersistenceManager will open existing on-disk databases
  and the set_* methods will be unavailable."""

  def __init__(self, mode):
    self.mode = mode
    if mode not in (DB_OPEN_NEW, DB_OPEN_READ):
      raise RuntimeError("Invalid 'mode' argument to PersistenceManager")
    primer = (
        SVNInitialProjectCommit,
        SVNPrimaryCommit,
        SVNPostCommit,
        SVNBranchCommit,
        SVNTagCommit,
        )
    serializer = PrimedPickleSerializer(primer)
    self.svn_commit_db = IndexedDatabase(
        artifact_manager.get_temp_file(config.SVN_COMMITS_INDEX_TABLE),
        artifact_manager.get_temp_file(config.SVN_COMMITS_STORE),
        mode, serializer)
    self.cvs2svn_db = RecordTable(
        artifact_manager.get_temp_file(config.CVS_REVS_TO_SVN_REVNUMS),
        mode, SignedIntegerPacker(SVN_INVALID_REVNUM))

  def get_svn_revnum(self, cvs_rev_id):
    """Return the Subversion revision number in which CVS_REV_ID was
    committed, or SVN_INVALID_REVNUM if there is no mapping for
    CVS_REV_ID."""

    return self.cvs2svn_db.get(cvs_rev_id, SVN_INVALID_REVNUM)

  def get_svn_commit(self, svn_revnum):
    """Return an SVNCommit that corresponds to SVN_REVNUM.

    If no SVNCommit exists for revnum SVN_REVNUM, then return None."""

    return self.svn_commit_db.get(svn_revnum, None)

  def put_svn_commit(self, svn_commit):
    """Record the bidirectional mapping between SVN_REVNUM and
    CVS_REVS and record associated attributes."""

    if self.mode == DB_OPEN_READ:
      raise RuntimeError(
          'Write operation attempted on read-only PersistenceManager'
          )

    self.svn_commit_db[svn_commit.revnum] = svn_commit

    if isinstance(svn_commit, SVNRevisionCommit):
      for cvs_rev in svn_commit.cvs_revs:
        self.cvs2svn_db[cvs_rev.id] = svn_commit.revnum

  def close(self):
    self.cvs2svn_db.close()
    self.cvs2svn_db = None
    self.svn_commit_db.close()
    self.svn_commit_db = None
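The docstring above promises a two-way mapping between Subversion revision numbers and CVS revisions; in use, the write side and the read side look roughly like this (a sketch only: svn_commit, svn_revnum, and cvs_rev stand for objects produced by earlier stages of the conversion):

# Conversion pass (DB_OPEN_NEW): store each SVNCommit as it is created.
pm = PersistenceManager(DB_OPEN_NEW)
pm.put_svn_commit(svn_commit)            # also records cvs_rev.id -> revnum links
pm.close()

# Output pass (DB_OPEN_READ): look commits up by Subversion revision number.
pm = PersistenceManager(DB_OPEN_READ)
commit = pm.get_svn_commit(svn_revnum)   # None if no commit exists for that revnum
revnum = pm.get_svn_revnum(cvs_rev.id)   # SVN_INVALID_REVNUM if there is no mapping
pm.close()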
Example #12
0
class _NodeDatabase(object):
    """A database storing all of the directory nodes.

  The nodes are written in groups every time write_new_nodes() is
  called.  To the database is written a dictionary {node_id :
  [(cvs_path.id, node_id),...]}, where the keys are the node_ids of
  the new nodes.  When a node is read, its whole group is read and
  cached under the assumption that the other nodes in the group are
  likely to be needed soon.  The cache is retained across revisions
  and cleared when _cache_max_size is exceeded.

  The dictionaries for nodes that have been read from the database
  during the current revision are cached by node_id in the _cache
  member variable.  The corresponding dictionaries are *not* copied
  when read.  To avoid cross-talk between distinct MirrorDirectory
  instances that have the same node_id, users of these dictionaries
  have to copy them before modification."""

    # How many entries should be allowed in the cache for each
    # CVSDirectory in the repository.  (This number is very roughly the
    # number of complete lines of development that can be stored in the
    # cache at one time.)
    CACHE_SIZE_MULTIPLIER = 5

    # But the cache will never be limited to less than this number:
    MIN_CACHE_LIMIT = 5000

    def __init__(self):
        self.cvs_path_db = Ctx()._cvs_path_db
        self.db = IndexedDatabase(
            artifact_manager.get_temp_file(config.MIRROR_NODES_STORE),
            artifact_manager.get_temp_file(config.MIRROR_NODES_INDEX_TABLE),
            DB_OPEN_NEW,
            serializer=MarshalSerializer(),
        )

        # A list of the maximum node_id stored by each call to
        # write_new_nodes():
        self._max_node_ids = [0]

        # A map {node_id : {cvs_path : node_id}}:
        self._cache = {}

        # The number of directories in the repository:
        num_dirs = len([
            cvs_path for cvs_path in self.cvs_path_db.itervalues()
            if isinstance(cvs_path, CVSDirectory)
        ])

        self._cache_max_size = max(
            int(self.CACHE_SIZE_MULTIPLIER * num_dirs),
            self.MIN_CACHE_LIMIT,
        )

    def _load(self, items):
        retval = {}
        for (id, value) in items:
            retval[self.cvs_path_db.get_path(id)] = value
        return retval

    def _dump(self, node):
        return [(cvs_path.id, value) for (cvs_path, value) in node.iteritems()]

    def _determine_index(self, id):
        """Return the index of the record holding the node with ID."""

        return bisect.bisect_left(self._max_node_ids, id)

    def __getitem__(self, id):
        try:
            items = self._cache[id]
        except KeyError:
            index = self._determine_index(id)
            for (node_id, items) in self.db[index].items():
                self._cache[node_id] = self._load(items)
            items = self._cache[id]

        return items

    def write_new_nodes(self, nodes):
        """Write NODES to the database.

    NODES is an iterable of writable CurrentMirrorDirectory instances."""

        if len(self._cache) > self._cache_max_size:
            # The size of the cache has exceeded the threshold.  Discard the
            # old cache values (but still store the new nodes into the
            # cache):
            logger.debug('Clearing node cache')
            self._cache.clear()

        data = {}
        max_node_id = 0
        for node in nodes:
            max_node_id = max(max_node_id, node.id)
            data[node.id] = self._dump(node._entries)
            self._cache[node.id] = node._entries

        self.db[len(self._max_node_ids)] = data

        if max_node_id == 0:
            # Rewrite last value:
            self._max_node_ids.append(self._max_node_ids[-1])
        else:
            self._max_node_ids.append(max_node_id)

    def close(self):
        self._cache.clear()
        self.db.close()
        self.db = None
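The group lookup in _determine_index() is easiest to see with concrete numbers: _max_node_ids records the highest node id written by each call to write_new_nodes(), so bisect_left finds the record index for any node id. A toy illustration, independent of the real databases:

import bisect

# After three calls to write_new_nodes() whose highest node ids were
# 10, 10 (a call that wrote no nodes repeats the previous value) and 25:
max_node_ids = [0, 10, 10, 25]

assert bisect.bisect_left(max_node_ids, 7) == 1    # node 7 lives in record 1
assert bisect.bisect_left(max_node_ids, 18) == 3   # node 18 lives in record 3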
Example #13
0
class PersistenceManager:
    """The PersistenceManager allows us to effectively store SVNCommits
  to disk and retrieve them later using only their subversion revision
  number as the key.  It also returns the subversion revision number
  for a given CVSRevision's unique key.

  All information pertinent to each SVNCommit is stored in a series of
  on-disk databases so that SVNCommits can be retrieved on-demand.

  MODE is one of the constants DB_OPEN_NEW or DB_OPEN_READ.
  In 'new' mode, PersistenceManager will initialize a new set of on-disk
  databases and be fully-featured.
  In 'read' mode, PersistenceManager will open existing on-disk databases
  and the set_* methods will be unavailable."""
    def __init__(self, mode):
        self.mode = mode
        if mode not in (DB_OPEN_NEW, DB_OPEN_READ):
            raise RuntimeError("Invalid 'mode' argument to PersistenceManager")
        primer = (
            SVNInitialProjectCommit,
            SVNPrimaryCommit,
            SVNPostCommit,
            SVNBranchCommit,
            SVNTagCommit,
        )
        serializer = PrimedPickleSerializer(primer)
        self.svn_commit_db = IndexedDatabase(
            artifact_manager.get_temp_file(config.SVN_COMMITS_INDEX_TABLE),
            artifact_manager.get_temp_file(config.SVN_COMMITS_STORE), mode,
            serializer)
        self.cvs2svn_db = RecordTable(
            artifact_manager.get_temp_file(config.CVS_REVS_TO_SVN_REVNUMS),
            mode, SignedIntegerPacker(SVN_INVALID_REVNUM))

    def get_svn_revnum(self, cvs_rev_id):
        """Return the Subversion revision number in which CVS_REV_ID was
    committed, or SVN_INVALID_REVNUM if there is no mapping for
    CVS_REV_ID."""

        return self.cvs2svn_db.get(cvs_rev_id, SVN_INVALID_REVNUM)

    def get_svn_commit(self, svn_revnum):
        """Return an SVNCommit that corresponds to SVN_REVNUM.

    If no SVNCommit exists for revnum SVN_REVNUM, then return None."""

        return self.svn_commit_db.get(svn_revnum, None)

    def put_svn_commit(self, svn_commit):
        """Record the bidirectional mapping between SVN_REVNUM and
    CVS_REVS and record associated attributes."""

        if self.mode == DB_OPEN_READ:
            raise RuntimeError(
                'Write operation attempted on read-only PersistenceManager')

        self.svn_commit_db[svn_commit.revnum] = svn_commit

        if isinstance(svn_commit, SVNRevisionCommit):
            for cvs_rev in svn_commit.cvs_revs:
                self.cvs2svn_db[cvs_rev.id] = svn_commit.revnum

    def close(self):
        self.cvs2svn_db.close()
        self.cvs2svn_db = None
        self.svn_commit_db.close()
        self.svn_commit_db = None
Example #14
0
class _NodeDatabase(object):
  """A database storing all of the directory nodes.

  The nodes are written in groups every time write_new_nodes() is
  called.  To the database is written a dictionary {node_id :
  [(cvs_path.id, node_id),...]}, where the keys are the node_ids of
  the new nodes.  When a node is read, its whole group is read and
  cached under the assumption that the other nodes in the group are
  likely to be needed soon.  The cache is retained across revisions
  and cleared when _cache_max_size is exceeded.

  The dictionaries for nodes that have been read from the database
  during the current revision are cached by node_id in the _cache
  member variable.  The corresponding dictionaries are *not* copied
  when read.  To avoid cross-talk between distinct MirrorDirectory
  instances that have the same node_id, users of these dictionaries
  have to copy them before modification."""

  # How many entries should be allowed in the cache for each
  # CVSDirectory in the repository.  (This number is very roughly the
  # number of complete lines of development that can be stored in the
  # cache at one time.)
  CACHE_SIZE_MULTIPLIER = 5

  # But the cache will never be limited to less than this number:
  MIN_CACHE_LIMIT = 5000

  def __init__(self):
    self.cvs_path_db = Ctx()._cvs_path_db
    self.db = IndexedDatabase(
        artifact_manager.get_temp_file(config.MIRROR_NODES_STORE),
        artifact_manager.get_temp_file(config.MIRROR_NODES_INDEX_TABLE),
        DB_OPEN_NEW, serializer=MarshalSerializer(),
        )

    # A list of the maximum node_id stored by each call to
    # write_new_nodes():
    self._max_node_ids = [0]

    # A map {node_id : {cvs_path : node_id}}:
    self._cache = {}

    # The number of directories in the repository:
    num_dirs = len([
        cvs_path
        for cvs_path in self.cvs_path_db.itervalues()
        if isinstance(cvs_path, CVSDirectory)
        ])

    self._cache_max_size = max(
        int(self.CACHE_SIZE_MULTIPLIER * num_dirs),
        self.MIN_CACHE_LIMIT,
        )

  def _load(self, items):
    retval = {}
    for (id, value) in items:
      retval[self.cvs_path_db.get_path(id)] = value
    return retval

  def _dump(self, node):
    return [
        (cvs_path.id, value)
        for (cvs_path, value) in node.iteritems()
        ]

  def _determine_index(self, id):
    """Return the index of the record holding the node with ID."""

    return bisect.bisect_left(self._max_node_ids, id)

  def __getitem__(self, id):
    try:
      items = self._cache[id]
    except KeyError:
      index = self._determine_index(id)
      for (node_id, items) in self.db[index].items():
        self._cache[node_id] = self._load(items)
      items = self._cache[id]

    return items

  def write_new_nodes(self, nodes):
    """Write NODES to the database.

    NODES is an iterable of writable CurrentMirrorDirectory instances."""

    if len(self._cache) > self._cache_max_size:
      # The size of the cache has exceeded the threshold.  Discard the
      # old cache values (but still store the new nodes into the
      # cache):
      logger.debug('Clearing node cache')
      self._cache.clear()

    data = {}
    max_node_id = 0
    for node in nodes:
      max_node_id = max(max_node_id, node.id)
      data[node.id] = self._dump(node._entries)
      self._cache[node.id] = node._entries

    self.db[len(self._max_node_ids)] = data

    if max_node_id == 0:
      # Rewrite last value:
      self._max_node_ids.append(self._max_node_ids[-1])
    else:
      self._max_node_ids.append(max_node_id)

  def close(self):
    self._cache.clear()
    self.db.close()
    self.db = None