def __init__(self, filename, index_filename, mode, serializer=None):
    """Initialize an IndexedDatabase, writing the serializer if necessary.

    SERIALIZER is only used if MODE is DB_OPEN_NEW; otherwise the
    serializer is read from the file."""

    self.filename = filename
    self.index_filename = index_filename
    self.mode = mode
    # Map each supported mode onto the flags used to open the data file:
    open_flags = {
        DB_OPEN_NEW: 'wb+',
        DB_OPEN_WRITE: 'rb+',
        DB_OPEN_READ: 'rb',
        }
    if self.mode not in open_flags:
        raise RuntimeError('Invalid mode %r' % self.mode)
    self.f = open(self.filename, open_flags[self.mode])
    self.index_table = RecordTable(
        self.index_filename, self.mode, FileOffsetPacker()
        )
    if self.mode == DB_OPEN_NEW:
        assert serializer is not None
        self.serializer = serializer
        # Store the serializer as the zeroth record so that later readers
        # can recover it from the file itself:
        cPickle.dump(self.serializer, self.f, -1)
    else:
        # Read the memo from the first pickle:
        self.serializer = cPickle.load(self.f)
    # Seek to the end of the file and remember that position:
    self.f.seek(0, 2)
    self.eofp = self.f.tell()
    self.fp = self.eofp
def __init__(self, filename, index_filename, mode, serializer=None):
    """Open the data file for MODE and establish the serializer.

    SERIALIZER is consulted only when MODE is DB_OPEN_NEW; in every other
    mode the serializer is unpickled from the start of the file."""

    self.filename = filename
    self.index_filename = index_filename
    self.mode = mode
    if self.mode == DB_OPEN_READ:
        self.f = open(self.filename, 'rb')
    elif self.mode == DB_OPEN_WRITE:
        self.f = open(self.filename, 'rb+')
    elif self.mode == DB_OPEN_NEW:
        self.f = open(self.filename, 'wb+')
    else:
        raise RuntimeError('Invalid mode %r' % self.mode)
    self.index_table = RecordTable(
        self.index_filename, self.mode, FileOffsetPacker())
    if self.mode != DB_OPEN_NEW:
        # An existing file begins with a pickled serializer; recover it:
        self.serializer = cPickle.load(self.f)
    else:
        assert serializer is not None
        self.serializer = serializer
        cPickle.dump(self.serializer, self.f, -1)
    # Position at end-of-file and remember both file pointers:
    self.f.seek(0, 2)
    self.fp = self.eofp = self.f.tell()
def __init__(self, mode):
    """Initialize the PersistenceManager's on-disk databases.

    MODE must be DB_OPEN_NEW or DB_OPEN_READ; any other value raises
    RuntimeError."""

    self.mode = mode
    if mode not in (DB_OPEN_NEW, DB_OPEN_READ):
        # Call-form raise: the Python-2-only 'raise E, msg' statement form
        # is a syntax error under Python 3 and inconsistent with the raise
        # style used elsewhere in this file.
        raise RuntimeError("Invalid 'mode' argument to PersistenceManager")
    primer = (
        SVNInitialProjectCommit,
        SVNPrimaryCommit,
        SVNPostCommit,
        SVNBranchCommit,
        SVNTagCommit,
        )
    serializer = PrimedPickleSerializer(primer)
    # svn revision number -> SVNCommit, stored in an indexed pickle file:
    self.svn_commit_db = IndexedDatabase(
        artifact_manager.get_temp_file(config.SVN_COMMITS_INDEX_TABLE),
        artifact_manager.get_temp_file(config.SVN_COMMITS_STORE),
        mode, serializer)
    # CVSRevision id -> svn revision number:
    self.cvs2svn_db = RecordTable(
        artifact_manager.get_temp_file(config.CVS_REVS_TO_SVN_REVNUMS),
        mode, SignedIntegerPacker(SVN_INVALID_REVNUM))
def __init__(self, mode):
    """Open (or create) the on-disk commit databases for MODE.

    MODE must be DB_OPEN_NEW or DB_OPEN_READ; anything else raises
    RuntimeError."""

    self.mode = mode
    if mode not in (DB_OPEN_NEW, DB_OPEN_READ):
        raise RuntimeError("Invalid 'mode' argument to PersistenceManager")
    # The serializer is primed with every SVNCommit subclass we may store:
    commit_classes = (
        SVNInitialProjectCommit, SVNPrimaryCommit, SVNPostCommit,
        SVNBranchCommit, SVNTagCommit,
        )
    self.svn_commit_db = IndexedDatabase(
        artifact_manager.get_temp_file(config.SVN_COMMITS_INDEX_TABLE),
        artifact_manager.get_temp_file(config.SVN_COMMITS_STORE),
        mode,
        PrimedPickleSerializer(commit_classes),
        )
    self.cvs2svn_db = RecordTable(
        artifact_manager.get_temp_file(config.CVS_REVS_TO_SVN_REVNUMS),
        mode,
        SignedIntegerPacker(SVN_INVALID_REVNUM),
        )
class PersistenceManager:
    """The PersistenceManager allows us to effectively store SVNCommits
    to disk and retrieve them later using only their subversion revision
    number as the key.  It also returns the subversion revision number
    for a given CVSRevision's unique key.

    All information pertinent to each SVNCommit is stored in a series of
    on-disk databases so that SVNCommits can be retrieved on-demand.

    MODE is one of the constants DB_OPEN_NEW or DB_OPEN_READ.  In 'new'
    mode, PersistenceManager will initialize a new set of on-disk
    databases and be fully-featured.  In 'read' mode, PersistenceManager
    will open existing on-disk databases and the set_* methods will be
    unavailable."""

    def __init__(self, mode):
        self.mode = mode
        if mode not in (DB_OPEN_NEW, DB_OPEN_READ):
            # Call-form raise: the Python-2-only 'raise E, msg' statement
            # form is a syntax error under Python 3 and inconsistent with
            # the raise style used elsewhere in this file.
            raise RuntimeError("Invalid 'mode' argument to PersistenceManager")
        primer = (
            SVNInitialProjectCommit,
            SVNPrimaryCommit,
            SVNPostCommit,
            SVNBranchCommit,
            SVNTagCommit,
            )
        serializer = PrimedPickleSerializer(primer)
        # svn revision number -> SVNCommit, stored in an indexed pickle file:
        self.svn_commit_db = IndexedDatabase(
            artifact_manager.get_temp_file(config.SVN_COMMITS_INDEX_TABLE),
            artifact_manager.get_temp_file(config.SVN_COMMITS_STORE),
            mode, serializer)
        # CVSRevision id -> svn revision number:
        self.cvs2svn_db = RecordTable(
            artifact_manager.get_temp_file(config.CVS_REVS_TO_SVN_REVNUMS),
            mode, SignedIntegerPacker(SVN_INVALID_REVNUM))

    def get_svn_revnum(self, cvs_rev_id):
        """Return the Subversion revision number in which CVS_REV_ID was
        committed, or SVN_INVALID_REVNUM if there is no mapping for
        CVS_REV_ID."""

        return self.cvs2svn_db.get(cvs_rev_id, SVN_INVALID_REVNUM)

    def get_svn_commit(self, svn_revnum):
        """Return an SVNCommit that corresponds to SVN_REVNUM.

        If no SVNCommit exists for revnum SVN_REVNUM, then return None."""

        return self.svn_commit_db.get(svn_revnum, None)

    def put_svn_commit(self, svn_commit):
        """Record the bidirectional mapping between SVN_REVNUM and
        CVS_REVS and record associated attributes.

        Raise RuntimeError if this manager was opened read-only."""

        if self.mode == DB_OPEN_READ:
            # Fixed from the Python-2-only 'raise E, msg' statement form:
            raise RuntimeError(
                'Write operation attempted on read-only PersistenceManager')
        self.svn_commit_db[svn_commit.revnum] = svn_commit
        if isinstance(svn_commit, SVNRevisionCommit):
            for cvs_rev in svn_commit.cvs_revs:
                self.cvs2svn_db[cvs_rev.id] = svn_commit.revnum

    def close(self):
        """Close both databases and drop the references to them."""

        self.cvs2svn_db.close()
        self.cvs2svn_db = None
        self.svn_commit_db.close()
        self.svn_commit_db = None
class IndexedDatabase:
    """A file of objects that are written sequentially and read randomly.

    The objects are indexed by small non-negative integers, and a
    RecordTable is used to store the index -> fileoffset map.
    fileoffset=0 is used to represent an empty record.  (An offset of 0
    cannot occur for a legitimate record because the serializer is
    written there.)

    The main file consists of a sequence of pickles (or other serialized
    data format).  The zeroth record is a pickled Serializer.
    Subsequent ones are objects serialized using the serializer.  The
    offset of each object in the file is stored to an index table so
    that the data can later be retrieved randomly.

    Objects are always stored to the end of the file.  If an object is
    deleted or overwritten, the fact is recorded in the index_table but
    the space in the pickle file is not garbage collected.  This has the
    advantage that one can create a modified version of a database that
    shares the main data file with an old version by copying the index
    file.  But it has the disadvantage that space is wasted whenever
    objects are written multiple times."""

    def __init__(self, filename, index_filename, mode, serializer=None):
        """Initialize an IndexedDatabase, writing the serializer if necessary.

        SERIALIZER is only used if MODE is DB_OPEN_NEW; otherwise the
        serializer is read from the file."""

        self.filename = filename
        self.index_filename = index_filename
        self.mode = mode
        if self.mode == DB_OPEN_NEW:
            self.f = open(self.filename, 'wb+')
        elif self.mode == DB_OPEN_WRITE:
            self.f = open(self.filename, 'rb+')
        elif self.mode == DB_OPEN_READ:
            self.f = open(self.filename, 'rb')
        else:
            raise RuntimeError('Invalid mode %r' % self.mode)
        # index -> byte offset of the record within self.f:
        self.index_table = RecordTable(
            self.index_filename, self.mode, FileOffsetPacker())
        if self.mode == DB_OPEN_NEW:
            assert serializer is not None
            self.serializer = serializer
            # Write the serializer itself as the zeroth record:
            cPickle.dump(self.serializer, self.f, -1)
        else:
            # Read the memo from the first pickle:
            self.serializer = cPickle.load(self.f)
        # Seek to the end of the file, and record that position:
        self.f.seek(0, 2)
        # self.fp tracks the current file position (None when unknown);
        # self.eofp is the end-of-file offset where new records go.
        self.fp = self.f.tell()
        self.eofp = self.fp

    def __setitem__(self, index, item):
        """Write ITEM into the database indexed by INDEX."""

        # Make sure we're at the end of the file:
        if self.fp != self.eofp:
            self.f.seek(self.eofp)
        # Record where this item starts before appending it:
        self.index_table[index] = self.eofp
        s = self.serializer.dumps(item)
        self.f.write(s)
        self.eofp += len(s)
        self.fp = self.eofp

    def _fetch(self, offset):
        # Deserialize and return the single record starting at OFFSET.
        if self.fp != offset:
            self.f.seek(offset)
        # There is no easy way to tell how much data will be read, so just
        # indicate that we don't know the current file pointer:
        self.fp = None
        return self.serializer.loadf(self.f)

    def iterkeys(self):
        # Iterate over the indexes that currently have stored values.
        return self.index_table.iterkeys()

    def itervalues(self):
        # Iterate over the stored objects, in index-table order.
        for offset in self.index_table.itervalues():
            yield self._fetch(offset)

    def __getitem__(self, index):
        # Raises KeyError (via index_table) if INDEX has no value.
        offset = self.index_table[index]
        return self._fetch(offset)

    def get(self, item, default=None):
        # Like __getitem__, but return DEFAULT instead of raising KeyError.
        try:
            return self[item]
        except KeyError:
            return default

    def get_many(self, indexes, default=None):
        """Yield (index,item) tuples for INDEXES, in arbitrary order.

        Yield (index,default) for indexes with no defined values."""

        offsets = []
        for (index, offset) in self.index_table.get_many(indexes):
            if offset is None:
                yield (index, default)
            else:
                offsets.append((offset, index))
        # Sort the offsets to reduce disk seeking:
        offsets.sort()
        for (offset, index) in offsets:
            yield (index, self._fetch(offset))

    def __delitem__(self, index):
        # We don't actually free the data in self.f.
        del self.index_table[index]

    def close(self):
        # Close the index table and data file and drop the references.
        self.index_table.close()
        self.index_table = None
        self.f.close()
        self.f = None

    def __str__(self):
        return 'IndexedDatabase(%r)' % (self.filename, )
class IndexedDatabase:
    """Append-only object store with random read access via an index table.

    Objects are keyed by small non-negative integers; a RecordTable maps
    each index to the byte offset of the corresponding record in the data
    file.  Offset 0 stands for an empty record -- it can never belong to a
    real record because the serializer itself occupies the start of the
    file.

    The data file is a stream of serialized records.  Record zero is a
    pickled Serializer; all later records are produced by that serializer.
    New records are always appended; deleting or overwriting an entry only
    updates the index table, so stale data remains in the file.  That
    wastes space when objects are rewritten, but it lets a modified
    database share its data file with an older copy that differs only in
    its index file."""

    def __init__(self, filename, index_filename, mode, serializer=None):
        """Open the store; persist SERIALIZER when creating a new file.

        SERIALIZER matters only when MODE is DB_OPEN_NEW; in any other
        mode it is unpickled from the start of the data file."""

        self.filename = filename
        self.index_filename = index_filename
        self.mode = mode
        if self.mode == DB_OPEN_NEW:
            self.f = open(self.filename, 'wb+')
        elif self.mode == DB_OPEN_WRITE:
            self.f = open(self.filename, 'rb+')
        elif self.mode == DB_OPEN_READ:
            self.f = open(self.filename, 'rb')
        else:
            raise RuntimeError('Invalid mode %r' % self.mode)
        self.index_table = RecordTable(
            self.index_filename, self.mode, FileOffsetPacker())
        if self.mode != DB_OPEN_NEW:
            # Recover the serializer that was stored when the file was made:
            self.serializer = cPickle.load(self.f)
        else:
            assert serializer is not None
            self.serializer = serializer
            cPickle.dump(self.serializer, self.f, -1)
        # Jump to end-of-file and remember that position twice over: fp is
        # the current file position, eofp is where the next record goes.
        self.f.seek(0, 2)
        self.fp = self.eofp = self.f.tell()

    def __setitem__(self, index, item):
        """Append ITEM to the data file and point INDEX at it."""

        if self.fp != self.eofp:
            # Writes always happen at the end of the file:
            self.f.seek(self.eofp)
        self.index_table[index] = self.eofp
        data = self.serializer.dumps(item)
        self.f.write(data)
        self.eofp += len(data)
        self.fp = self.eofp

    def _fetch(self, offset):
        """Read back and deserialize the record that starts at OFFSET."""

        if self.fp != offset:
            self.f.seek(offset)
        # The deserializer consumes an unknown number of bytes, so the
        # file position is unknown afterwards:
        self.fp = None
        return self.serializer.loadf(self.f)

    def iterkeys(self):
        """Iterate over the indexes that have stored values."""

        return self.index_table.iterkeys()

    def itervalues(self):
        """Iterate over the stored objects, in index-table order."""

        for pos in self.index_table.itervalues():
            yield self._fetch(pos)

    def __getitem__(self, index):
        """Return the object stored under INDEX; raise KeyError if absent."""

        return self._fetch(self.index_table[index])

    def get(self, item, default=None):
        """Return the object stored under ITEM, or DEFAULT if absent."""

        try:
            return self[item]
        except KeyError:
            return default

    def get_many(self, indexes, default=None):
        """Yield (index,item) tuples for INDEXES, in arbitrary order.

        Yield (index,default) for indexes with no defined values."""

        pending = []
        for (index, pos) in self.index_table.get_many(indexes):
            if pos is None:
                yield (index, default)
            else:
                pending.append((pos, index))
        # Visit the remaining records in file order to limit seeking:
        pending.sort()
        for (pos, index) in pending:
            yield (index, self._fetch(pos))

    def __delitem__(self, index):
        """Forget INDEX.  The record's bytes remain in the data file."""

        del self.index_table[index]

    def close(self):
        """Close the index table and the data file; drop the references."""

        self.index_table.close()
        self.index_table = None
        self.f.close()
        self.f = None

    def __str__(self):
        return 'IndexedDatabase(%r)' % (self.filename,)
def CVSItemToChangesetTable(filename, mode):
    """Return a record table mapping CVSItem ids to changeset ids.

    The backing implementation is selected by the module-level
    use_mmap_for_cvs_item_to_changeset_table flag."""

    if use_mmap_for_cvs_item_to_changeset_table:
        table_class = MmapRecordTable
    else:
        table_class = RecordTable
    return table_class(filename, mode, UnsignedIntegerPacker())