def runTest(self):
    """Parse the ``,v`` file and check that its deltas invert consistently.

    The test reads ``self.filename + ',v'`` and expects the recorded text
    for revision '1.2' to equal ``self.v2``.  The text recorded for
    revision '1.1' is used as a delta: applying it to the 1.2 text must
    produce ``self.v1``.  The delta is then inverted twice through a
    single RCSStream, and every resulting delta is cross-checked with
    applyTest().

    When STRICT_INVERSES is true, the twice-inverted delta must be
    identical to the original delta.  Otherwise it only has to transform
    ``self.v2`` into ``self.v1``.
    """
    rcs_path = self.filename + ',v'
    # assertTrue replaces the deprecated assert_ alias; the message makes
    # a missing fixture obvious in the failure report.
    self.assertTrue(
        os.path.isfile(rcs_path),
        'RCS file %r does not exist' % (rcs_path,))

    recorder = RCSRecorder()
    f = open(rcs_path, 'rb')
    try:
        parse(f, recorder)
    finally:
        # Close the file even if parsing raises.
        f.close()

    v2 = recorder.texts['1.2']
    self.assertEqual(v2, self.v2)
    delta = recorder.texts['1.1']

    s = RCSStream(v2)
    self.assertEqual(s.get_text(), self.v2)

    # The first inversion must leave the stream holding v1 and returns
    # the delta for the opposite direction.
    invdelta = s.invert_diff(delta)
    self.assertEqual(s.get_text(), self.v1)

    # Inverting the inverse yields a delta for the original direction.
    delta2 = s.invert_diff(invdelta)

    self.applyTest(self.v2, delta, self.v1)
    self.applyTest(self.v1, invdelta, self.v2)

    if STRICT_INVERSES:
        self.assertEqual(delta2, delta)
    elif delta2 != delta:
        # A textually different delta is acceptable as long as it has the
        # same effect as the original one.
        self.applyTest(self.v2, delta2, self.v1)
def applyTest(self, old, delta, new):
    """Assert that DELTA turns OLD into NEW via both patching methods.

    For each of RCSStream.apply_diff() and RCSStream.invert_diff(), in
    that order, a fresh stream is created from OLD, its text is checked
    to equal OLD, the delta is fed through the method, and the stream's
    text is checked to equal NEW.
    """
    for method_name in ('apply_diff', 'invert_diff'):
        stream = RCSStream(old)
        self.assertEqual(stream.get_text(), old)
        patch = getattr(stream, method_name)
        # Any return value (e.g. the inverse delta) is ignored here; only
        # the resulting stream contents are checked.
        patch(delta)
        self.assertEqual(stream.get_text(), new)
class _Sink(Sink): def __init__(self, revision_collector, cvs_file_items): self.revision_collector = revision_collector self.cvs_file_items = cvs_file_items # A map {rev : base_rev} indicating that the text for rev is # stored in CVS as a delta relative to base_rev. self.base_revisions = {} # The revision that is stored with its fulltext in CVS (usually # the oldest revision on trunk): self.head_revision = None # The first logical revision on trunk (usually '1.1'): self.revision_1_1 = None # Keep track of the revisions whose revision info has been seen so # far (to avoid repeated revision info blocks): self.revisions_seen = set() def set_head_revision(self, revision): self.head_revision = revision def define_revision(self, revision, timestamp, author, state, branches, next): if next: self.base_revisions[next] = revision else: if is_trunk_revision(revision): self.revision_1_1 = revision for branch in branches: self.base_revisions[branch] = revision def set_revision_info(self, revision, log, text): if revision in self.revisions_seen: # One common form of CVS repository corruption is that the # Deltatext block for revision 1.1 appears twice. CollectData # has already warned about this problem; here we can just ignore # it. return else: self.revisions_seen.add(revision) cvs_rev_id = self.cvs_file_items.original_ids[revision] if is_trunk_revision(revision): # On trunk, revisions are encountered in reverse order (1.<N> # ... 1.1) and deltas are inverted. The first text that we see # is the fulltext for the HEAD revision. After that, the text # corresponding to revision 1.N is the delta (1.<N+1> -> # 1.<N>)). We have to invert the deltas here so that we can # read the revisions out in dependency order; that is, for # revision 1.1 we want the fulltext, and for revision 1.<N> we # want the delta (1.<N-1> -> 1.<N>). This means that we can't # compute the delta for a revision until we see its logical # parent. 
When we finally see revision 1.1 (which is recognized # because it doesn't have a parent), we can record the diff (1.1 # -> 1.2) for revision 1.2, and also the fulltext for 1.1. if revision == self.head_revision: # This is HEAD, as fulltext. Initialize the RCSStream so # that we can compute deltas backwards in time. self._rcs_stream = RCSStream(text) self._rcs_stream_revision = revision else: # Any other trunk revision is a backward delta. Apply the # delta to the RCSStream to mutate it to the contents of this # revision, and also to get the reverse delta, which we store # as the forward delta of our child revision. try: text = self._rcs_stream.invert_diff(text) except MalformedDeltaException, e: logger.error( 'Malformed RCS delta in %s, revision %s: %s' % (self.cvs_file_items.cvs_file.rcs_path, revision, e)) raise RuntimeError() text_record = DeltaTextRecord( self.cvs_file_items.original_ids[ self._rcs_stream_revision], cvs_rev_id) self.revision_collector._writeout(text_record, text) self._rcs_stream_revision = revision if revision == self.revision_1_1: # This is revision 1.1. Write its fulltext: text_record = FullTextRecord(cvs_rev_id) self.revision_collector._writeout(text_record, self._rcs_stream.get_text()) # There will be no more trunk revisions delivered, so free the # RCSStream. del self._rcs_stream del self._rcs_stream_revision else:
class InternalRevisionRecorder(RevisionRecorder): """A RevisionRecorder that reconstructs the fulltext internally.""" def __init__(self, compress): RevisionRecorder.__init__(self) self._compress = compress def register_artifacts(self, which_pass): artifact_manager.register_temp_file(config.RCS_DELTAS_INDEX_TABLE, which_pass) artifact_manager.register_temp_file(config.RCS_DELTAS_STORE, which_pass) artifact_manager.register_temp_file(config.RCS_TREES_INDEX_TABLE, which_pass) artifact_manager.register_temp_file(config.RCS_TREES_STORE, which_pass) def start(self): ser = MarshalSerializer() if self._compress: ser = CompressingSerializer(ser) self._rcs_deltas = IndexedDatabase( artifact_manager.get_temp_file(config.RCS_DELTAS_STORE), artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE), DB_OPEN_NEW, ser) primer = (FullTextRecord, DeltaTextRecord) self._rcs_trees = IndexedDatabase( artifact_manager.get_temp_file(config.RCS_TREES_STORE), artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE), DB_OPEN_NEW, PrimedPickleSerializer(primer)) def start_file(self, cvs_file_items): self._cvs_file_items = cvs_file_items # A map from cvs_rev_id to TextRecord instance: self.text_record_db = TextRecordDatabase(self._rcs_deltas, NullDatabase()) def record_text(self, cvs_rev, log, text): if isinstance(cvs_rev.lod, Trunk): # On trunk, revisions are encountered in reverse order (1.<N> # ... 1.1) and deltas are inverted. The first text that we see # is the fulltext for the HEAD revision. After that, the text # corresponding to revision 1.N is the delta (1.<N+1> -> # 1.<N>)). We have to invert the deltas here so that we can # read the revisions out in dependency order; that is, for # revision 1.1 we want the fulltext, and for revision 1.<N> we # want the delta (1.<N-1> -> 1.<N>). This means that we can't # compute the delta for a revision until we see its logical # parent. 
When we finally see revision 1.1 (which is recognized # because it doesn't have a parent), we can record the diff (1.1 # -> 1.2) for revision 1.2, and also the fulltext for 1.1. if cvs_rev.next_id is None: # This is HEAD, as fulltext. Initialize the RCSStream so # that we can compute deltas backwards in time. self._stream = RCSStream(text) else: # Any other trunk revision is a backward delta. Apply the # delta to the RCSStream to mutate it to the contents of this # revision, and also to get the reverse delta, which we store # as the forward delta of our child revision. try: text = self._stream.invert_diff(text) except MalformedDeltaException, (msg): Log().error( 'Malformed RCS delta in %s, revision %s: %s' % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev, msg)) raise RuntimeError text_record = DeltaTextRecord(cvs_rev.next_id, cvs_rev.id) self._writeout(text_record, text) if cvs_rev.prev_id is None: # This is revision 1.1. Write its fulltext: text_record = FullTextRecord(cvs_rev.id) self._writeout(text_record, self._stream.get_text()) # There will be no more trunk revisions delivered, so free the # RCSStream. del self._stream else:
class InternalRevisionRecorder(RevisionRecorder): """A RevisionRecorder that reconstructs the fulltext internally.""" def __init__(self, compress): RevisionRecorder.__init__(self) self._compress = compress def register_artifacts(self, which_pass): artifact_manager.register_temp_file( config.RCS_DELTAS_INDEX_TABLE, which_pass ) artifact_manager.register_temp_file(config.RCS_DELTAS_STORE, which_pass) artifact_manager.register_temp_file( config.RCS_TREES_INDEX_TABLE, which_pass ) artifact_manager.register_temp_file(config.RCS_TREES_STORE, which_pass) def start(self): ser = MarshalSerializer() if self._compress: ser = CompressingSerializer(ser) self._rcs_deltas = IndexedDatabase( artifact_manager.get_temp_file(config.RCS_DELTAS_STORE), artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE), DB_OPEN_NEW, ser) primer = (FullTextRecord, DeltaTextRecord) self._rcs_trees = IndexedDatabase( artifact_manager.get_temp_file(config.RCS_TREES_STORE), artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE), DB_OPEN_NEW, PrimedPickleSerializer(primer)) def start_file(self, cvs_file_items): self._cvs_file_items = cvs_file_items # A map from cvs_rev_id to TextRecord instance: self.text_record_db = TextRecordDatabase(self._rcs_deltas, NullDatabase()) def record_text(self, cvs_rev, log, text): if isinstance(cvs_rev.lod, Trunk): # On trunk, revisions are encountered in reverse order (1.<N> # ... 1.1) and deltas are inverted. The first text that we see # is the fulltext for the HEAD revision. After that, the text # corresponding to revision 1.N is the delta (1.<N+1> -> # 1.<N>)). We have to invert the deltas here so that we can # read the revisions out in dependency order; that is, for # revision 1.1 we want the fulltext, and for revision 1.<N> we # want the delta (1.<N-1> -> 1.<N>). This means that we can't # compute the delta for a revision until we see its logical # parent. 
When we finally see revision 1.1 (which is recognized # because it doesn't have a parent), we can record the diff (1.1 # -> 1.2) for revision 1.2, and also the fulltext for 1.1. if cvs_rev.next_id is None: # This is HEAD, as fulltext. Initialize the RCSStream so # that we can compute deltas backwards in time. self._stream = RCSStream(text) else: # Any other trunk revision is a backward delta. Apply the # delta to the RCSStream to mutate it to the contents of this # revision, and also to get the reverse delta, which we store # as the forward delta of our child revision. try: text = self._stream.invert_diff(text) except MalformedDeltaException, (msg): Log().error('Malformed RCS delta in %s, revision %s: %s' % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev, msg)) raise RuntimeError text_record = DeltaTextRecord(cvs_rev.next_id, cvs_rev.id) self._writeout(text_record, text) if cvs_rev.prev_id is None: # This is revision 1.1. Write its fulltext: text_record = FullTextRecord(cvs_rev.id) self._writeout(text_record, self._stream.get_text()) # There will be no more trunk revisions delivered, so free the # RCSStream. del self._stream else:
class _Sink(Sink): def __init__(self, revision_collector, cvs_file_items): self.revision_collector = revision_collector self.cvs_file_items = cvs_file_items # A map {rev : base_rev} indicating that the text for rev is # stored in CVS as a delta relative to base_rev. self.base_revisions = {} # The revision that is stored with its fulltext in CVS (usually # the oldest revision on trunk): self.head_revision = None # The first logical revision on trunk (usually '1.1'): self.revision_1_1 = None # Keep track of the revisions whose revision info has been seen so # far (to avoid repeated revision info blocks): self.revisions_seen = set() def set_head_revision(self, revision): self.head_revision = revision def define_revision( self, revision, timestamp, author, state, branches, next ): if next: self.base_revisions[next] = revision else: if is_trunk_revision(revision): self.revision_1_1 = revision for branch in branches: self.base_revisions[branch] = revision def set_revision_info(self, revision, log, text): if revision in self.revisions_seen: # One common form of CVS repository corruption is that the # Deltatext block for revision 1.1 appears twice. CollectData # has already warned about this problem; here we can just ignore # it. return else: self.revisions_seen.add(revision) cvs_rev_id = self.cvs_file_items.original_ids[revision] if is_trunk_revision(revision): # On trunk, revisions are encountered in reverse order (1.<N> # ... 1.1) and deltas are inverted. The first text that we see # is the fulltext for the HEAD revision. After that, the text # corresponding to revision 1.N is the delta (1.<N+1> -> # 1.<N>)). We have to invert the deltas here so that we can # read the revisions out in dependency order; that is, for # revision 1.1 we want the fulltext, and for revision 1.<N> we # want the delta (1.<N-1> -> 1.<N>). This means that we can't # compute the delta for a revision until we see its logical # parent. 
When we finally see revision 1.1 (which is recognized # because it doesn't have a parent), we can record the diff (1.1 # -> 1.2) for revision 1.2, and also the fulltext for 1.1. if revision == self.head_revision: # This is HEAD, as fulltext. Initialize the RCSStream so # that we can compute deltas backwards in time. self._rcs_stream = RCSStream(text) self._rcs_stream_revision = revision else: # Any other trunk revision is a backward delta. Apply the # delta to the RCSStream to mutate it to the contents of this # revision, and also to get the reverse delta, which we store # as the forward delta of our child revision. try: text = self._rcs_stream.invert_diff(text) except MalformedDeltaException, e: logger.error( 'Malformed RCS delta in %s, revision %s: %s' % (self.cvs_file_items.cvs_file.rcs_path, revision, e) ) raise RuntimeError() text_record = DeltaTextRecord( self.cvs_file_items.original_ids[self._rcs_stream_revision], cvs_rev_id ) self.revision_collector._writeout(text_record, text) self._rcs_stream_revision = revision if revision == self.revision_1_1: # This is revision 1.1. Write its fulltext: text_record = FullTextRecord(cvs_rev_id) self.revision_collector._writeout( text_record, self._rcs_stream.get_text() ) # There will be no more trunk revisions delivered, so free the # RCSStream. del self._rcs_stream del self._rcs_stream_revision else: