def runTest(self):
    """Parse the test's ``,v`` file and exercise delta inversion.

    The RCS file ``self.filename + ',v'`` is parsed into an RCSRecorder.
    The text recorded for revision 1.2 must equal ``self.v2``, and the
    text recorded for revision 1.1 is used as the delta that turns
    ``self.v2`` into ``self.v1``.  The delta is inverted twice through a
    single RCSStream, and every resulting delta is checked with
    applyTest().
    """
    rcs_path = self.filename + ',v'
    # assertTrue() replaces the deprecated assert_() alias.
    self.assertTrue(os.path.isfile(rcs_path))

    recorder = RCSRecorder()
    f = open(rcs_path, 'rb')
    try:
        parse(f, recorder)
    finally:
        f.close()

    v2 = recorder.texts['1.2']
    self.assertEqual(v2, self.v2)
    delta = recorder.texts['1.1']

    s = RCSStream(v2)
    self.assertEqual(s.get_text(), self.v2)
    # Inverting the delta mutates the stream to the older text and
    # returns the reverse delta:
    invdelta = s.invert_diff(delta)
    self.assertEqual(s.get_text(), self.v1)
    # Inverting the reverse delta yields a second v2 -> v1 delta:
    delta2 = s.invert_diff(invdelta)

    self.applyTest(self.v2, delta, self.v1)
    self.applyTest(self.v1, invdelta, self.v2)

    if STRICT_INVERSES:
        # A double inversion must reproduce the original delta exactly.
        self.assertEqual(delta2, delta)
    elif delta2 != delta:
        # Otherwise a differing delta only has to be functionally
        # equivalent to the original one.
        self.applyTest(self.v2, delta2, self.v1)
def set_revision_info(self, revision, log, text):
    """Record the text of one trunk revision as a delta or fulltext.

    A revision that is already in ``self.revisions_seen`` is ignored.
    For trunk revisions (as decided by is_trunk_revision()), the head
    revision's text initializes an RCSStream; the text of every other
    trunk revision is inverted against that stream and the inverted
    delta is written out via ``self.revision_collector._writeout()``.
    When ``revision`` equals ``self.revision_1_1`` the stream's current
    fulltext is written out as well and the stream is released.

    Only the trunk handling is visible in this block; revisions that
    are not on trunk fall through without further action here.

    ``log`` is not used by this method.

    Raises RuntimeError (after logging the problem) if the delta is
    rejected with MalformedDeltaException.
    """
    if revision in self.revisions_seen:
        # One common form of CVS repository corruption is that the
        # Deltatext block for revision 1.1 appears twice.  CollectData
        # has already warned about this problem; here we can just
        # ignore it.
        return
    self.revisions_seen.add(revision)

    cvs_rev_id = self.cvs_file_items.original_ids[revision]
    if is_trunk_revision(revision):
        # On trunk, revisions are encountered in reverse order (1.<N>
        # ... 1.1) and deltas are inverted.  The first text that we see
        # is the fulltext for the HEAD revision.  After that, the text
        # corresponding to revision 1.N is the delta (1.<N+1> ->
        # 1.<N>).  We have to invert the deltas here so that we can
        # read the revisions out in dependency order; that is, for
        # revision 1.1 we want the fulltext, and for revision 1.<N> we
        # want the delta (1.<N-1> -> 1.<N>).  This means that we can't
        # compute the delta for a revision until we see its logical
        # parent.  When we finally see revision 1.1 (which is recognized
        # because it doesn't have a parent), we can record the diff (1.1
        # -> 1.2) for revision 1.2, and also the fulltext for 1.1.

        if revision == self.head_revision:
            # This is HEAD, as fulltext.  Initialize the RCSStream so
            # that we can compute deltas backwards in time.
            self._rcs_stream = RCSStream(text)
            self._rcs_stream_revision = revision
        else:
            # Any other trunk revision is a backward delta.  Apply the
            # delta to the RCSStream to mutate it to the contents of this
            # revision, and also to get the reverse delta, which we store
            # as the forward delta of our child revision.
            try:
                text = self._rcs_stream.invert_diff(text)
            except MalformedDeltaException as e:
                logger.error(
                    'Malformed RCS delta in %s, revision %s: %s'
                    % (self.cvs_file_items.cvs_file.rcs_path, revision, e)
                )
                raise RuntimeError()
            # The record is keyed by the revision that the stream held
            # before this delta was applied:
            text_record = DeltaTextRecord(
                self.cvs_file_items.original_ids[self._rcs_stream_revision],
                cvs_rev_id
            )
            self.revision_collector._writeout(text_record, text)
            self._rcs_stream_revision = revision

        if revision == self.revision_1_1:
            # This is revision 1.1.  Write its fulltext:
            text_record = FullTextRecord(cvs_rev_id)
            self.revision_collector._writeout(
                text_record, self._rcs_stream.get_text()
            )

            # There will be no more trunk revisions delivered, so free
            # the RCSStream.
            del self._rcs_stream
            del self._rcs_stream_revision
def applyTest(self, old, delta, new):
    """Verify that *delta* transforms the text *old* into *new*.

    The check is performed twice, each time on a fresh RCSStream: first
    through apply_diff() and then through invert_diff() (whose return
    value is ignored here).
    """
    for method_name in ('apply_diff', 'invert_diff'):
        stream = RCSStream(old)
        # A new stream must report exactly the text it was built from:
        self.assertEqual(stream.get_text(), old)
        getattr(stream, method_name)(delta)
        self.assertEqual(stream.get_text(), new)
def set_revision_info(self, rev, log, text):
    """Reconstruct the fulltext of *rev* and emit its blob if marked.

    Revisions without an entry in ``self.revrecs`` are skipped.  For
    the others, *text* is either a fulltext (when the record has no
    base) or a delta against the record's base revision.  The most
    recently reconstructed text that is still needed is kept in
    ``self.last_revrec`` / ``self.last_rcsstream``; texts that are
    displaced while still needed are saved to ``self.fulltext_file``.

    ``log`` is not used by this method.
    """
    record = self.revrecs.get(rev)
    if record is None:
        return

    base = record.base
    cached = self.last_revrec

    if base is None:
        # This must be the last revision on trunk, for which the
        # fulltext is stored directly in the RCS file:
        assert cached is None
        if record.mark is not None:
            record.write_blob(self.blobfile, text)
        if record.is_needed():
            self.last_revrec = record
            self.last_rcsstream = RCSStream(text)
        return

    if cached is not None and base == cached.rev:
        # Our base revision is stored in self.last_rcsstream.
        stream = self.last_rcsstream
        cached.refs.remove(rev)
        # Save the base text first if something else still refers to it:
        if cached.is_needed() and not cached.is_written():
            cached.write(self.fulltext_file, stream.get_text())
        stream.apply_diff(text)
        if record.mark is not None:
            record.write_blob(self.blobfile, stream.get_text())
        if record.is_needed():
            self.last_revrec = record
        else:
            self.last_revrec = None
            self.last_rcsstream = None
        return

    # Our base revision is not stored in self.last_rcsstream; it will
    # have to be obtained from elsewhere.

    # Store the old last_rcsstream if necessary:
    if cached is not None:
        if not cached.is_written():
            cached.write(self.fulltext_file, self.last_rcsstream.get_text())
        self.last_revrec = None
        self.last_rcsstream = None

    base_record = self[base]
    stream = RCSStream(base_record.read_fulltext())
    base_record.refs.remove(rev)
    stream.apply_diff(text)
    if record.mark is not None:
        record.write_blob(self.blobfile, stream.get_text())
    if record.is_needed():
        self.last_revrec = record
        self.last_rcsstream = stream
def record_text(self, cvs_rev, log, text):
    """Record the text of a trunk revision as a delta or fulltext.

    For revisions whose ``lod`` is a Trunk: the revision without a
    ``next_id`` supplies the fulltext that initializes an RCSStream;
    the text of every other revision is inverted against that stream
    and the inverted delta is passed to ``self._writeout()``.  When the
    revision has no ``prev_id`` the stream's current fulltext is
    written out too and the stream is released.

    Only the trunk handling is visible in this block; revisions on
    other lines of development fall through without action here.

    ``log`` is not used by this method.

    Raises RuntimeError (after logging the problem) if the delta is
    rejected with MalformedDeltaException.
    """
    if isinstance(cvs_rev.lod, Trunk):
        # On trunk, revisions are encountered in reverse order (1.<N>
        # ... 1.1) and deltas are inverted.  The first text that we see
        # is the fulltext for the HEAD revision.  After that, the text
        # corresponding to revision 1.N is the delta (1.<N+1> ->
        # 1.<N>).  We have to invert the deltas here so that we can
        # read the revisions out in dependency order; that is, for
        # revision 1.1 we want the fulltext, and for revision 1.<N> we
        # want the delta (1.<N-1> -> 1.<N>).  This means that we can't
        # compute the delta for a revision until we see its logical
        # parent.  When we finally see revision 1.1 (which is recognized
        # because it doesn't have a parent), we can record the diff (1.1
        # -> 1.2) for revision 1.2, and also the fulltext for 1.1.

        if cvs_rev.next_id is None:
            # This is HEAD, as fulltext.  Initialize the RCSStream so
            # that we can compute deltas backwards in time.
            self._stream = RCSStream(text)
        else:
            # Any other trunk revision is a backward delta.  Apply the
            # delta to the RCSStream to mutate it to the contents of this
            # revision, and also to get the reverse delta, which we store
            # as the forward delta of our child revision.
            try:
                text = self._stream.invert_diff(text)
            except MalformedDeltaException as msg:
                Log().error(
                    'Malformed RCS delta in %s, revision %s: %s'
                    % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev, msg)
                )
                raise RuntimeError
            text_record = DeltaTextRecord(cvs_rev.next_id, cvs_rev.id)
            self._writeout(text_record, text)

        if cvs_rev.prev_id is None:
            # This is revision 1.1.  Write its fulltext:
            text_record = FullTextRecord(cvs_rev.id)
            self._writeout(text_record, self._stream.get_text())

            # There will be no more trunk revisions delivered, so free
            # the RCSStream.
            del self._stream
def checkout(self, text_record_db):
    """Return this record's fulltext, rebuilt from its predecessor.

    The predecessor (``self.pred_id``) is checked out first, then this
    record's delta from ``text_record_db.delta_db`` is applied to it.
    The reference count is decremented; if references remain, the
    fulltext is stored in ``text_record_db.checkout_db`` and this
    record is replaced by a CheckedOutTextRecord, otherwise the record
    is deleted from the database.
    """
    predecessor = text_record_db[self.pred_id]
    stream = RCSStream(predecessor.checkout(text_record_db))
    stream.apply_diff(text_record_db.delta_db[self.id])
    text = stream.get_text()
    # Release the stream before doing any further work:
    del stream

    self.refcount -= 1
    if self.refcount != 0:
        # Store a new CheckedOutTextRecord in place of ourselves:
        text_record_db.checkout_db['%x' % self.id] = text
        replacement = CheckedOutTextRecord(self.id)
        replacement.refcount = self.refcount
        text_record_db.replace(replacement)
    else:
        # This text will never be needed again; just delete ourselves
        # without ever having stored the fulltext to the checkout
        # database:
        del text_record_db[self.id]

    return text
def checkout(self, text_record_db):
    """Rebuild and return the fulltext for this delta record.

    The fulltext of the predecessor record is obtained by checking it
    out, and the delta stored under ``self.id`` is applied on top of
    it.  Afterwards one reference is consumed: a record that is still
    referenced is swapped for a CheckedOutTextRecord (with the text
    saved in ``text_record_db.checkout_db``); an unreferenced one is
    removed from ``text_record_db``.
    """
    parent_text = text_record_db[self.pred_id].checkout(text_record_db)
    patcher = RCSStream(parent_text)
    patcher.apply_diff(text_record_db.delta_db[self.id])
    text = patcher.get_text()
    # The stream is no longer needed once the text has been extracted:
    del patcher

    self.refcount -= 1
    if self.refcount == 0:
        # This text will never be needed again; just delete ourselves
        # without ever having stored the fulltext to the checkout
        # database:
        del text_record_db[self.id]
        return text

    # Store a new CheckedOutTextRecord in place of ourselves:
    text_record_db.checkout_db['%x' % self.id] = text
    checked_out = CheckedOutTextRecord(self.id)
    checked_out.refcount = self.refcount
    text_record_db.replace(checked_out)
    return text
def record_text(self, cvs_rev, log, text):
    """Record the text of a trunk revision as a delta or fulltext.

    For revisions whose ``lod`` is a Trunk: the revision without a
    ``next_id`` supplies the fulltext that initializes an RCSStream;
    the text of every other revision is inverted against that stream
    and the inverted delta is passed to ``self._writeout()``.  When the
    revision has no ``prev_id`` the stream's current fulltext is
    written out too and the stream is released.

    Only the trunk handling is visible in this block; revisions on
    other lines of development fall through without action here.

    ``log`` is not used by this method.

    Raises RuntimeError (after logging the problem) if the delta is
    rejected with MalformedDeltaException.
    """
    if isinstance(cvs_rev.lod, Trunk):
        # On trunk, revisions are encountered in reverse order (1.<N>
        # ... 1.1) and deltas are inverted.  The first text that we see
        # is the fulltext for the HEAD revision.  After that, the text
        # corresponding to revision 1.N is the delta (1.<N+1> ->
        # 1.<N>).  We have to invert the deltas here so that we can
        # read the revisions out in dependency order; that is, for
        # revision 1.1 we want the fulltext, and for revision 1.<N> we
        # want the delta (1.<N-1> -> 1.<N>).  This means that we can't
        # compute the delta for a revision until we see its logical
        # parent.  When we finally see revision 1.1 (which is recognized
        # because it doesn't have a parent), we can record the diff (1.1
        # -> 1.2) for revision 1.2, and also the fulltext for 1.1.

        if cvs_rev.next_id is None:
            # This is HEAD, as fulltext.  Initialize the RCSStream so
            # that we can compute deltas backwards in time.
            self._stream = RCSStream(text)
        else:
            # Any other trunk revision is a backward delta.  Apply the
            # delta to the RCSStream to mutate it to the contents of this
            # revision, and also to get the reverse delta, which we store
            # as the forward delta of our child revision.
            try:
                text = self._stream.invert_diff(text)
            except MalformedDeltaException as msg:
                Log().error('Malformed RCS delta in %s, revision %s: %s'
                            % (cvs_rev.cvs_file.get_filename(),
                               cvs_rev.rev, msg))
                raise RuntimeError
            text_record = DeltaTextRecord(cvs_rev.next_id, cvs_rev.id)
            self._writeout(text_record, text)

        if cvs_rev.prev_id is None:
            # This is revision 1.1.  Write its fulltext:
            text_record = FullTextRecord(cvs_rev.id)
            self._writeout(text_record, self._stream.get_text())

            # There will be no more trunk revisions delivered, so free
            # the RCSStream.
            del self._stream
def set_revision_info(self, revision, log, text):
    """Record the text of one trunk revision as a delta or fulltext.

    A revision that is already in ``self.revisions_seen`` is ignored.
    For trunk revisions (as decided by is_trunk_revision()), the head
    revision's text initializes an RCSStream; the text of every other
    trunk revision is inverted against that stream and the inverted
    delta is written out via ``self.revision_collector._writeout()``.
    When ``revision`` equals ``self.revision_1_1`` the stream's current
    fulltext is written out as well and the stream is released.

    Only the trunk handling is visible in this block; revisions that
    are not on trunk fall through without further action here.

    ``log`` is not used by this method.

    Raises RuntimeError (after logging the problem) if the delta is
    rejected with MalformedDeltaException.
    """
    if revision in self.revisions_seen:
        # One common form of CVS repository corruption is that the
        # Deltatext block for revision 1.1 appears twice.  CollectData
        # has already warned about this problem; here we can just
        # ignore it.
        return
    self.revisions_seen.add(revision)

    cvs_rev_id = self.cvs_file_items.original_ids[revision]
    if is_trunk_revision(revision):
        # On trunk, revisions are encountered in reverse order (1.<N>
        # ... 1.1) and deltas are inverted.  The first text that we see
        # is the fulltext for the HEAD revision.  After that, the text
        # corresponding to revision 1.N is the delta (1.<N+1> ->
        # 1.<N>).  We have to invert the deltas here so that we can
        # read the revisions out in dependency order; that is, for
        # revision 1.1 we want the fulltext, and for revision 1.<N> we
        # want the delta (1.<N-1> -> 1.<N>).  This means that we can't
        # compute the delta for a revision until we see its logical
        # parent.  When we finally see revision 1.1 (which is recognized
        # because it doesn't have a parent), we can record the diff (1.1
        # -> 1.2) for revision 1.2, and also the fulltext for 1.1.

        if revision == self.head_revision:
            # This is HEAD, as fulltext.  Initialize the RCSStream so
            # that we can compute deltas backwards in time.
            self._rcs_stream = RCSStream(text)
            self._rcs_stream_revision = revision
        else:
            # Any other trunk revision is a backward delta.  Apply the
            # delta to the RCSStream to mutate it to the contents of this
            # revision, and also to get the reverse delta, which we store
            # as the forward delta of our child revision.
            try:
                text = self._rcs_stream.invert_diff(text)
            except MalformedDeltaException as e:
                logger.error(
                    'Malformed RCS delta in %s, revision %s: %s'
                    % (self.cvs_file_items.cvs_file.rcs_path, revision, e)
                )
                raise RuntimeError()
            # The record is keyed by the revision that the stream held
            # before this delta was applied:
            text_record = DeltaTextRecord(
                self.cvs_file_items.original_ids[self._rcs_stream_revision],
                cvs_rev_id
            )
            self.revision_collector._writeout(text_record, text)
            self._rcs_stream_revision = revision

        if revision == self.revision_1_1:
            # This is revision 1.1.  Write its fulltext:
            text_record = FullTextRecord(cvs_rev_id)
            self.revision_collector._writeout(
                text_record, self._rcs_stream.get_text()
            )

            # There will be no more trunk revisions delivered, so free
            # the RCSStream.
            del self._rcs_stream
            del self._rcs_stream_revision
class WriteBlobSink(Sink):
    """A Sink that reconstructs fulltexts and writes marked ones as blobs.

    Revisions listed in *marks* have their fulltext written to
    *blobfile* via RevRecord.write_blob().  Other revisions are only
    reconstructed when they are needed as the base of another needed
    revision; their fulltexts are parked in a temporary file.
    """

    def __init__(self, blobfile, marks):
        """Create the sink.

        blobfile is the file object handed to RevRecord.write_blob();
        marks is a map {rev : mark} of the revisions needing blobs.
        """
        self.blobfile = blobfile

        # A map {rev : RevRecord} for all of the revisions whose fulltext
        # will still be needed:
        self.revrecs = {}

        # The revisions that need marks will definitely be needed, so
        # create records for them now (the rest will be filled in while
        # reading the RCS file):
        for (rev, mark) in marks.items():
            self.revrecs[rev] = RevRecord(rev, mark)

        # The RevRecord of the last fulltext that has been reconstructed,
        # if it still is_needed():
        self.last_revrec = None
        # An RCSStream holding the fulltext of last_revrec:
        self.last_rcsstream = None

        # A file to temporarily hold the fulltexts of revisions for which
        # no blobs are needed:
        self.fulltext_file = tempfile.TemporaryFile()

    def __getitem__(self, rev):
        """Return the RevRecord for rev, creating it if necessary."""
        try:
            return self.revrecs[rev]
        except KeyError:
            revrec = RevRecord(rev)
            self.revrecs[rev] = revrec
            return revrec

    def define_revision(self, rev, timestamp, author, state, branches, next):
        """Record which revisions are stored as deltas against rev.

        next (if not None) and every entry of branches are added to the
        refs of rev's record, and rev becomes the base of each of them.
        timestamp, author and state are not used.
        """
        revrec = self[rev]

        if next is not None:
            revrec.refs.add(next)

        revrec.refs.update(branches)

        for dependent_rev in revrec.refs:
            dependent_revrec = self[dependent_rev]
            # A revision can be a delta against only one base:
            assert dependent_revrec.base is None
            dependent_revrec.base = rev

    def tree_completed(self):
        """Remove unneeded RevRecords.

        Remove the RevRecords for any revisions whose fulltext will not be
        needed (neither as blob output nor as the base of another needed
        revision)."""

        # values() is materialized by the list comprehension before any
        # entry is deleted from self.revrecs below.
        revrecs_to_remove = [
            revrec
            for revrec in self.revrecs.values()
            if not revrec.is_needed()
        ]
        while revrecs_to_remove:
            revrec = revrecs_to_remove.pop()
            del self.revrecs[revrec.rev]
            # A record without a base (the revision stored as fulltext)
            # has no base record to update.  Looking up self[None] would
            # create a bogus record and then fail in refs.remove().
            if revrec.base is not None:
                base_revrec = self[revrec.base]
                base_revrec.refs.remove(revrec.rev)
                # Removing the last reference may make the base itself
                # unneeded:
                if not base_revrec.is_needed():
                    revrecs_to_remove.append(base_revrec)

    def set_revision_info(self, rev, log, text):
        """Reconstruct the fulltext of rev and write its blob if marked.

        Revisions without an entry in self.revrecs are skipped.  text is
        the fulltext when the record has no base, and otherwise a delta
        against the record's base revision.  log is not used.
        """
        revrec = self.revrecs.get(rev)

        if revrec is None:
            return

        base_rev = revrec.base
        if base_rev is None:
            # This must be the last revision on trunk, for which the
            # fulltext is stored directly in the RCS file:
            assert self.last_revrec is None
            if revrec.mark is not None:
                revrec.write_blob(self.blobfile, text)
            if revrec.is_needed():
                self.last_revrec = revrec
                self.last_rcsstream = RCSStream(text)
        elif self.last_revrec is not None and base_rev == self.last_revrec.rev:
            # Our base revision is stored in self.last_rcsstream.
            self.last_revrec.refs.remove(rev)
            # The stream is about to be mutated, so save the base text
            # first if something else still needs it:
            if self.last_revrec.is_needed():
                if not self.last_revrec.is_written():
                    self.last_revrec.write(
                        self.fulltext_file, self.last_rcsstream.get_text()
                    )
            self.last_rcsstream.apply_diff(text)
            if revrec.mark is not None:
                revrec.write_blob(
                    self.blobfile, self.last_rcsstream.get_text()
                )
            if revrec.is_needed():
                self.last_revrec = revrec
            else:
                self.last_revrec = None
                self.last_rcsstream = None
        else:
            # Our base revision is not stored in self.last_rcsstream; it
            # will have to be obtained from elsewhere.

            # Store the old last_rcsstream if necessary:
            if self.last_revrec is not None:
                if not self.last_revrec.is_written():
                    self.last_revrec.write(
                        self.fulltext_file, self.last_rcsstream.get_text()
                    )
                self.last_revrec = None
                self.last_rcsstream = None

            base_revrec = self[base_rev]
            rcsstream = RCSStream(base_revrec.read_fulltext())
            base_revrec.refs.remove(rev)
            rcsstream.apply_diff(text)
            if revrec.mark is not None:
                revrec.write_blob(self.blobfile, rcsstream.get_text())
            if revrec.is_needed():
                self.last_revrec = revrec
                self.last_rcsstream = rcsstream
            del rcsstream

    def parse_completed(self):
        """Close the temporary fulltext file."""
        self.fulltext_file.close()
class InternalRevisionRecorder(RevisionRecorder): """A RevisionRecorder that reconstructs the fulltext internally.""" def __init__(self, compress): RevisionRecorder.__init__(self) self._compress = compress def register_artifacts(self, which_pass): artifact_manager.register_temp_file(config.RCS_DELTAS_INDEX_TABLE, which_pass) artifact_manager.register_temp_file(config.RCS_DELTAS_STORE, which_pass) artifact_manager.register_temp_file(config.RCS_TREES_INDEX_TABLE, which_pass) artifact_manager.register_temp_file(config.RCS_TREES_STORE, which_pass) def start(self): ser = MarshalSerializer() if self._compress: ser = CompressingSerializer(ser) self._rcs_deltas = IndexedDatabase( artifact_manager.get_temp_file(config.RCS_DELTAS_STORE), artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE), DB_OPEN_NEW, ser) primer = (FullTextRecord, DeltaTextRecord) self._rcs_trees = IndexedDatabase( artifact_manager.get_temp_file(config.RCS_TREES_STORE), artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE), DB_OPEN_NEW, PrimedPickleSerializer(primer)) def start_file(self, cvs_file_items): self._cvs_file_items = cvs_file_items # A map from cvs_rev_id to TextRecord instance: self.text_record_db = TextRecordDatabase(self._rcs_deltas, NullDatabase()) def record_text(self, cvs_rev, log, text): if isinstance(cvs_rev.lod, Trunk): # On trunk, revisions are encountered in reverse order (1.<N> # ... 1.1) and deltas are inverted. The first text that we see # is the fulltext for the HEAD revision. After that, the text # corresponding to revision 1.N is the delta (1.<N+1> -> # 1.<N>)). We have to invert the deltas here so that we can # read the revisions out in dependency order; that is, for # revision 1.1 we want the fulltext, and for revision 1.<N> we # want the delta (1.<N-1> -> 1.<N>). This means that we can't # compute the delta for a revision until we see its logical # parent. 
When we finally see revision 1.1 (which is recognized # because it doesn't have a parent), we can record the diff (1.1 # -> 1.2) for revision 1.2, and also the fulltext for 1.1. if cvs_rev.next_id is None: # This is HEAD, as fulltext. Initialize the RCSStream so # that we can compute deltas backwards in time. self._stream = RCSStream(text) else: # Any other trunk revision is a backward delta. Apply the # delta to the RCSStream to mutate it to the contents of this # revision, and also to get the reverse delta, which we store # as the forward delta of our child revision. try: text = self._stream.invert_diff(text) except MalformedDeltaException, (msg): Log().error( 'Malformed RCS delta in %s, revision %s: %s' % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev, msg)) raise RuntimeError text_record = DeltaTextRecord(cvs_rev.next_id, cvs_rev.id) self._writeout(text_record, text) if cvs_rev.prev_id is None: # This is revision 1.1. Write its fulltext: text_record = FullTextRecord(cvs_rev.id) self._writeout(text_record, self._stream.get_text()) # There will be no more trunk revisions delivered, so free the # RCSStream. del self._stream else:
class WriteBlobSink(Sink):
    """A Sink that reconstructs fulltexts and writes marked ones as blobs.

    Revisions listed in *marks* have their fulltext written to
    *blobfile* via RevRecord.write_blob().  Other revisions are only
    reconstructed when they are needed as the base of another needed
    revision; their fulltexts are parked in a temporary file.
    """

    def __init__(self, blobfile, marks):
        """Create the sink.

        blobfile is the file object handed to RevRecord.write_blob();
        marks is a map {rev : mark} of the revisions needing blobs.
        """
        self.blobfile = blobfile

        # A map {rev : RevRecord} for all of the revisions whose fulltext
        # will still be needed:
        self.revrecs = {}

        # The revisions that need marks will definitely be needed, so
        # create records for them now (the rest will be filled in while
        # reading the RCS file):
        for (rev, mark) in marks.items():
            self.revrecs[rev] = RevRecord(rev, mark)

        # The RevRecord of the last fulltext that has been reconstructed,
        # if it still is_needed():
        self.last_revrec = None
        # An RCSStream holding the fulltext of last_revrec:
        self.last_rcsstream = None

        # A file to temporarily hold the fulltexts of revisions for which
        # no blobs are needed:
        self.fulltext_file = tempfile.TemporaryFile()

    def __getitem__(self, rev):
        """Return the RevRecord for rev, creating it if necessary."""
        try:
            return self.revrecs[rev]
        except KeyError:
            revrec = RevRecord(rev)
            self.revrecs[rev] = revrec
            return revrec

    def define_revision(self, rev, timestamp, author, state, branches, next):
        """Record which revisions are stored as deltas against rev.

        next (if not None) and every entry of branches are added to the
        refs of rev's record, and rev becomes the base of each of them.
        timestamp, author and state are not used.
        """
        revrec = self[rev]

        if next is not None:
            revrec.refs.add(next)

        revrec.refs.update(branches)

        for dependent_rev in revrec.refs:
            dependent_revrec = self[dependent_rev]
            # A revision can be a delta against only one base:
            assert dependent_revrec.base is None
            dependent_revrec.base = rev

    def tree_completed(self):
        """Remove unneeded RevRecords.

        Remove the RevRecords for any revisions whose fulltext will not be
        needed (neither as blob output nor as the base of another needed
        revision)."""

        # values() (rather than the Python-2-only itervalues()) is
        # materialized by the list comprehension before any entry is
        # deleted from self.revrecs below.
        revrecs_to_remove = [
            revrec
            for revrec in self.revrecs.values()
            if not revrec.is_needed()
        ]
        while revrecs_to_remove:
            revrec = revrecs_to_remove.pop()
            del self.revrecs[revrec.rev]
            # Only a record that has a base has a base record to update:
            if revrec.base is not None:
                base_revrec = self[revrec.base]
                base_revrec.refs.remove(revrec.rev)
                # Removing the last reference may make the base itself
                # unneeded:
                if not base_revrec.is_needed():
                    revrecs_to_remove.append(base_revrec)

    def set_revision_info(self, rev, log, text):
        """Reconstruct the fulltext of rev and write its blob if marked.

        Revisions without an entry in self.revrecs are skipped.  text is
        the fulltext when the record has no base, and otherwise a delta
        against the record's base revision.  log is not used.
        """
        revrec = self.revrecs.get(rev)

        if revrec is None:
            return

        base_rev = revrec.base
        if base_rev is None:
            # This must be the last revision on trunk, for which the
            # fulltext is stored directly in the RCS file:
            assert self.last_revrec is None
            if revrec.mark is not None:
                revrec.write_blob(self.blobfile, text)
            if revrec.is_needed():
                self.last_revrec = revrec
                self.last_rcsstream = RCSStream(text)
        elif self.last_revrec is not None and base_rev == self.last_revrec.rev:
            # Our base revision is stored in self.last_rcsstream.
            self.last_revrec.refs.remove(rev)
            # The stream is about to be mutated, so save the base text
            # first if something else still needs it:
            if self.last_revrec.is_needed() and not self.last_revrec.is_written():
                self.last_revrec.write(
                    self.fulltext_file, self.last_rcsstream.get_text()
                )
            self.last_rcsstream.apply_diff(text)
            if revrec.mark is not None:
                revrec.write_blob(
                    self.blobfile, self.last_rcsstream.get_text()
                )
            if revrec.is_needed():
                self.last_revrec = revrec
            else:
                self.last_revrec = None
                self.last_rcsstream = None
        else:
            # Our base revision is not stored in self.last_rcsstream; it
            # will have to be obtained from elsewhere.

            # Store the old last_rcsstream if necessary:
            if self.last_revrec is not None:
                if not self.last_revrec.is_written():
                    self.last_revrec.write(
                        self.fulltext_file, self.last_rcsstream.get_text()
                    )
                self.last_revrec = None
                self.last_rcsstream = None

            base_revrec = self[base_rev]
            rcsstream = RCSStream(base_revrec.read_fulltext())
            base_revrec.refs.remove(rev)
            rcsstream.apply_diff(text)
            if revrec.mark is not None:
                revrec.write_blob(self.blobfile, rcsstream.get_text())
            if revrec.is_needed():
                self.last_revrec = revrec
                self.last_rcsstream = rcsstream
            del rcsstream

    def parse_completed(self):
        """Close the temporary fulltext file."""
        self.fulltext_file.close()
class InternalRevisionRecorder(RevisionRecorder): """A RevisionRecorder that reconstructs the fulltext internally.""" def __init__(self, compress): RevisionRecorder.__init__(self) self._compress = compress def register_artifacts(self, which_pass): artifact_manager.register_temp_file( config.RCS_DELTAS_INDEX_TABLE, which_pass ) artifact_manager.register_temp_file(config.RCS_DELTAS_STORE, which_pass) artifact_manager.register_temp_file( config.RCS_TREES_INDEX_TABLE, which_pass ) artifact_manager.register_temp_file(config.RCS_TREES_STORE, which_pass) def start(self): ser = MarshalSerializer() if self._compress: ser = CompressingSerializer(ser) self._rcs_deltas = IndexedDatabase( artifact_manager.get_temp_file(config.RCS_DELTAS_STORE), artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE), DB_OPEN_NEW, ser) primer = (FullTextRecord, DeltaTextRecord) self._rcs_trees = IndexedDatabase( artifact_manager.get_temp_file(config.RCS_TREES_STORE), artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE), DB_OPEN_NEW, PrimedPickleSerializer(primer)) def start_file(self, cvs_file_items): self._cvs_file_items = cvs_file_items # A map from cvs_rev_id to TextRecord instance: self.text_record_db = TextRecordDatabase(self._rcs_deltas, NullDatabase()) def record_text(self, cvs_rev, log, text): if isinstance(cvs_rev.lod, Trunk): # On trunk, revisions are encountered in reverse order (1.<N> # ... 1.1) and deltas are inverted. The first text that we see # is the fulltext for the HEAD revision. After that, the text # corresponding to revision 1.N is the delta (1.<N+1> -> # 1.<N>)). We have to invert the deltas here so that we can # read the revisions out in dependency order; that is, for # revision 1.1 we want the fulltext, and for revision 1.<N> we # want the delta (1.<N-1> -> 1.<N>). This means that we can't # compute the delta for a revision until we see its logical # parent. 
When we finally see revision 1.1 (which is recognized # because it doesn't have a parent), we can record the diff (1.1 # -> 1.2) for revision 1.2, and also the fulltext for 1.1. if cvs_rev.next_id is None: # This is HEAD, as fulltext. Initialize the RCSStream so # that we can compute deltas backwards in time. self._stream = RCSStream(text) else: # Any other trunk revision is a backward delta. Apply the # delta to the RCSStream to mutate it to the contents of this # revision, and also to get the reverse delta, which we store # as the forward delta of our child revision. try: text = self._stream.invert_diff(text) except MalformedDeltaException, (msg): Log().error('Malformed RCS delta in %s, revision %s: %s' % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev, msg)) raise RuntimeError text_record = DeltaTextRecord(cvs_rev.next_id, cvs_rev.id) self._writeout(text_record, text) if cvs_rev.prev_id is None: # This is revision 1.1. Write its fulltext: text_record = FullTextRecord(cvs_rev.id) self._writeout(text_record, self._stream.get_text()) # There will be no more trunk revisions delivered, so free the # RCSStream. del self._stream else:
class _Sink(cvs2svn_rcsparse.Sink): def __init__(self, revision_collector, cvs_file_items): self.revision_collector = revision_collector self.cvs_file_items = cvs_file_items # A map {rev : base_rev} indicating that the text for rev is # stored in CVS as a delta relative to base_rev. self.base_revisions = {} # The revision that is stored with its fulltext in CVS (usually # the oldest revision on trunk): self.head_revision = None # The first logical revision on trunk (usually '1.1'): self.revision_1_1 = None # Keep track of the revisions whose revision info has been seen so # far (to avoid repeated revision info blocks): self.revisions_seen = set() def set_head_revision(self, revision): self.head_revision = revision def define_revision( self, revision, timestamp, author, state, branches, next ): if next: self.base_revisions[next] = revision else: if is_trunk_revision(revision): self.revision_1_1 = revision for branch in branches: self.base_revisions[branch] = revision def set_revision_info(self, revision, log, text): if revision in self.revisions_seen: # One common form of CVS repository corruption is that the # Deltatext block for revision 1.1 appears twice. CollectData # has already warned about this problem; here we can just ignore # it. return else: self.revisions_seen.add(revision) cvs_rev_id = self.cvs_file_items.original_ids[revision] if is_trunk_revision(revision): # On trunk, revisions are encountered in reverse order (1.<N> # ... 1.1) and deltas are inverted. The first text that we see # is the fulltext for the HEAD revision. After that, the text # corresponding to revision 1.N is the delta (1.<N+1> -> # 1.<N>)). We have to invert the deltas here so that we can # read the revisions out in dependency order; that is, for # revision 1.1 we want the fulltext, and for revision 1.<N> we # want the delta (1.<N-1> -> 1.<N>). This means that we can't # compute the delta for a revision until we see its logical # parent. 
When we finally see revision 1.1 (which is recognized # because it doesn't have a parent), we can record the diff (1.1 # -> 1.2) for revision 1.2, and also the fulltext for 1.1. if revision == self.head_revision: # This is HEAD, as fulltext. Initialize the RCSStream so # that we can compute deltas backwards in time. self._rcs_stream = RCSStream(text) self._rcs_stream_revision = revision else: # Any other trunk revision is a backward delta. Apply the # delta to the RCSStream to mutate it to the contents of this # revision, and also to get the reverse delta, which we store # as the forward delta of our child revision. try: text = self._rcs_stream.invert_diff(text) except MalformedDeltaException, e: logger.error( 'Malformed RCS delta in %s, revision %s: %s' % (self.cvs_file_items.cvs_file.filename, revision, e) ) raise RuntimeError() text_record = DeltaTextRecord( self.cvs_file_items.original_ids[self._rcs_stream_revision], cvs_rev_id ) self.revision_collector._writeout(text_record, text) self._rcs_stream_revision = revision if revision == self.revision_1_1: # This is revision 1.1. Write its fulltext: text_record = FullTextRecord(cvs_rev_id) self.revision_collector._writeout( text_record, self._rcs_stream.get_text() ) # There will be no more trunk revisions delivered, so free the # RCSStream. del self._rcs_stream del self._rcs_stream_revision else: