Example #1
0
  def runTest(self):
    """Check that RCSStream.invert_diff() round-trips the '1.1' delta.

    The RCS file self.filename + ',v' is parsed into an RCSRecorder.
    Its '1.2' text must equal self.v2, and its '1.1' text is used as
    the delta leading from self.v2 to self.v1.  That delta is inverted,
    the inverse is inverted again, and every delta obtained is checked
    with applyTest().
    """
    rcs_filename = self.filename + ',v'
    # assertTrue() is used instead of the deprecated alias assert_(),
    # which was removed from unittest in Python 3.12.
    self.assertTrue(os.path.isfile(rcs_filename))
    recorder = RCSRecorder()

    f = open(rcs_filename, 'rb')
    try:
      parse(f, recorder)
    finally:
      f.close()

    v2 = recorder.texts['1.2']
    self.assertEqual(v2, self.v2)
    delta = recorder.texts['1.1']
    s = RCSStream(v2)
    self.assertEqual(s.get_text(), self.v2)
    # Inverting the delta must leave the stream holding the v1 text;
    # the return value is the delta for the opposite direction.
    invdelta = s.invert_diff(delta)
    self.assertEqual(s.get_text(), self.v1)
    delta2 = s.invert_diff(invdelta)

    self.applyTest(self.v2, delta, self.v1)
    self.applyTest(self.v1, invdelta, self.v2)

    if STRICT_INVERSES:
      # The double inverse has to reproduce the original delta exactly.
      self.assertEqual(delta2, delta)
    elif delta2 != delta:
      # A textually different double inverse is tolerated, provided it
      # still turns v2 into v1.
      self.applyTest(self.v2, delta2, self.v1)
Example #2
0
  def runTest(self):
    """Check that RCSStream.invert_diff() round-trips the '1.1' delta.

    The RCS file self.filename + ',v' is parsed into an RCSRecorder.
    Its '1.2' text must equal self.v2, and its '1.1' text is used as
    the delta leading from self.v2 to self.v1.  That delta is inverted,
    the inverse is inverted again, and every delta obtained is checked
    with applyTest().
    """
    rcs_filename = self.filename + ',v'
    # assertTrue() is used instead of the deprecated alias assert_(),
    # which was removed from unittest in Python 3.12.
    self.assertTrue(os.path.isfile(rcs_filename))
    recorder = RCSRecorder()

    f = open(rcs_filename, 'rb')
    try:
      parse(f, recorder)
    finally:
      f.close()

    v2 = recorder.texts['1.2']
    self.assertEqual(v2, self.v2)
    delta = recorder.texts['1.1']
    s = RCSStream(v2)
    self.assertEqual(s.get_text(), self.v2)
    # Inverting the delta must leave the stream holding the v1 text;
    # the return value is the delta for the opposite direction.
    invdelta = s.invert_diff(delta)
    self.assertEqual(s.get_text(), self.v1)
    delta2 = s.invert_diff(invdelta)

    self.applyTest(self.v2, delta, self.v1)
    self.applyTest(self.v1, invdelta, self.v2)

    if STRICT_INVERSES:
      # The double inverse has to reproduce the original delta exactly.
      self.assertEqual(delta2, delta)
    elif delta2 != delta:
      # A textually different double inverse is tolerated, provided it
      # still turns v2 into v1.
      self.applyTest(self.v2, delta2, self.v1)
Example #3
0
    def applyTest(self, old, delta, new):
        """Check that DELTA turns OLD into NEW through both stream methods.

        A fresh RCSStream over OLD is built once for apply_diff() and
        once for invert_diff().  In each case the stream text must equal
        OLD before the call and NEW after it.
        """
        for method_name in ('apply_diff', 'invert_diff'):
            stream = RCSStream(old)
            self.assertEqual(stream.get_text(), old)
            getattr(stream, method_name)(delta)
            self.assertEqual(stream.get_text(), new)
Example #4
0
  def applyTest(self, old, delta, new):
    """Check that DELTA turns OLD into NEW through both stream methods.

    A fresh RCSStream over OLD is built once for apply_diff() and once
    for invert_diff().  In each case the stream text must equal OLD
    before the call and NEW after it.
    """
    for method_name in ('apply_diff', 'invert_diff'):
      stream = RCSStream(old)
      self.assertEqual(stream.get_text(), old)
      getattr(stream, method_name)(delta)
      self.assertEqual(stream.get_text(), new)
Example #5
0
class _Sink(Sink):
    def __init__(self, revision_collector, cvs_file_items):
        """Store the collaborators and start with empty bookkeeping.

        REVISION_COLLECTOR and CVS_FILE_ITEMS are kept unchanged on the
        instance; everything else starts out empty or unset.
        """
        self.cvs_file_items = cvs_file_items
        self.revision_collector = revision_collector

        # Revisions whose revision info has been seen so far, so that a
        # repeated revision info block can be recognised:
        self.revisions_seen = set()

        # {rev : base_rev}: the text for rev is stored in CVS as a delta
        # relative to base_rev.
        self.base_revisions = {}

        # The revision whose fulltext is stored in CVS; filled in by
        # set_head_revision().
        self.head_revision = None

        # The first logical revision on trunk (usually '1.1'); filled
        # in by define_revision().
        self.revision_1_1 = None

    def set_head_revision(self, revision):
        """Remember REVISION as the head revision.

        set_revision_info() compares trunk revisions against this value
        to recognise the one whose text is a fulltext rather than a
        delta.
        """
        self.head_revision = revision

    def define_revision(self, revision, timestamp, author, state, branches,
                        next):
        """Note which revisions are stored as deltas against REVISION.

        NEXT (when true) and every entry of BRANCHES are entered in
        self.base_revisions with REVISION as their base.  A trunk
        revision without a NEXT is remembered as self.revision_1_1.
        TIMESTAMP, AUTHOR and STATE are not used here.
        """
        if next:
            self.base_revisions[next] = revision
        elif is_trunk_revision(revision):
            # No NEXT on trunk: this is the first logical trunk revision.
            self.revision_1_1 = revision

        self.base_revisions.update(
            (branch, revision) for branch in branches)

    def set_revision_info(self, revision, log, text):
        if revision in self.revisions_seen:
            # One common form of CVS repository corruption is that the
            # Deltatext block for revision 1.1 appears twice.  CollectData
            # has already warned about this problem; here we can just ignore
            # it.
            return
        else:
            self.revisions_seen.add(revision)

        cvs_rev_id = self.cvs_file_items.original_ids[revision]
        if is_trunk_revision(revision):
            # On trunk, revisions are encountered in reverse order (1.<N>
            # ... 1.1) and deltas are inverted.  The first text that we see
            # is the fulltext for the HEAD revision.  After that, the text
            # corresponding to revision 1.N is the delta (1.<N+1> ->
            # 1.<N>)).  We have to invert the deltas here so that we can
            # read the revisions out in dependency order; that is, for
            # revision 1.1 we want the fulltext, and for revision 1.<N> we
            # want the delta (1.<N-1> -> 1.<N>).  This means that we can't
            # compute the delta for a revision until we see its logical
            # parent.  When we finally see revision 1.1 (which is recognized
            # because it doesn't have a parent), we can record the diff (1.1
            # -> 1.2) for revision 1.2, and also the fulltext for 1.1.

            if revision == self.head_revision:
                # This is HEAD, as fulltext.  Initialize the RCSStream so
                # that we can compute deltas backwards in time.
                self._rcs_stream = RCSStream(text)
                self._rcs_stream_revision = revision
            else:
                # Any other trunk revision is a backward delta.  Apply the
                # delta to the RCSStream to mutate it to the contents of this
                # revision, and also to get the reverse delta, which we store
                # as the forward delta of our child revision.
                try:
                    text = self._rcs_stream.invert_diff(text)
                except MalformedDeltaException, e:
                    logger.error(
                        'Malformed RCS delta in %s, revision %s: %s' %
                        (self.cvs_file_items.cvs_file.rcs_path, revision, e))
                    raise RuntimeError()
                text_record = DeltaTextRecord(
                    self.cvs_file_items.original_ids[
                        self._rcs_stream_revision], cvs_rev_id)
                self.revision_collector._writeout(text_record, text)
                self._rcs_stream_revision = revision

            if revision == self.revision_1_1:
                # This is revision 1.1.  Write its fulltext:
                text_record = FullTextRecord(cvs_rev_id)
                self.revision_collector._writeout(text_record,
                                                  self._rcs_stream.get_text())

                # There will be no more trunk revisions delivered, so free the
                # RCSStream.
                del self._rcs_stream
                del self._rcs_stream_revision

        else:
Example #6
0
class InternalRevisionRecorder(RevisionRecorder):
    """A RevisionRecorder that reconstructs the fulltext internally."""
    def __init__(self, compress):
        """Initialise the base recorder and store the COMPRESS flag.

        COMPRESS is consulted in start(): when it is true, the
        serializer used for the RCS deltas database is wrapped in a
        CompressingSerializer.
        """
        RevisionRecorder.__init__(self)
        self._compress = compress

    def register_artifacts(self, which_pass):
        """Register the four temporary files used by this recorder.

        The index table and the store of both the RCS deltas and the
        RCS trees are registered with artifact_manager for WHICH_PASS.
        """
        temp_files = (
            config.RCS_DELTAS_INDEX_TABLE,
            config.RCS_DELTAS_STORE,
            config.RCS_TREES_INDEX_TABLE,
            config.RCS_TREES_STORE,
        )
        for temp_file in temp_files:
            artifact_manager.register_temp_file(temp_file, which_pass)

    def start(self):
        """Create the RCS deltas and RCS trees databases.

        Both are opened with DB_OPEN_NEW as IndexedDatabases on
        temporary files obtained from artifact_manager.  The deltas
        database uses a MarshalSerializer, wrapped in a
        CompressingSerializer when self._compress is true; the trees
        database uses a PrimedPickleSerializer primed with
        FullTextRecord and DeltaTextRecord.
        """
        if self._compress:
            delta_serializer = CompressingSerializer(MarshalSerializer())
        else:
            delta_serializer = MarshalSerializer()

        deltas_store = artifact_manager.get_temp_file(
            config.RCS_DELTAS_STORE)
        deltas_index = artifact_manager.get_temp_file(
            config.RCS_DELTAS_INDEX_TABLE)
        self._rcs_deltas = IndexedDatabase(
            deltas_store, deltas_index, DB_OPEN_NEW, delta_serializer)

        record_classes = (FullTextRecord, DeltaTextRecord)
        trees_store = artifact_manager.get_temp_file(
            config.RCS_TREES_STORE)
        trees_index = artifact_manager.get_temp_file(
            config.RCS_TREES_INDEX_TABLE)
        self._rcs_trees = IndexedDatabase(
            trees_store, trees_index, DB_OPEN_NEW,
            PrimedPickleSerializer(record_classes))

    def start_file(self, cvs_file_items):
        """Store CVS_FILE_ITEMS and create a fresh text record database.

        The new TextRecordDatabase is built on self._rcs_deltas and a
        NullDatabase.
        """
        self._cvs_file_items = cvs_file_items

        # Maps cvs_rev_id to its TextRecord instance:
        deltas_db = self._rcs_deltas
        placeholder_db = NullDatabase()
        self.text_record_db = TextRecordDatabase(deltas_db, placeholder_db)

    def record_text(self, cvs_rev, log, text):
        if isinstance(cvs_rev.lod, Trunk):
            # On trunk, revisions are encountered in reverse order (1.<N>
            # ... 1.1) and deltas are inverted.  The first text that we see
            # is the fulltext for the HEAD revision.  After that, the text
            # corresponding to revision 1.N is the delta (1.<N+1> ->
            # 1.<N>)).  We have to invert the deltas here so that we can
            # read the revisions out in dependency order; that is, for
            # revision 1.1 we want the fulltext, and for revision 1.<N> we
            # want the delta (1.<N-1> -> 1.<N>).  This means that we can't
            # compute the delta for a revision until we see its logical
            # parent.  When we finally see revision 1.1 (which is recognized
            # because it doesn't have a parent), we can record the diff (1.1
            # -> 1.2) for revision 1.2, and also the fulltext for 1.1.

            if cvs_rev.next_id is None:
                # This is HEAD, as fulltext.  Initialize the RCSStream so
                # that we can compute deltas backwards in time.
                self._stream = RCSStream(text)
            else:
                # Any other trunk revision is a backward delta.  Apply the
                # delta to the RCSStream to mutate it to the contents of this
                # revision, and also to get the reverse delta, which we store
                # as the forward delta of our child revision.
                try:
                    text = self._stream.invert_diff(text)
                except MalformedDeltaException, (msg):
                    Log().error(
                        'Malformed RCS delta in %s, revision %s: %s' %
                        (cvs_rev.cvs_file.get_filename(), cvs_rev.rev, msg))
                    raise RuntimeError
                text_record = DeltaTextRecord(cvs_rev.next_id, cvs_rev.id)
                self._writeout(text_record, text)

            if cvs_rev.prev_id is None:
                # This is revision 1.1.  Write its fulltext:
                text_record = FullTextRecord(cvs_rev.id)
                self._writeout(text_record, self._stream.get_text())

                # There will be no more trunk revisions delivered, so free the
                # RCSStream.
                del self._stream

        else:
Example #7
0
class InternalRevisionRecorder(RevisionRecorder):
  """A RevisionRecorder that reconstructs the fulltext internally."""

  def __init__(self, compress):
    """Initialise the base recorder and store the COMPRESS flag.

    COMPRESS is consulted in start(): when it is true, the serializer
    used for the RCS deltas database is wrapped in a
    CompressingSerializer.
    """
    RevisionRecorder.__init__(self)
    self._compress = compress

  def register_artifacts(self, which_pass):
    """Register the four temporary files used by this recorder.

    The index table and the store of both the RCS deltas and the RCS
    trees are registered with artifact_manager for WHICH_PASS.
    """
    temp_files = (
        config.RCS_DELTAS_INDEX_TABLE,
        config.RCS_DELTAS_STORE,
        config.RCS_TREES_INDEX_TABLE,
        config.RCS_TREES_STORE,
        )
    for temp_file in temp_files:
      artifact_manager.register_temp_file(temp_file, which_pass)

  def start(self):
    """Create the RCS deltas and RCS trees databases.

    Both are opened with DB_OPEN_NEW as IndexedDatabases on temporary
    files obtained from artifact_manager.  The deltas database uses a
    MarshalSerializer, wrapped in a CompressingSerializer when
    self._compress is true; the trees database uses a
    PrimedPickleSerializer primed with FullTextRecord and
    DeltaTextRecord.
    """
    if self._compress:
      delta_serializer = CompressingSerializer(MarshalSerializer())
    else:
      delta_serializer = MarshalSerializer()

    deltas_store = artifact_manager.get_temp_file(config.RCS_DELTAS_STORE)
    deltas_index = artifact_manager.get_temp_file(
        config.RCS_DELTAS_INDEX_TABLE
        )
    self._rcs_deltas = IndexedDatabase(
        deltas_store, deltas_index, DB_OPEN_NEW, delta_serializer
        )

    record_classes = (FullTextRecord, DeltaTextRecord)
    trees_store = artifact_manager.get_temp_file(config.RCS_TREES_STORE)
    trees_index = artifact_manager.get_temp_file(
        config.RCS_TREES_INDEX_TABLE
        )
    self._rcs_trees = IndexedDatabase(
        trees_store, trees_index, DB_OPEN_NEW,
        PrimedPickleSerializer(record_classes)
        )

  def start_file(self, cvs_file_items):
    """Store CVS_FILE_ITEMS and create a fresh text record database.

    The new TextRecordDatabase is built on self._rcs_deltas and a
    NullDatabase.
    """
    self._cvs_file_items = cvs_file_items

    # Maps cvs_rev_id to its TextRecord instance:
    deltas_db = self._rcs_deltas
    placeholder_db = NullDatabase()
    self.text_record_db = TextRecordDatabase(deltas_db, placeholder_db)

  def record_text(self, cvs_rev, log, text):
    if isinstance(cvs_rev.lod, Trunk):
      # On trunk, revisions are encountered in reverse order (1.<N>
      # ... 1.1) and deltas are inverted.  The first text that we see
      # is the fulltext for the HEAD revision.  After that, the text
      # corresponding to revision 1.N is the delta (1.<N+1> ->
      # 1.<N>)).  We have to invert the deltas here so that we can
      # read the revisions out in dependency order; that is, for
      # revision 1.1 we want the fulltext, and for revision 1.<N> we
      # want the delta (1.<N-1> -> 1.<N>).  This means that we can't
      # compute the delta for a revision until we see its logical
      # parent.  When we finally see revision 1.1 (which is recognized
      # because it doesn't have a parent), we can record the diff (1.1
      # -> 1.2) for revision 1.2, and also the fulltext for 1.1.

      if cvs_rev.next_id is None:
        # This is HEAD, as fulltext.  Initialize the RCSStream so
        # that we can compute deltas backwards in time.
        self._stream = RCSStream(text)
      else:
        # Any other trunk revision is a backward delta.  Apply the
        # delta to the RCSStream to mutate it to the contents of this
        # revision, and also to get the reverse delta, which we store
        # as the forward delta of our child revision.
        try:
          text = self._stream.invert_diff(text)
        except MalformedDeltaException, (msg):
          Log().error('Malformed RCS delta in %s, revision %s: %s'
                      % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev,
                         msg))
          raise RuntimeError
        text_record = DeltaTextRecord(cvs_rev.next_id, cvs_rev.id)
        self._writeout(text_record, text)

      if cvs_rev.prev_id is None:
        # This is revision 1.1.  Write its fulltext:
        text_record = FullTextRecord(cvs_rev.id)
        self._writeout(text_record, self._stream.get_text())

        # There will be no more trunk revisions delivered, so free the
        # RCSStream.
        del self._stream

    else:
Example #8
0
class _Sink(Sink):
  def __init__(self, revision_collector, cvs_file_items):
    """Store the collaborators and start with empty bookkeeping.

    REVISION_COLLECTOR and CVS_FILE_ITEMS are kept unchanged on the
    instance; everything else starts out empty or unset.
    """
    self.cvs_file_items = cvs_file_items
    self.revision_collector = revision_collector

    # Revisions whose revision info has been seen so far, so that a
    # repeated revision info block can be recognised:
    self.revisions_seen = set()

    # {rev : base_rev}: the text for rev is stored in CVS as a delta
    # relative to base_rev.
    self.base_revisions = {}

    # The revision whose fulltext is stored in CVS; filled in by
    # set_head_revision().
    self.head_revision = None

    # The first logical revision on trunk (usually '1.1'); filled in
    # by define_revision().
    self.revision_1_1 = None

  def set_head_revision(self, revision):
    """Remember REVISION as the head revision.

    set_revision_info() compares trunk revisions against this value to
    recognise the one whose text is a fulltext rather than a delta.
    """
    self.head_revision = revision

  def define_revision(
        self, revision, timestamp, author, state, branches, next
        ):
    """Note which revisions are stored as deltas against REVISION.

    NEXT (when true) and every entry of BRANCHES are entered in
    self.base_revisions with REVISION as their base.  A trunk revision
    without a NEXT is remembered as self.revision_1_1.  TIMESTAMP,
    AUTHOR and STATE are not used here.
    """
    if next:
      self.base_revisions[next] = revision
    elif is_trunk_revision(revision):
      # No NEXT on trunk: this is the first logical trunk revision.
      self.revision_1_1 = revision

    self.base_revisions.update(
        (branch, revision) for branch in branches
        )

  def set_revision_info(self, revision, log, text):
    if revision in self.revisions_seen:
      # One common form of CVS repository corruption is that the
      # Deltatext block for revision 1.1 appears twice.  CollectData
      # has already warned about this problem; here we can just ignore
      # it.
      return
    else:
      self.revisions_seen.add(revision)

    cvs_rev_id = self.cvs_file_items.original_ids[revision]
    if is_trunk_revision(revision):
      # On trunk, revisions are encountered in reverse order (1.<N>
      # ... 1.1) and deltas are inverted.  The first text that we see
      # is the fulltext for the HEAD revision.  After that, the text
      # corresponding to revision 1.N is the delta (1.<N+1> ->
      # 1.<N>)).  We have to invert the deltas here so that we can
      # read the revisions out in dependency order; that is, for
      # revision 1.1 we want the fulltext, and for revision 1.<N> we
      # want the delta (1.<N-1> -> 1.<N>).  This means that we can't
      # compute the delta for a revision until we see its logical
      # parent.  When we finally see revision 1.1 (which is recognized
      # because it doesn't have a parent), we can record the diff (1.1
      # -> 1.2) for revision 1.2, and also the fulltext for 1.1.

      if revision == self.head_revision:
        # This is HEAD, as fulltext.  Initialize the RCSStream so
        # that we can compute deltas backwards in time.
        self._rcs_stream = RCSStream(text)
        self._rcs_stream_revision = revision
      else:
        # Any other trunk revision is a backward delta.  Apply the
        # delta to the RCSStream to mutate it to the contents of this
        # revision, and also to get the reverse delta, which we store
        # as the forward delta of our child revision.
        try:
          text = self._rcs_stream.invert_diff(text)
        except MalformedDeltaException, e:
          logger.error(
              'Malformed RCS delta in %s, revision %s: %s'
              % (self.cvs_file_items.cvs_file.rcs_path, revision, e)
              )
          raise RuntimeError()
        text_record = DeltaTextRecord(
            self.cvs_file_items.original_ids[self._rcs_stream_revision],
            cvs_rev_id
            )
        self.revision_collector._writeout(text_record, text)
        self._rcs_stream_revision = revision

      if revision == self.revision_1_1:
        # This is revision 1.1.  Write its fulltext:
        text_record = FullTextRecord(cvs_rev_id)
        self.revision_collector._writeout(
            text_record, self._rcs_stream.get_text()
            )

        # There will be no more trunk revisions delivered, so free the
        # RCSStream.
        del self._rcs_stream
        del self._rcs_stream_revision

    else: