Code Example #1
    def __init__(self):
        # The revision number to assign to the next new SVNCommit.
        self.revnum_generator = KeyGenerator()

        # A set containing the Projects that have already been
        # initialized:
        self._initialized_projects = set()
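All of the examples on this page obtain their ids from KeyGenerator, either with no argument or with an explicit starting value (for example KeyGenerator(1) or KeyGenerator(GitOutputOption._first_commit_mark)), and then call gen_id() once per new key. As a rough, minimal sketch of the behaviour these call sites assume (an illustration inferred from the usage, not the project's actual implementation):

class KeyGenerator:
    """Hand out sequentially increasing integer ids (illustrative sketch)."""

    def __init__(self, first_id=1):
        # first_id is the value returned by the first call to gen_id();
        # the default shown here is an assumption.
        self._next_id = first_id

    def gen_id(self):
        """Return the next unused id, as an integer."""
        retval = self._next_id
        self._next_id += 1
        return retval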
Code Example #2
File: collect_data.py Project: SEGaL-Group/scm2pgsql
  def __init__(self, revision_recorder, stats_keeper):
    self.revision_recorder = revision_recorder
    self._cvs_item_store = NewCVSItemStore(
        artifact_manager.get_temp_file(config.CVS_ITEMS_STORE))
    self.metadata_db = MetadataDatabase(
        artifact_manager.get_temp_file(config.METADATA_STORE),
        artifact_manager.get_temp_file(config.METADATA_INDEX_TABLE),
        DB_OPEN_NEW,
        )
    self.metadata_logger = MetadataLogger(self.metadata_db)
    self.fatal_errors = []
    self.num_files = 0
    self.symbol_stats = SymbolStatisticsCollector()
    self.stats_keeper = stats_keeper

    # Key generator for CVSFiles:
    self.file_key_generator = KeyGenerator()

    # Key generator for CVSItems:
    self.item_key_generator = KeyGenerator()

    # Key generator for Symbols:
    self.symbol_key_generator = KeyGenerator()

    self.revision_recorder.start()
Code Example #3
File: metadata_database.py Project: saminigod/cygwin
  def __init__(self, metadata_db):
    self._metadata_db = metadata_db

    # A map { digest : id }:
    self._digest_to_id = {}

    # A key_generator to generate keys for metadata that haven't been
    # seen yet:
    self.key_generator = KeyGenerator()
Code Example #4
    def __init__(
        self,
        dump_filename,
        revision_writer,
        max_merges=None,
        author_transforms=None,
    ):
        """Constructor.

    DUMP_FILENAME is the name of the file to which the git-fast-import
    commands for defining revisions should be written.  (Please note
    that depending on the style of revision writer, the actual file
    contents might not be written to this file.)

    REVISION_WRITER is a GitRevisionWriter that is used to output
    either the content of revisions or a mark that was previously used
    to label a blob.

    MAX_MERGES can be set to an integer telling the maximum number of
    parents that can be merged into a commit at once (aside from the
    natural parent).  If it is set to None, then there is no limit.

    AUTHOR_TRANSFORMS is a map {cvsauthor : (fullname, email)} from
    CVS author names to git full name and email address.  All of the
    contents should either be Unicode strings or 8-bit strings encoded
    as UTF-8.

    """

        self.dump_filename = dump_filename
        self.revision_writer = revision_writer
        self.max_merges = max_merges

        def to_utf8(s):
            if isinstance(s, unicode):
                return s.encode('utf8')
            else:
                return s

        self.author_transforms = {}
        if author_transforms is not None:
            for (cvsauthor, (
                    name,
                    email,
            )) in author_transforms.iteritems():
                cvsauthor = to_utf8(cvsauthor)
                name = to_utf8(name)
                email = to_utf8(email)
                self.author_transforms[cvsauthor] = (
                    name,
                    email,
                )

        self._mirror = RepositoryMirror()

        self._mark_generator = KeyGenerator(GitOutputOption._first_commit_mark)
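For illustration only, the AUTHOR_TRANSFORMS argument described in the docstring above is just a plain dictionary; the constructor then walks it with to_utf8() so that everything stored in self.author_transforms is UTF-8 encoded. A hypothetical mapping (all names and addresses made up) might look like:

# Hypothetical mapping from CVS usernames to git identities.  Values may be
# Unicode strings or UTF-8 encoded 8-bit strings, as the docstring says.
author_transforms = {
    'jrandom': (u'J. Random Hacker', u'jrandom@example.com'),
    'fred': ('Fred Example', 'fred@example.com'),
}

# Passed as the author_transforms keyword argument when constructing the
# output option, e.g. (dump file name and revision_writer are assumed to
# come from the surrounding configuration):
# output_option = GitOutputOption(
#     'git-dump.dat', revision_writer, author_transforms=author_transforms)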
Code Example #5
 def start(self):
     self.revision_reader.start()
     if self.blob_filename is None:
         self.dump_file = open(
             artifact_manager.get_temp_file(config.GIT_BLOB_DATAFILE),
             'wb',
         )
     else:
         self.dump_file = open(self.blob_filename, 'wb')
     self._mark_generator = KeyGenerator()
Code Example #6
 def start(self):
     self._mark_generator = KeyGenerator()
     logger.normal('Starting generate_blobs.py...')
     self._popen = subprocess.Popen(
         [
             sys.executable,
             os.path.join(os.path.dirname(__file__), 'generate_blobs.py'),
             self.blob_filename,
         ],
         stdin=subprocess.PIPE,
     )
Code Example #7
 def start(self):
   self._mark_generator = KeyGenerator()
   logger.normal('Starting generate_blobs.py...')
   if self.blob_filename is None:
     blob_filename = artifact_manager.get_temp_file(config.GIT_BLOB_DATAFILE)
   else:
     blob_filename = self.blob_filename
   self._pipe = subprocess.Popen(
       [
           sys.executable,
           os.path.join(os.path.dirname(__file__), 'generate_blobs.py'),
           blob_filename,
           ],
       stdin=subprocess.PIPE,
       )
Code Example #8
 def start(self):
     self._mark_generator = KeyGenerator()
     logger.normal("Starting generate_blobs.py...")
     self._popen = subprocess.Popen(
         [sys.executable, os.path.join(os.path.dirname(__file__), "generate_blobs.py"), self.blob_filename],
         stdin=subprocess.PIPE,
     )
Code Example #9
class GitRevisionCollector(RevisionCollector):
    """Output file revisions to git-fast-import."""
    def __init__(self, blob_filename, revision_reader):
        self.blob_filename = blob_filename
        self.revision_reader = revision_reader

    def register_artifacts(self, which_pass):
        self.revision_reader.register_artifacts(which_pass)

    def start(self):
        self.revision_reader.start()
        self.dump_file = open(self.blob_filename, 'wb')
        self._mark_generator = KeyGenerator()

    def _process_revision(self, cvs_rev):
        """Write the revision fulltext to a blob if it is not dead."""

        if isinstance(cvs_rev, CVSRevisionDelete):
            # There is no need to record a delete revision, and its token
            # will never be needed:
            return

        # FIXME: We have to decide what to do about keyword substitution
        # and eol_style here:
        fulltext = self.revision_reader.get_content(cvs_rev)

        mark = self._mark_generator.gen_id()
        self.dump_file.write('blob\n')
        self.dump_file.write('mark :%d\n' % (mark, ))
        self.dump_file.write('data %d\n' % (len(fulltext), ))
        self.dump_file.write(fulltext)
        self.dump_file.write('\n')
        cvs_rev.revision_reader_token = mark

    def _process_symbol(self, cvs_symbol, cvs_file_items):
        """Record the original source of CVS_SYMBOL.

    Determine the original revision source of CVS_SYMBOL, and store it
    as the symbol's revision_reader_token."""

        cvs_source = cvs_symbol.get_cvs_revision_source(cvs_file_items)
        cvs_symbol.revision_reader_token = cvs_source.revision_reader_token

    def process_file(self, cvs_file_items):
        for lod_items in cvs_file_items.iter_lods():
            for cvs_rev in lod_items.cvs_revisions:
                self._process_revision(cvs_rev)

        # Now that all CVSRevisions' revision_reader_tokens are set,
        # iterate through symbols and set their tokens to those of their
        # original source revisions:
        for lod_items in cvs_file_items.iter_lods():
            if lod_items.cvs_branch is not None:
                self._process_symbol(lod_items.cvs_branch, cvs_file_items)
            for cvs_tag in lod_items.cvs_tags:
                self._process_symbol(cvs_tag, cvs_file_items)

    def finish(self):
        self.revision_reader.finish()
        self.dump_file.close()
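The records written by _process_revision() above follow git fast-import's standard blob syntax ('blob', 'mark :<n>', 'data <length>', then the raw content). Once a conversion has produced both the blob file and the commit dump file, they are typically concatenated and piped into git fast-import; a minimal sketch of doing that from Python, with hypothetical file names, run inside an already initialized git repository:

import shutil
import subprocess

def run_fast_import(blob_path='git-blob.dat', dump_path='git-dump.dat'):
    # git fast-import reads a single stream from stdin, so the blob records
    # are sent first, followed by the commit data that references the marks.
    importer = subprocess.Popen(['git', 'fast-import'], stdin=subprocess.PIPE)
    for path in (blob_path, dump_path):
        with open(path, 'rb') as f:
            shutil.copyfileobj(f, importer.stdin)
    importer.stdin.close()
    if importer.wait():
        raise RuntimeError('git fast-import failed')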
Code Example #10
  def __init__(self):
    # The revision number to assign to the next new SVNCommit.
    self.revnum_generator = KeyGenerator()

    # A set containing the Projects that have already been
    # initialized:
    self._initialized_projects = set()
Code Example #11
  def open(self):
    """Set up the RepositoryMirror and prepare it for commits."""

    self._key_generator = KeyGenerator()

    # A map from LOD to LODHistory instance for all LODs that have
    # been referenced so far:
    self._lod_histories = {}

    # This corresponds to the 'nodes' table in a Subversion fs.  (We
    # don't need a 'representations' or 'strings' table because we
    # only track file existence, not file contents.)
    self._node_db = _NodeDatabase()

    # Start at revision 0 without a root node.
    self._youngest = 0
Code Example #12
 def start(self):
   self.revision_reader.start()
   if self.blob_filename is None:
     self.dump_file = open(
         artifact_manager.get_temp_file(config.GIT_BLOB_DATAFILE), 'wb',
         )
   else:
     self.dump_file = open(self.blob_filename, 'wb')
   self._mark_generator = KeyGenerator()
Code Example #13
class ExternalBlobGenerator(RevisionCollector):
    """Have generate_blobs.py output file revisions to a blob file."""

    def __init__(self, blob_filename):
        self.blob_filename = blob_filename

    def start(self):
        self._mark_generator = KeyGenerator()
        logger.normal("Starting generate_blobs.py...")
        self._popen = subprocess.Popen(
            [sys.executable, os.path.join(os.path.dirname(__file__), "generate_blobs.py"), self.blob_filename],
            stdin=subprocess.PIPE,
        )

    def _process_symbol(self, cvs_symbol, cvs_file_items):
        """Record the original source of CVS_SYMBOL.

    Determine the original revision source of CVS_SYMBOL, and store it
    as the symbol's revision_reader_token."""

        cvs_source = cvs_symbol.get_cvs_revision_source(cvs_file_items)
        cvs_symbol.revision_reader_token = cvs_source.revision_reader_token

    def process_file(self, cvs_file_items):
        marks = {}
        for lod_items in cvs_file_items.iter_lods():
            for cvs_rev in lod_items.cvs_revisions:
                if not isinstance(cvs_rev, CVSRevisionDelete):
                    mark = self._mark_generator.gen_id()
                    cvs_rev.revision_reader_token = mark
                    marks[cvs_rev.rev] = mark

        # A separate pickler is used for each dump(), so that its memo
        # doesn't grow very large.  The default ASCII protocol is used so
        # that this works without changes on systems that distinguish
        # between text and binary files.
        pickle.dump((cvs_file_items.cvs_file.rcs_path, marks), self._popen.stdin)
        self._popen.stdin.flush()

        # Now that all CVSRevisions' revision_reader_tokens are set,
        # iterate through symbols and set their tokens to those of their
        # original source revisions:
        for lod_items in cvs_file_items.iter_lods():
            if lod_items.cvs_branch is not None:
                self._process_symbol(lod_items.cvs_branch, cvs_file_items)
            for cvs_tag in lod_items.cvs_tags:
                self._process_symbol(cvs_tag, cvs_file_items)

    def finish(self):
        self._popen.stdin.close()
        logger.normal("Waiting for generate_blobs.py to finish...")
        returncode = self._popen.wait()
        if returncode:
            raise FatalError("generate_blobs.py failed with return code %s." % (returncode,))
        else:
            logger.normal("generate_blobs.py is done.")
Code Example #14
File: metadata_database.py Project: saminigod/cygwin
class MetadataLogger:
  """Store and generate IDs for the metadata associated with CVSRevisions.

  We want CVSRevisions that might be able to be combined to have the
  same metadata ID, so we want a one-to-one relationship id <->
  metadata.  We could simply construct a map {metadata : id}, but the
  map would grow too large.  Therefore, we generate a digest
  containing the significant parts of the metadata, and construct a
  map {digest : id}.

  To get the ID for a new set of metadata, we first create the digest.
  If there is already an ID registered for that digest, we simply
  return it.  If not, we generate a new ID, store the metadata in the
  metadata database under that ID, record the mapping {digest : id},
  and return the new id.

  What metadata is included in the digest?  The author, log_msg,
  project_id (if Ctx().cross_project_commits is not set), and
  branch_name (if Ctx().cross_branch_commits is not set)."""

  def __init__(self, metadata_db):
    self._metadata_db = metadata_db

    # A map { digest : id }:
    self._digest_to_id = {}

    # A key_generator to generate keys for metadata that haven't been
    # seen yet:
    self.key_generator = KeyGenerator()

  def store(self, project, branch_name, author, log_msg):
    """Store the metadata and return its id.

    Locate the record for a commit with the specified (PROJECT,
    BRANCH_NAME, AUTHOR, LOG_MSG) and return its id.  (Depending on
    policy, not all of these items are necessarily used when creating
    the unique id.)  If there is no such record, create one and return
    its newly-generated id."""

    key = [author, log_msg]
    if not Ctx().cross_project_commits:
      key.append('%x' % project.id)
    if not Ctx().cross_branch_commits:
      key.append(branch_name or '')

    digest = sha.new('\0'.join(key)).digest()
    try:
      # See if it is already known:
      return self._digest_to_id[digest]
    except KeyError:
      id = self.key_generator.gen_id()
      self._digest_to_id[digest] = id
      self._metadata_db[id] = Metadata(id, author, log_msg)
      return id
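The docstring above describes the core trick: keep a map {digest : id} rather than {metadata : id} so the map stays small. A stripped-down, self-contained sketch of the same pattern (ignoring the project/branch policy flags and using hashlib instead of the old sha module; this is an illustration, not the project's code):

import hashlib
import itertools

class DigestIdAssigner:
    """Assign one small integer id per distinct (author, log_msg) pair."""

    def __init__(self):
        # A map { digest : id }:
        self._digest_to_id = {}
        self._ids = itertools.count(1)

    def get_id(self, author, log_msg):
        # author and log_msg are assumed to be byte strings here.
        digest = hashlib.sha1(b'\0'.join([author, log_msg])).digest()
        try:
            return self._digest_to_id[digest]
        except KeyError:
            new_id = next(self._ids)
            self._digest_to_id[digest] = new_id
            return new_id

Two calls with the same author and log message return the same id; any new combination consumes the next value from the counter.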
Code Example #15
  def __init__(
        self, revision_writer,
        dump_filename=None,
        author_transforms=None,
        tie_tag_fixup_branches=False,
        ):
    """Constructor.

    REVISION_WRITER is a GitRevisionWriter that is used to output
    either the content of revisions or a mark that was previously used
    to label a blob.

    DUMP_FILENAME is the name of the file to which the git-fast-import
    commands for defining revisions should be written.  (Please note
    that depending on the style of revision writer, the actual file
    contents might not be written to this file.)  If it is None, then
    the output is written to stdout.

    AUTHOR_TRANSFORMS is a map {cvsauthor : (fullname, email)} from
    CVS author names to git full name and email address.  All of the
    contents should either be Unicode strings or 8-bit strings encoded
    as UTF-8.

    TIE_TAG_FIXUP_BRANCHES means whether after finishing with a tag
    fixup branch, it should be pseudo-merged (ancestry linked but no
    content changes) back into its source branch, to dispose of the
    open head.

    """
    DVCSOutputOption.__init__(self)
    self.dump_filename = dump_filename
    self.revision_writer = revision_writer

    self.author_transforms = self.normalize_author_transforms(
        author_transforms
        )

    self.tie_tag_fixup_branches = tie_tag_fixup_branches

    self._mark_generator = KeyGenerator(GitOutputOption._first_commit_mark)
Code Example #16
class Substituter:
    def __init__(self, template):
        self.template = template
        self.key_generator = KeyGenerator(1)

        # A map from old values to new ones.
        self.substitutions = {}

    def get_substitution(self, s):
        r = self.substitutions.get(s)
        if r == None:
            r = self.template % self.key_generator.gen_id()
            self.substitutions[s] = r
        return r
Code Example #17
File: destroy_repository.py Project: mhagger/cvs2svn
class Substituter:
    def __init__(self, template):
        self.template = template
        self.key_generator = KeyGenerator(1)

        # A map from old values to new ones.
        self.substitutions = {}

    def get_substitution(self, s):
        r = self.substitutions.get(s)
        if r == None:
            r = self.template % self.key_generator.gen_id()
            self.substitutions[s] = r
        return r
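As a quick illustration of the Substituter above (the template string is hypothetical), repeated inputs map to the same replacement, and each new input consumes the next generated key:

sub = Substituter('file%03d')
print(sub.get_substitution('secret_name.c'))   # e.g. 'file001'
print(sub.get_substitution('other_name.c'))    # e.g. 'file002'
print(sub.get_substitution('secret_name.c'))   # 'file001' again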
Code Example #18
    def open(self):
        """Set up the SVNRepositoryMirror and prepare it for SVNCommits."""

        self._key_generator = KeyGenerator()

        self._delegates = []

        # A map from LOD to LODHistory instance for all LODs that have
        # been defined so far:
        self._lod_histories = {}

        # This corresponds to the 'nodes' table in a Subversion fs.  (We
        # don't need a 'representations' or 'strings' table because we
        # only track metadata, not file contents.)
        self._nodes_db = IndexedDatabase(
            artifact_manager.get_temp_file(config.SVN_MIRROR_NODES_STORE),
            artifact_manager.get_temp_file(
                config.SVN_MIRROR_NODES_INDEX_TABLE),
            DB_OPEN_NEW,
            serializer=_NodeSerializer())

        # Start at revision 0 without a root node.  It will be created
        # by _open_writable_root_node.
        self._youngest = 0
Code Example #19
  def __init__(
        self, dump_filename, revision_writer,
        max_merges=None, author_transforms=None,
        ):
    """Constructor.

    DUMP_FILENAME is the name of the file to which the git-fast-import
    commands for defining revisions should be written.  (Please note
    that depending on the style of revision writer, the actual file
    contents might not be written to this file.)

    REVISION_WRITER is a GitRevisionWriter that is used to output
    either the content of revisions or a mark that was previously used
    to label a blob.

    MAX_MERGES can be set to an integer telling the maximum number of
    parents that can be merged into a commit at once (aside from the
    natural parent).  If it is set to None, then there is no limit.

    AUTHOR_TRANSFORMS is a map {cvsauthor : (fullname, email)} from
    CVS author names to git full name and email address.  All of the
    contents should either be Unicode strings or 8-bit strings encoded
    as UTF-8.

    """

    self.dump_filename = dump_filename
    self.revision_writer = revision_writer
    self.max_merges = max_merges

    def to_utf8(s):
      if isinstance(s, unicode):
        return s.encode('utf8')
      else:
        return s

    self.author_transforms = {}
    if author_transforms is not None:
      for (cvsauthor, (name, email,)) in author_transforms.iteritems():
        cvsauthor = to_utf8(cvsauthor)
        name = to_utf8(name)
        email = to_utf8(email)
        self.author_transforms[cvsauthor] = (name, email,)

    self._mirror = RepositoryMirror()

    self._mark_generator = KeyGenerator(GitOutputOption._first_commit_mark)
Code Example #20
File: git_output_option.py Project: mhagger/cvs2svn
  def __init__(
        self, revision_writer,
        dump_filename=None,
        author_transforms=None,
        tie_tag_fixup_branches=False,
        ):
    """Constructor.

    REVISION_WRITER is a GitRevisionWriter that is used to output
    either the content of revisions or a mark that was previously used
    to label a blob.

    DUMP_FILENAME is the name of the file to which the git-fast-import
    commands for defining revisions should be written.  (Please note
    that depending on the style of revision writer, the actual file
    contents might not be written to this file.)  If it is None, then
    the output is written to stdout.

    AUTHOR_TRANSFORMS is a map {cvsauthor : (fullname, email)} from
    CVS author names to git full name and email address.  All of the
    contents should either be Unicode strings or 8-bit strings encoded
    as UTF-8.

    TIE_TAG_FIXUP_BRANCHES means whether after finishing with a tag
    fixup branch, it should be pseudo-merged (ancestry linked but no
    content changes) back into its source branch, to dispose of the
    open head.

    """
    DVCSOutputOption.__init__(self)
    self.dump_filename = dump_filename
    self.revision_writer = revision_writer

    self.author_transforms = self.normalize_author_transforms(
        author_transforms
        )

    self.tie_tag_fixup_branches = tie_tag_fixup_branches

    self._mark_generator = KeyGenerator(GitOutputOption._first_commit_mark)
Code Example #21
  def open(self):
    """Set up the SVNRepositoryMirror and prepare it for SVNCommits."""

    self._key_generator = KeyGenerator()

    self._delegates = [ ]

    # A map from LOD to LODHistory instance for all LODs that have
    # been defined so far:
    self._lod_histories = {}

    # This corresponds to the 'nodes' table in a Subversion fs.  (We
    # don't need a 'representations' or 'strings' table because we
    # only track metadata, not file contents.)
    self._nodes_db = IndexedDatabase(
        artifact_manager.get_temp_file(config.SVN_MIRROR_NODES_STORE),
        artifact_manager.get_temp_file(config.SVN_MIRROR_NODES_INDEX_TABLE),
        DB_OPEN_NEW, serializer=_NodeSerializer()
        )

    # Start at revision 0 without a root node.  It will be created
    # by _open_writable_root_node.
    self._youngest = 0
Code Example #22
File: git_output_option.py Project: robinst/cvs2svn
class GitOutputOption(DVCSOutputOption):
  """An OutputOption that outputs to a git-fast-import formatted file.

  Members:

    dump_filename -- (string) the name of the file to which the
        git-fast-import commands for defining revisions will be
        written.

    author_transforms -- a map from CVS author names to git full name
        and email address.  See
        DVCSOutputOption.normalize_author_transforms() for information
        about the form of this parameter.

  """

  name = "Git"

  # The first mark number used for git-fast-import commit marks.  This
  # value needs to be large to avoid conflicts with blob marks.
  _first_commit_mark = 1000000000

  def __init__(
        self, dump_filename, revision_writer,
        author_transforms=None,
        tie_tag_fixup_branches=False,
        ):
    """Constructor.

    DUMP_FILENAME is the name of the file to which the git-fast-import
    commands for defining revisions should be written.  (Please note
    that depending on the style of revision writer, the actual file
    contents might not be written to this file.)

    REVISION_WRITER is a GitRevisionWriter that is used to output
    either the content of revisions or a mark that was previously used
    to label a blob.

    AUTHOR_TRANSFORMS is a map {cvsauthor : (fullname, email)} from
    CVS author names to git full name and email address.  All of the
    contents should either be Unicode strings or 8-bit strings encoded
    as UTF-8.

    TIE_TAG_FIXUP_BRANCHES means whether after finishing with a tag
    fixup branch, it should be pseudo-merged (ancestry linked but no
    content changes) back into its source branch, to dispose of the
    open head.

    """
    DVCSOutputOption.__init__(self)
    self.dump_filename = dump_filename
    self.revision_writer = revision_writer

    self.author_transforms = self.normalize_author_transforms(
        author_transforms
        )

    self.tie_tag_fixup_branches = tie_tag_fixup_branches

    self._mark_generator = KeyGenerator(GitOutputOption._first_commit_mark)

  def register_artifacts(self, which_pass):
    DVCSOutputOption.register_artifacts(self, which_pass)
    self.revision_writer.register_artifacts(which_pass)

  def check_symbols(self, symbol_map):
    # FIXME: What constraints does git impose on symbols?
    pass

  def setup(self, svn_rev_count):
    DVCSOutputOption.setup(self, svn_rev_count)
    self.f = open(self.dump_filename, 'wb')

    # The youngest revnum that has been committed so far:
    self._youngest = 0

    # A map {lod : [(revnum, mark)]} giving each of the revision
    # numbers in which there was a commit to lod, and the mark active
    # at the end of the revnum.
    self._marks = {}

    self.revision_writer.start(self._mirror, self.f)

  def _create_commit_mark(self, lod, revnum):
    mark = self._mark_generator.gen_id()
    self._set_lod_mark(lod, revnum, mark)
    return mark

  def _set_lod_mark(self, lod, revnum, mark):
    """Record MARK as the status of LOD for REVNUM.

    If there is already an entry for REVNUM, overwrite it.  If not,
    append a new entry to the self._marks list for LOD."""

    assert revnum >= self._youngest
    entry = (revnum, mark)
    try:
      modifications = self._marks[lod]
    except KeyError:
      # This LOD hasn't appeared before; create a new list and add the
      # entry:
      self._marks[lod] = [entry]
    else:
      # A record exists, so it necessarily has at least one element:
      if modifications[-1][0] == revnum:
        modifications[-1] = entry
      else:
        modifications.append(entry)
    self._youngest = revnum

  def _get_author(self, svn_commit):
    """Return the author to be used for SVN_COMMIT.

    Return the author as a UTF-8 string in the form needed by git
    fast-import; that is, 'name <email>'."""

    cvs_author = svn_commit.get_author()
    return self._map_author(cvs_author)

  def _map_author(self, cvs_author):
    return self.author_transforms.get(cvs_author, "%s <>" % (cvs_author,))

  @staticmethod
  def _get_log_msg(svn_commit):
    return svn_commit.get_log_msg()

  def process_initial_project_commit(self, svn_commit):
    self._mirror.start_commit(svn_commit.revnum)
    self._mirror.end_commit()

  def process_primary_commit(self, svn_commit):
    author = self._get_author(svn_commit)
    log_msg = self._get_log_msg(svn_commit)

    lods = set()
    for cvs_rev in svn_commit.get_cvs_items():
      lods.add(cvs_rev.lod)
    if len(lods) != 1:
      raise InternalError('Commit affects %d LODs' % (len(lods),))
    lod = lods.pop()

    self._mirror.start_commit(svn_commit.revnum)
    if isinstance(lod, Trunk):
      # FIXME: is this correct?:
      self.f.write('commit refs/heads/master\n')
    else:
      self.f.write('commit refs/heads/%s\n' % (lod.name,))
    self.f.write(
        'mark :%d\n'
        % (self._create_commit_mark(lod, svn_commit.revnum),)
        )
    self.f.write(
        'committer %s %d +0000\n' % (author, svn_commit.date,)
        )
    self.f.write('data %d\n' % (len(log_msg),))
    self.f.write('%s\n' % (log_msg,))
    for cvs_rev in svn_commit.get_cvs_items():
      self.revision_writer.process_revision(cvs_rev, post_commit=False)

    self.f.write('\n')
    self._mirror.end_commit()

  def process_post_commit(self, svn_commit):
    author = self._get_author(svn_commit)
    log_msg = self._get_log_msg(svn_commit)

    source_lods = set()
    for cvs_rev in svn_commit.cvs_revs:
      source_lods.add(cvs_rev.lod)
    if len(source_lods) != 1:
      raise InternalError('Commit is from %d LODs' % (len(source_lods),))
    source_lod = source_lods.pop()

    self._mirror.start_commit(svn_commit.revnum)
    # FIXME: is this correct?:
    self.f.write('commit refs/heads/master\n')
    self.f.write(
        'mark :%d\n'
        % (self._create_commit_mark(None, svn_commit.revnum),)
        )
    self.f.write(
        'committer %s %d +0000\n' % (author, svn_commit.date,)
        )
    self.f.write('data %d\n' % (len(log_msg),))
    self.f.write('%s\n' % (log_msg,))
    self.f.write(
        'merge :%d\n'
        % (self._get_source_mark(source_lod, svn_commit.revnum),)
        )
    for cvs_rev in svn_commit.cvs_revs:
      self.revision_writer.process_revision(cvs_rev, post_commit=True)

    self.f.write('\n')
    self._mirror.end_commit()

  def _get_source_mark(self, source_lod, revnum):
    """Return the mark active on SOURCE_LOD at the end of REVNUM."""

    modifications = self._marks[source_lod]
    i = bisect.bisect_left(modifications, (revnum + 1,)) - 1
    (revnum, mark) = modifications[i]
    return mark

  def describe_lod_to_user(self, lod):
    """This needs to make sense to users of the fastimported result."""
    if isinstance(lod, Trunk):
      return 'master'
    else:
      return lod.name

  def _describe_commit(self, svn_commit, lod):
      author = self._map_author(svn_commit.get_author())
      if author.endswith(" <>"):
        author = author[:-3]
      date = time.strftime(
          "%Y-%m-%d %H:%M:%S UTC", time.gmtime(svn_commit.date)
          )
      log_msg = svn_commit.get_log_msg()
      if log_msg.find('\n') != -1:
        log_msg = log_msg[:log_msg.index('\n')]
      return "%s %s %s '%s'" % (
          self.describe_lod_to_user(lod), date, author, log_msg,)

  def _process_symbol_commit(self, svn_commit, git_branch, source_groups):
    author = self._get_author(svn_commit)
    log_msg = self._get_log_msg(svn_commit)

    # There are two distinct cases we need to care for here:
    #  1. initial creation of a LOD
    #  2. fixup of an existing LOD to include more files, because the LOD in
    #     CVS was created piecemeal over time, with intervening commits

    # We look at _marks here, but self._mirror._get_lod_history(lod).exists()
    # might be technically more correct (though _get_lod_history is currently
    # underscore-private)
    is_initial_lod_creation = svn_commit.symbol not in self._marks

    # Create the mark, only after the check above
    mark = self._create_commit_mark(svn_commit.symbol, svn_commit.revnum)

    if is_initial_lod_creation:
      # Get the primary parent
      p_source_revnum, p_source_lod, p_cvs_symbols = source_groups[0]
      try:
        p_source_node = self._mirror.get_old_lod_directory(
            p_source_lod, p_source_revnum
            )
      except KeyError:
        raise InternalError('Source %r does not exist' % (p_source_lod,))
      cvs_files_to_delete = set(self._get_all_files(p_source_node))

      for (source_revnum, source_lod, cvs_symbols,) in source_groups:
        for cvs_symbol in cvs_symbols:
          cvs_files_to_delete.discard(cvs_symbol.cvs_file)

    # Write a trailer to the log message which describes the cherrypicks that
    # make up this symbol creation.
    log_msg += "\n"
    if is_initial_lod_creation:
      log_msg += "\nSprout from %s" % (
          self._describe_commit(
              Ctx()._persistence_manager.get_svn_commit(p_source_revnum),
              p_source_lod
              ),
          )
    for (source_revnum, source_lod, cvs_symbols,) \
            in source_groups[(is_initial_lod_creation and 1 or 0):]:
      log_msg += "\nCherrypick from %s:" % (
          self._describe_commit(
              Ctx()._persistence_manager.get_svn_commit(source_revnum),
              source_lod
              ),
          )
      for cvs_path in sorted(
            cvs_symbol.cvs_file.cvs_path for cvs_symbol in cvs_symbols
            ):
        log_msg += "\n    %s" % (cvs_path,)
    if is_initial_lod_creation:
      if cvs_files_to_delete:
        log_msg += "\nDelete:"
        for cvs_path in sorted(
              cvs_file.cvs_path for cvs_file in cvs_files_to_delete
              ):
          log_msg += "\n    %s" % (cvs_path,)

    self.f.write('commit %s\n' % (git_branch,))
    self.f.write('mark :%d\n' % (mark,))
    self.f.write('committer %s %d +0000\n' % (author, svn_commit.date,))
    self.f.write('data %d\n' % (len(log_msg),))
    self.f.write('%s\n' % (log_msg,))

    # Only record actual DVCS ancestry for the primary sprout parent,
    # all the rest are effectively cherrypicks.
    if is_initial_lod_creation:
      self.f.write(
          'from :%d\n'
          % (self._get_source_mark(p_source_lod, p_source_revnum),)
          )

    for (source_revnum, source_lod, cvs_symbols,) in source_groups:
      for cvs_symbol in cvs_symbols:
        self.revision_writer.branch_file(cvs_symbol)

    if is_initial_lod_creation:
      for cvs_file in cvs_files_to_delete:
        self.f.write('D %s\n' % (cvs_file.cvs_path,))

    self.f.write('\n')
    return mark

  def process_branch_commit(self, svn_commit):
    self._mirror.start_commit(svn_commit.revnum)

    source_groups = self._get_source_groups(svn_commit)
    if self._is_simple_copy(svn_commit, source_groups):
      (source_revnum, source_lod, cvs_symbols) = source_groups[0]
      logger.debug(
          '%s will be created via a simple copy from %s:r%d'
          % (svn_commit.symbol, source_lod, source_revnum,)
          )
      mark = self._get_source_mark(source_lod, source_revnum)
      self._set_symbol(svn_commit.symbol, mark)
      self._mirror.copy_lod(source_lod, svn_commit.symbol, source_revnum)
      self._set_lod_mark(svn_commit.symbol, svn_commit.revnum, mark)
    else:
      logger.debug(
          '%s will be created via fixup commit(s)' % (svn_commit.symbol,)
          )
      self._process_symbol_commit(
          svn_commit, 'refs/heads/%s' % (svn_commit.symbol.name,),
          source_groups,
          )

    self._mirror.end_commit()

  def _set_symbol(self, symbol, mark):
    if isinstance(symbol, Branch):
      category = 'heads'
    elif isinstance(symbol, Tag):
      category = 'tags'
    else:
      raise InternalError()
    self.f.write('reset refs/%s/%s\n' % (category, symbol.name,))
    self.f.write('from :%d\n' % (mark,))

  def get_tag_fixup_branch_name(self, svn_commit):
    # The branch name to use for the "tag fixup branches".  The
    # git-fast-import documentation suggests using 'TAG_FIXUP'
    # (outside of the refs/heads namespace), but this is currently
    # broken.  Use a name containing '.', which is not allowed in CVS
    # symbols, to avoid conflicts (though of course a conflict could
    # still result if the user requests symbol transformations).
    return 'refs/heads/TAG.FIXUP'

  def process_tag_commit(self, svn_commit):
    # FIXME: For now we create a fixup branch with the same name as
    # the tag, then the tag.  We never delete the fixup branch.
    self._mirror.start_commit(svn_commit.revnum)

    source_groups = self._get_source_groups(svn_commit)
    if self._is_simple_copy(svn_commit, source_groups):
      (source_revnum, source_lod, cvs_symbols) = source_groups[0]
      logger.debug(
          '%s will be created via a simple copy from %s:r%d'
          % (svn_commit.symbol, source_lod, source_revnum,)
          )
      mark = self._get_source_mark(source_lod, source_revnum)
      self._set_symbol(svn_commit.symbol, mark)
      self._mirror.copy_lod(source_lod, svn_commit.symbol, source_revnum)
      self._set_lod_mark(svn_commit.symbol, svn_commit.revnum, mark)
    else:
      logger.debug(
          '%s will be created via a fixup branch' % (svn_commit.symbol,)
          )

      fixup_branch_name = self.get_tag_fixup_branch_name(svn_commit)

      # Create the fixup branch (which might involve making more than
      # one commit):
      mark = self._process_symbol_commit(
          svn_commit, fixup_branch_name, source_groups
          )

      # Store the mark of the last commit to the fixup branch as the
      # value of the tag:
      self._set_symbol(svn_commit.symbol, mark)
      self.f.write('reset %s\n' % (fixup_branch_name,))
      self.f.write('\n')

      if self.tie_tag_fixup_branches:
        source_lod = source_groups[0][1]
        source_lod_git_branch = \
            'refs/heads/%s' % (getattr(source_lod, 'name', 'master'),)

        mark2 = self._create_commit_mark(source_lod, svn_commit.revnum)
        author = self._map_author(Ctx().username)
        log_msg = self._get_log_msg_for_ancestry_tie(svn_commit)

        self.f.write('commit %s\n' % (source_lod_git_branch,))
        self.f.write('mark :%d\n' % (mark2,))
        self.f.write('committer %s %d +0000\n' % (author, svn_commit.date,))
        self.f.write('data %d\n' % (len(log_msg),))
        self.f.write('%s\n' % (log_msg,))

        self.f.write(
            'merge :%d\n'
            % (mark,)
            )

        self.f.write('\n')

    self._mirror.end_commit()

  def _get_log_msg_for_ancestry_tie(self, svn_commit):
    return Ctx().text_wrapper.fill(
        Ctx().tie_tag_ancestry_message % {
            'symbol_name' : svn_commit.symbol.name,
            }
        )

  def cleanup(self):
    DVCSOutputOption.cleanup(self)
    self.revision_writer.finish()
    self.f.close()
    del self.f
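The _marks bookkeeping above (one sorted list of (revnum, mark) entries per line of development) is what makes _get_source_mark() work: bisect_left finds the last entry at or before the requested revision. A small standalone illustration of that lookup, using made-up data:

import bisect

# Hypothetical history for one line of development: commits were made in
# revisions 3, 7 and 12, producing marks 5, 9 and 14.
modifications = [(3, 5), (7, 9), (12, 14)]

def mark_at(revnum):
    # Same lookup as GitOutputOption._get_source_mark(): find the last
    # entry whose revnum is <= the requested revision.
    i = bisect.bisect_left(modifications, (revnum + 1,)) - 1
    return modifications[i][1]

print(mark_at(7))   # 9
print(mark_at(10))  # 9 (nothing was committed between r7 and r12)
print(mark_at(12))  # 14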
Code Example #23
***THE CVS REPOSITORY WILL BE DESTROYED***

CVSREPO is the path to a CVS repository.

TEST_COMMAND is a command that runs successfully (i.e., with exit
code '0') if the bug is still present, and fails if the bug is
absent.
"""


verbose = 1

tmpdir = 'shrink_test_case-tmp'

file_key_generator = KeyGenerator(1)


def get_tmp_filename():
    return os.path.join(tmpdir, 'f%07d.tmp' % file_key_generator.gen_id())


class CommandFailedException(Exception):
    pass


def command(cmd, *args):
    if verbose >= 2:
        sys.stderr.write('Running: %s %s...' % (cmd, ' '.join(args),))
    retval = os.spawnlp(os.P_WAIT, cmd, cmd, *args)
    if retval:
Code Example #24
 def start(self):
     self.dump_file = open(self.blob_filename, 'wb')
     self._mark_generator = KeyGenerator()
Code Example #25
    def __init__(self, template):
        self.template = template
        self.key_generator = KeyGenerator(1)

        # A map from old values to new ones.
        self.substitutions = {}
Code Example #26
class GitRevisionRecorder(FulltextRevisionRecorder):
  """Output file revisions to git-fast-import."""

  def __init__(self, blob_filename):
    self.blob_filename = blob_filename

  def start(self):
    self.dump_file = open(self.blob_filename, 'wb')
    self._mark_generator = KeyGenerator()

  def start_file(self, cvs_file_items):
    self._cvs_file_items = cvs_file_items

  def _get_original_source(self, cvs_rev):
    """Return the original source of the contents of CVS_REV.

    Return the first non-delete CVSRevision with the same contents as
    CVS_REV.  'First' here refers to deltatext order; i.e., the very
    first revision is HEAD on trunk, then backwards to the root of a
    branch, then out to the tip of a branch.

    The candidates are all revisions along the CVS delta-dependency
    chain until the next one that has a deltatext (inclusive).  Of the
    candidates, CVSRevisionDeletes are disqualified because, even
    though CVS records their contents, it is impossible to extract
    their fulltext using commands like 'cvs checkout -p'.

    If there is no other CVSRevision that has the same content, return
    CVS_REV itself."""

    # Keep track of the "best" source CVSRevision found so far:
    best_source_rev = None

    for cvs_rev in itertools.chain(
          [cvs_rev], self._cvs_file_items.iter_deltatext_ancestors(cvs_rev)
          ):
      if not isinstance(cvs_rev, CVSRevisionDelete):
        best_source_rev = cvs_rev

      if cvs_rev.deltatext_exists:
        break

    return best_source_rev

  def record_fulltext(self, cvs_rev, log, fulltext):
    """Write the fulltext to a blob if it is original and not a delete.

    The reason we go to this trouble is to avoid writing the same file
    contents multiple times for a string of revisions that don't have
    deltatexts (as, for example, happens with dead revisions and
    imported revisions)."""

    if isinstance(cvs_rev, CVSRevisionDelete):
      # There is no need to record a delete revision, and its token
      # will never be needed:
      return None

    source = self._get_original_source(cvs_rev)

    if source.id == cvs_rev.id:
      # Revision is its own source; write it out:
      mark = self._mark_generator.gen_id()
      self.dump_file.write('blob\n')
      self.dump_file.write('mark :%d\n' % (mark,))
      self.dump_file.write('data %d\n' % (len(fulltext),))
      self.dump_file.write(fulltext)
      self.dump_file.write('\n')
      return mark
    else:
      # Return as revision_recorder_token the CVSRevision.id of the
      # original source revision:
      return source.revision_recorder_token

  def finish_file(self, cvs_file_items):
    # Determine the original source of each CVSSymbol, and store it as
    # the symbol's revision_recorder_token.
    for cvs_item in cvs_file_items.values():
      if isinstance(cvs_item, CVSSymbol):
        cvs_source = cvs_item.get_cvs_revision_source(cvs_file_items)
        cvs_item.revision_recorder_token = cvs_source.revision_recorder_token

    del self._cvs_file_items

  def finish(self):
    self.dump_file.close()
Code Example #27
 def start(self):
   self.dump_file = open(self.blob_filename, 'wb')
   self._mark_generator = KeyGenerator()
Code Example #28
File: destroy_repository.py Project: mhagger/cvs2svn
    def __init__(self, template):
        self.template = template
        self.key_generator = KeyGenerator(1)

        # A map from old values to new ones.
        self.substitutions = {}
Code Example #29
class SVNCommitCreator:
  """This class creates and yields SVNCommits via process_changeset()."""

  def __init__(self):
    # The revision number to assign to the next new SVNCommit.
    self.revnum_generator = KeyGenerator()

    # A set containing the Projects that have already been
    # initialized:
    self._initialized_projects = set()

  def _post_commit(self, cvs_revs, motivating_revnum, timestamp):
    """Generate any SVNCommits needed to follow CVS_REVS.

    That is, handle non-trunk default branches.  A revision on a CVS
    non-trunk default branch is visible in a default CVS checkout of
    HEAD.  So we copy such commits over to Subversion's trunk so that
    checking out SVN trunk gives the same output as checking out of
    CVS's default branch."""

    cvs_revs = [
          cvs_rev
          for cvs_rev in cvs_revs
          if cvs_rev.ntdbr and not isinstance(cvs_rev, CVSRevisionNoop)
          ]

    if cvs_revs:
      cvs_revs.sort(
          lambda a, b: cmp(a.cvs_file.rcs_path, b.cvs_file.rcs_path)
          )
      # Generate an SVNCommit for all of our default branch cvs_revs.
      yield SVNPostCommit(
          motivating_revnum, cvs_revs, timestamp,
          self.revnum_generator.gen_id(),
          )

  def _process_revision_changeset(self, changeset, timestamp):
    """Process CHANGESET, using TIMESTAMP as the commit time.

    Create and yield one or more SVNCommits in the process.  CHANGESET
    must be an OrderedChangeset.  TIMESTAMP is used as the timestamp
    for any resulting SVNCommits."""

    if not changeset.cvs_item_ids:
      logger.warn('Changeset has no items: %r' % changeset)
      return

    logger.verbose('-' * 60)
    logger.verbose('CVS Revision grouping:')
    logger.verbose('  Time: %s' % time.ctime(timestamp))

    # Generate an SVNCommit unconditionally.  Even if the only change in
    # this group of CVSRevisions is a deletion of an already-deleted
    # file (that is, a CVS revision in state 'dead' whose predecessor
    # was also in state 'dead'), the conversion will still generate a
    # Subversion revision containing the log message for the second dead
    # revision, because we don't want to lose that information.

    cvs_revs = list(changeset.iter_cvs_items())
    if cvs_revs:
      cvs_revs.sort(lambda a, b: cmp(a.cvs_file.rcs_path, b.cvs_file.rcs_path))
      svn_commit = SVNPrimaryCommit(
          cvs_revs, timestamp, self.revnum_generator.gen_id()
          )

      yield svn_commit

      for cvs_rev in cvs_revs:
        Ctx()._symbolings_logger.log_revision(cvs_rev, svn_commit.revnum)

      # Generate an SVNPostCommit if we have default branch revs.  If
      # some of the revisions in this commit happened on a non-trunk
      # default branch, then those files have to be copied into trunk
      # manually after being changed on the branch (because the RCS
      # "default branch" appears as head, i.e., trunk, in practice).
      # Unfortunately, Subversion doesn't support copies with sources
      # in the current txn.  All copies must be based in committed
      # revisions.  Therefore, we generate the copies in a new
      # revision.
      for svn_post_commit in self._post_commit(
            cvs_revs, svn_commit.revnum, timestamp
            ):
        yield svn_post_commit

  def _process_tag_changeset(self, changeset, timestamp):
    """Process TagChangeset CHANGESET, producing a SVNTagCommit.

    Filter out CVSTagNoops.  If no CVSTags are left, don't generate a
    SVNTagCommit."""

    if Ctx().trunk_only:
      raise InternalError(
          'TagChangeset encountered during a --trunk-only conversion')

    cvs_tag_ids = [
        cvs_tag.id
        for cvs_tag in changeset.iter_cvs_items()
        if not isinstance(cvs_tag, CVSTagNoop)
        ]
    if cvs_tag_ids:
      yield SVNTagCommit(
          changeset.symbol, cvs_tag_ids, timestamp,
          self.revnum_generator.gen_id(),
          )
    else:
      logger.debug(
          'Omitting %r because it contains only CVSTagNoops' % (changeset,)
          )

  def _process_branch_changeset(self, changeset, timestamp):
    """Process BranchChangeset CHANGESET, producing a SVNBranchCommit.

    Filter out CVSBranchNoops.  If no CVSBranches are left, don't
    generate a SVNBranchCommit."""

    if Ctx().trunk_only:
      raise InternalError(
          'BranchChangeset encountered during a --trunk-only conversion')

    cvs_branches = [
        cvs_branch
        for cvs_branch in changeset.iter_cvs_items()
        if not isinstance(cvs_branch, CVSBranchNoop)
        ]
    if cvs_branches:
      svn_commit = SVNBranchCommit(
          changeset.symbol,
          [cvs_branch.id for cvs_branch in cvs_branches],
          timestamp,
          self.revnum_generator.gen_id(),
          )
      yield svn_commit
      for cvs_branch in cvs_branches:
        Ctx()._symbolings_logger.log_branch_revision(
            cvs_branch, svn_commit.revnum
            )
    else:
      logger.debug(
          'Omitting %r because it contains only CVSBranchNoops' % (changeset,)
          )

  def process_changeset(self, changeset, timestamp):
    """Process CHANGESET, using TIMESTAMP for all of its entries.

    Return a generator that generates the resulting SVNCommits.

    The changesets must be fed to this function in proper dependency
    order."""

    # First create any new projects that might be opened by the
    # changeset:
    projects_opened = \
        changeset.get_projects_opened() - self._initialized_projects
    if projects_opened:
      if Ctx().cross_project_commits:
        yield SVNInitialProjectCommit(
            timestamp, projects_opened, self.revnum_generator.gen_id()
            )
      else:
        for project in projects_opened:
          yield SVNInitialProjectCommit(
              timestamp, [project], self.revnum_generator.gen_id()
              )
      self._initialized_projects.update(projects_opened)

    if isinstance(changeset, OrderedChangeset):
      for svn_commit \
              in self._process_revision_changeset(changeset, timestamp):
        yield svn_commit
    elif isinstance(changeset, TagChangeset):
      for svn_commit in self._process_tag_changeset(changeset, timestamp):
        yield svn_commit
    elif isinstance(changeset, BranchChangeset):
      for svn_commit in self._process_branch_changeset(changeset, timestamp):
        yield svn_commit
    else:
      raise TypeError('Illegal changeset %r' % changeset)
Code Example #30
class GitOutputOption(DVCSOutputOption):
  """An OutputOption that outputs to a git-fast-import formatted file.

  Members:

    dump_filename -- (string or None) the name of the file to which
        the git-fast-import commands for defining revisions will be
        written.  If None, the data will be written to stdout.

    author_transforms -- a map from CVS author names to git full name
        and email address.  See
        DVCSOutputOption.normalize_author_transforms() for information
        about the form of this parameter.

  """

  name = "Git"

  # The first mark number used for git-fast-import commit marks.  This
  # value needs to be large to avoid conflicts with blob marks.
  _first_commit_mark = 1000000000

  def __init__(
        self, revision_writer,
        dump_filename=None,
        author_transforms=None,
        tie_tag_fixup_branches=False,
        ):
    """Constructor.

    REVISION_WRITER is a GitRevisionWriter that is used to output
    either the content of revisions or a mark that was previously used
    to label a blob.

    DUMP_FILENAME is the name of the file to which the git-fast-import
    commands for defining revisions should be written.  (Please note
    that depending on the style of revision writer, the actual file
    contents might not be written to this file.)  If it is None, then
    the output is written to stdout.

    AUTHOR_TRANSFORMS is a map {cvsauthor : (fullname, email)} from
    CVS author names to git full name and email address.  All of the
    contents should either be Unicode strings or 8-bit strings encoded
    as UTF-8.

    TIE_TAG_FIXUP_BRANCHES means whether after finishing with a tag
    fixup branch, it should be pseudo-merged (ancestry linked but no
    content changes) back into its source branch, to dispose of the
    open head.

    """
    DVCSOutputOption.__init__(self)
    self.dump_filename = dump_filename
    self.revision_writer = revision_writer

    self.author_transforms = self.normalize_author_transforms(
        author_transforms
        )

    self.tie_tag_fixup_branches = tie_tag_fixup_branches

    self._mark_generator = KeyGenerator(GitOutputOption._first_commit_mark)

  def register_artifacts(self, which_pass):
    DVCSOutputOption.register_artifacts(self, which_pass)
    self.revision_writer.register_artifacts(which_pass)

  def check_symbols(self, symbol_map):
    # FIXME: What constraints does git impose on symbols?
    pass

  def setup(self, svn_rev_count):
    DVCSOutputOption.setup(self, svn_rev_count)
    if self.dump_filename is None:
      self.f = sys.stdout
    else:
      self.f = open(self.dump_filename, 'wb')

    # The youngest revnum that has been committed so far:
    self._youngest = 0

    # A map {lod : [(revnum, mark)]} giving each of the revision
    # numbers in which there was a commit to lod, and the mark active
    # at the end of the revnum.
    self._marks = {}

    self.revision_writer.start(self._mirror, self.f)

  def _create_commit_mark(self, lod, revnum):
    mark = self._mark_generator.gen_id()
    self._set_lod_mark(lod, revnum, mark)
    return mark

  def _set_lod_mark(self, lod, revnum, mark):
    """Record MARK as the status of LOD for REVNUM.

    If there is already an entry for REVNUM, overwrite it.  If not,
    append a new entry to the self._marks list for LOD."""

    assert revnum >= self._youngest
    entry = (revnum, mark)
    try:
      modifications = self._marks[lod]
    except KeyError:
      # This LOD hasn't appeared before; create a new list and add the
      # entry:
      self._marks[lod] = [entry]
    else:
      # A record exists, so it necessarily has at least one element:
      if modifications[-1][0] == revnum:
        modifications[-1] = entry
      else:
        modifications.append(entry)
    self._youngest = revnum

  def _get_author(self, svn_commit):
    """Return the author to be used for SVN_COMMIT.

    Return the author as a UTF-8 string in the form needed by git
    fast-import; that is, 'name <email>'."""

    cvs_author = svn_commit.get_author()
    return self._map_author(cvs_author)

  def _map_author(self, cvs_author):
    return self.author_transforms.get(cvs_author, "%s <>" % (cvs_author,))

  @staticmethod
  def _get_log_msg(svn_commit):
    return svn_commit.get_log_msg()

  def process_initial_project_commit(self, svn_commit):
    self._mirror.start_commit(svn_commit.revnum)
    self._mirror.end_commit()

  def process_primary_commit(self, svn_commit):
    author = self._get_author(svn_commit)
    log_msg = self._get_log_msg(svn_commit)

    lods = set()
    for cvs_rev in svn_commit.get_cvs_items():
      lods.add(cvs_rev.lod)
    if len(lods) != 1:
      raise InternalError('Commit affects %d LODs' % (len(lods),))
    lod = lods.pop()

    self._mirror.start_commit(svn_commit.revnum)
    if isinstance(lod, Trunk):
      # FIXME: is this correct?:
      self.f.write('commit refs/heads/master\n')
    else:
      self.f.write('commit refs/heads/%s\n' % (lod.name,))
    mark = self._create_commit_mark(lod, svn_commit.revnum)
    logger.normal(
        'Writing commit r%d on %s (mark :%d)'
        % (svn_commit.revnum, lod, mark,)
        )
    self.f.write('mark :%d\n' % (mark,))
    self.f.write(
        'committer %s %d +0000\n' % (author, svn_commit.date,)
        )
    self.f.write('data %d\n' % (len(log_msg),))
    self.f.write('%s\n' % (log_msg,))
    for cvs_rev in svn_commit.get_cvs_items():
      self.revision_writer.process_revision(cvs_rev, post_commit=False)

    self.f.write('\n')
    self._mirror.end_commit()

  def process_post_commit(self, svn_commit):
    author = self._get_author(svn_commit)
    log_msg = self._get_log_msg(svn_commit)

    source_lods = set()
    for cvs_rev in svn_commit.cvs_revs:
      source_lods.add(cvs_rev.lod)
    if len(source_lods) != 1:
      raise InternalError('Commit is from %d LODs' % (len(source_lods),))
    source_lod = source_lods.pop()

    self._mirror.start_commit(svn_commit.revnum)
    # FIXME: is this correct?:
    self.f.write('commit refs/heads/master\n')
    mark = self._create_commit_mark(None, svn_commit.revnum)
    logger.normal(
        'Writing post-commit r%d on Trunk (mark :%d)'
        % (svn_commit.revnum, mark,)
        )
    self.f.write('mark :%d\n' % (mark,))
    self.f.write(
        'committer %s %d +0000\n' % (author, svn_commit.date,)
        )
    self.f.write('data %d\n' % (len(log_msg),))
    self.f.write('%s\n' % (log_msg,))
    self.f.write(
        'merge :%d\n'
        % (self._get_source_mark(source_lod, svn_commit.revnum),)
        )
    for cvs_rev in svn_commit.cvs_revs:
      self.revision_writer.process_revision(cvs_rev, post_commit=True)

    self.f.write('\n')
    self._mirror.end_commit()

  def _get_source_mark(self, source_lod, revnum):
    """Return the mark active on SOURCE_LOD at the end of REVNUM."""

    modifications = self._marks[source_lod]
    i = bisect.bisect_left(modifications, (revnum + 1,)) - 1
    (revnum, mark) = modifications[i]
    return mark

  def describe_lod_to_user(self, lod):
    """This needs to make sense to users of the fastimported result."""
    if isinstance(lod, Trunk):
      return 'master'
    else:
      return lod.name

  def _describe_commit(self, svn_commit, lod):
      author = self._map_author(svn_commit.get_author())
      if author.endswith(" <>"):
        author = author[:-3]
      date = time.strftime(
          "%Y-%m-%d %H:%M:%S UTC", time.gmtime(svn_commit.date)
          )
      log_msg = svn_commit.get_log_msg()
      if log_msg.find('\n') != -1:
        log_msg = log_msg[:log_msg.index('\n')]
      return "%s %s %s '%s'" % (
          self.describe_lod_to_user(lod), date, author, log_msg,)

  def _process_symbol_commit(self, svn_commit, git_branch, source_groups):
    author = self._get_author(svn_commit)
    log_msg = self._get_log_msg(svn_commit)

    # There are two distinct cases we need to care for here:
    #  1. initial creation of a LOD
    #  2. fixup of an existing LOD to include more files, because the LOD in
    #     CVS was created piecemeal over time, with intervening commits

    # We look at _marks here, but self._mirror._get_lod_history(lod).exists()
    # might be technically more correct (though _get_lod_history is currently
    # underscore-private)
    is_initial_lod_creation = svn_commit.symbol not in self._marks

    # Create the mark, only after the check above
    mark = self._create_commit_mark(svn_commit.symbol, svn_commit.revnum)

    if is_initial_lod_creation:
      # Get the primary parent
      p_source_revnum, p_source_lod, p_cvs_symbols = source_groups[0]
      try:
        p_source_node = self._mirror.get_old_lod_directory(
            p_source_lod, p_source_revnum
            )
      except KeyError:
        raise InternalError('Source %r does not exist' % (p_source_lod,))
      cvs_files_to_delete = set(self._get_all_files(p_source_node))

      for (source_revnum, source_lod, cvs_symbols,) in source_groups:
        for cvs_symbol in cvs_symbols:
          cvs_files_to_delete.discard(cvs_symbol.cvs_file)

    self.f.write('commit %s\n' % (git_branch,))
    self.f.write('mark :%d\n' % (mark,))
    self.f.write('committer %s %d +0000\n' % (author, svn_commit.date,))
    self.f.write('data %d\n' % (len(log_msg),))
    self.f.write('%s\n' % (log_msg,))

    # Only record actual DVCS ancestry for the primary sprout parent,
    # all the rest are effectively cherrypicks.
    if is_initial_lod_creation:
      self.f.write(
          'from :%d\n'
          % (self._get_source_mark(p_source_lod, p_source_revnum),)
          )

    for (source_revnum, source_lod, cvs_symbols,) in source_groups:
      for cvs_symbol in cvs_symbols:
        self.revision_writer.branch_file(cvs_symbol)

    if is_initial_lod_creation:
      for cvs_file in cvs_files_to_delete:
        self.f.write('D %s\n' % (cvs_file.cvs_path,))

    self.f.write('\n')
    return mark

  def process_branch_commit(self, svn_commit):
    self._mirror.start_commit(svn_commit.revnum)

    source_groups = self._get_source_groups(svn_commit)
    if self._is_simple_copy(svn_commit, source_groups):
      (source_revnum, source_lod, cvs_symbols) = source_groups[0]
      logger.debug(
          '%s will be created via a simple copy from %s:r%d'
          % (svn_commit.symbol, source_lod, source_revnum,)
          )
      mark = self._get_source_mark(source_lod, source_revnum)
      self._set_symbol(svn_commit.symbol, mark)
      self._mirror.copy_lod(source_lod, svn_commit.symbol, source_revnum)
      self._set_lod_mark(svn_commit.symbol, svn_commit.revnum, mark)
    else:
      logger.debug(
          '%s will be created via fixup commit(s)' % (svn_commit.symbol,)
          )
      self._process_symbol_commit(
          svn_commit, 'refs/heads/%s' % (svn_commit.symbol.name,),
          source_groups,
          )

    self._mirror.end_commit()

  def _set_symbol(self, symbol, mark):
    if isinstance(symbol, Branch):
      category = 'heads'
    elif isinstance(symbol, Tag):
      category = 'tags'
    else:
      raise InternalError()
    self.f.write('reset refs/%s/%s\n' % (category, symbol.name,))
    self.f.write('from :%d\n' % (mark,))

  def get_tag_fixup_branch_name(self, svn_commit):
    # The branch name to use for the "tag fixup branches".  The
    # git-fast-import documentation suggests using 'TAG_FIXUP'
    # (outside of the refs/heads namespace), but this is currently
    # broken.  Use a name containing '.', which is not allowed in CVS
    # symbols, to avoid conflicts (though of course a conflict could
    # still result if the user requests symbol transformations).
    return 'refs/heads/TAG.FIXUP'

  def process_tag_commit(self, svn_commit):
    # FIXME: For now we create a fixup branch with the same name as
    # the tag, then the tag.  We never delete the fixup branch.
    self._mirror.start_commit(svn_commit.revnum)

    source_groups = self._get_source_groups(svn_commit)
    if self._is_simple_copy(svn_commit, source_groups):
      (source_revnum, source_lod, cvs_symbols) = source_groups[0]
      logger.debug(
          '%s will be created via a simple copy from %s:r%d'
          % (svn_commit.symbol, source_lod, source_revnum,)
          )
      mark = self._get_source_mark(source_lod, source_revnum)
      self._set_symbol(svn_commit.symbol, mark)
      self._mirror.copy_lod(source_lod, svn_commit.symbol, source_revnum)
      self._set_lod_mark(svn_commit.symbol, svn_commit.revnum, mark)
    else:
      logger.debug(
          '%s will be created via a fixup branch' % (svn_commit.symbol,)
          )

      fixup_branch_name = self.get_tag_fixup_branch_name(svn_commit)

      # Create the fixup branch (which might involve making more than
      # one commit):
      mark = self._process_symbol_commit(
          svn_commit, fixup_branch_name, source_groups
          )

      # Store the mark of the last commit to the fixup branch as the
      # value of the tag:
      self._set_symbol(svn_commit.symbol, mark)
      self.f.write('reset %s\n' % (fixup_branch_name,))
      self.f.write('\n')

      if self.tie_tag_fixup_branches:
        source_lod = source_groups[0][1]
        source_lod_git_branch = \
            'refs/heads/%s' % (getattr(source_lod, 'name', 'master'),)

        mark2 = self._create_commit_mark(source_lod, svn_commit.revnum)
        author = self._map_author(Ctx().username)
        log_msg = self._get_log_msg_for_ancestry_tie(svn_commit)

        self.f.write('commit %s\n' % (source_lod_git_branch,))
        self.f.write('mark :%d\n' % (mark2,))
        self.f.write('committer %s %d +0000\n' % (author, svn_commit.date,))
        self.f.write('data %d\n' % (len(log_msg),))
        self.f.write('%s\n' % (log_msg,))

        self.f.write(
            'merge :%d\n'
            % (mark,)
            )

        self.f.write('\n')

    self._mirror.end_commit()

  def _get_log_msg_for_ancestry_tie(self, svn_commit):
    return Ctx().text_wrapper.fill(
        Ctx().tie_tag_ancestry_message % {
            'symbol_name' : svn_commit.symbol.name,
            }
        )

  def cleanup(self):
    DVCSOutputOption.cleanup(self)
    self.revision_writer.finish()
    if self.dump_filename is not None:
      self.f.close()
    del self.f
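The _get_source_mark() lookup above depends on each per-LOD list in self._marks staying sorted by revision number, so bisect can find the newest mark recorded at or before the requested revnum. A minimal, self-contained sketch of that lookup (the revision numbers and mark values are illustrative only):

import bisect

# Hedged sketch of the bisect-based lookup in _get_source_mark():
# `modifications` stands for the sorted [(revnum, mark), ...] list
# that self._marks keeps per line of development.
def mark_active_at(modifications, revnum):
    # (revnum + 1,) sorts after every entry whose revision number is
    # <= revnum, so the element just before the insertion point is the
    # entry with the largest recorded revnum that does not exceed the
    # requested revnum.
    i = bisect.bisect_left(modifications, (revnum + 1,)) - 1
    return modifications[i][1]

modifications = [(2, 1000000000), (5, 1000000001), (9, 1000000002)]
print(mark_active_at(modifications, 5))  # -> 1000000001
print(mark_active_at(modifications, 7))  # -> 1000000001 (still active)
print(mark_active_at(modifications, 9))  # -> 1000000002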
コード例 #31
0
class GitRevisionRecorder(FulltextRevisionRecorder):
    """Output file revisions to git-fast-import."""
    def __init__(self, blob_filename):
        self.blob_filename = blob_filename

    def start(self):
        self.dump_file = open(self.blob_filename, 'wb')
        self._mark_generator = KeyGenerator()

    def start_file(self, cvs_file_items):
        self._cvs_file_items = cvs_file_items

    def _get_original_source(self, cvs_rev):
        """Return the original source of the contents of CVS_REV.

    Return the first non-delete CVSRevision with the same contents as
    CVS_REV.  'First' here refers to deltatext order; i.e., the very
    first revision is HEAD on trunk, then backwards to the root of a
    branch, then out to the tip of a branch.

    The candidates are all revisions along the CVS delta-dependency
    chain until the next one that has a deltatext (inclusive).  Of the
    candidates, CVSRevisionDeletes are disqualified because, even
    though CVS records their contents, it is impossible to extract
    their fulltext using commands like 'cvs checkout -p'.

    If there is no other CVSRevision that has the same content, return
    CVS_REV itself."""

        # Keep track of the "best" source CVSRevision found so far:
        best_source_rev = None

        for cvs_rev in itertools.chain(
            [cvs_rev], self._cvs_file_items.iter_deltatext_ancestors(cvs_rev)):
            if not isinstance(cvs_rev, CVSRevisionDelete):
                best_source_rev = cvs_rev

            if cvs_rev.deltatext_exists:
                break

        return best_source_rev

    def record_fulltext(self, cvs_rev, log, fulltext):
        """Write the fulltext to a blob if it is original and not a delete.

    The reason we go to this trouble is to avoid writing the same file
    contents multiple times for a string of revisions that don't have
    deltatexts (as, for example, happens with dead revisions and
    imported revisions)."""

        if isinstance(cvs_rev, CVSRevisionDelete):
            # There is no need to record a delete revision, and its token
            # will never be needed:
            return None

        source = self._get_original_source(cvs_rev)

        if source.id == cvs_rev.id:
            # Revision is its own source; write it out:
            mark = self._mark_generator.gen_id()
            self.dump_file.write('blob\n')
            self.dump_file.write('mark :%d\n' % (mark, ))
            self.dump_file.write('data %d\n' % (len(fulltext), ))
            self.dump_file.write(fulltext)
            self.dump_file.write('\n')
            return mark
        else:
            # Return as revision_recorder_token the CVSRevision.id of the
            # original source revision:
            return source.revision_recorder_token

    def finish_file(self, cvs_file_items):
        # Determine the original source of each CVSSymbol, and store it as
        # the symbol's revision_recorder_token.
        for cvs_item in cvs_file_items.values():
            if isinstance(cvs_item, CVSSymbol):
                cvs_source = cvs_item.get_cvs_revision_source(cvs_file_items)
                cvs_item.revision_recorder_token = cvs_source.revision_recorder_token

        del self._cvs_file_items

    def finish(self):
        self.dump_file.close()
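Each original revision lands in the blob file as a git-fast-import blob stanza; the mark assigned here is what later commit commands reference when they add or modify the file, and the blob file is typically concatenated with the commit dump and piped into git fast-import. A minimal, self-contained sketch of the stanza format (the mark number and file contents are made up for illustration):

from io import BytesIO

# Self-contained sketch of the blob record that record_fulltext()
# emits for an original, non-delete revision; mark and contents are
# illustrative only.
def write_blob(out, mark, fulltext):
    out.write(b'blob\n')
    out.write(b'mark :%d\n' % (mark,))
    out.write(b'data %d\n' % (len(fulltext),))
    out.write(fulltext)
    out.write(b'\n')

buf = BytesIO()
write_blob(buf, 1, b'Hello, world!\n')
print(buf.getvalue().decode('ascii'))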
コード例 #32
0
class GitRevisionCollector(RevisionCollector):
  """Output file revisions to git-fast-import."""

  def __init__(self, revision_reader, blob_filename=None):
    self.revision_reader = revision_reader
    self.blob_filename = blob_filename

  def register_artifacts(self, which_pass):
    self.revision_reader.register_artifacts(which_pass)
    if self.blob_filename is None:
      artifact_manager.register_temp_file(
        config.GIT_BLOB_DATAFILE, which_pass,
        )

  def start(self):
    self.revision_reader.start()
    if self.blob_filename is None:
      self.dump_file = open(
          artifact_manager.get_temp_file(config.GIT_BLOB_DATAFILE), 'wb',
          )
    else:
      self.dump_file = open(self.blob_filename, 'wb')
    self._mark_generator = KeyGenerator()

  def _process_revision(self, cvs_rev):
    """Write the revision fulltext to a blob if it is not dead."""

    if isinstance(cvs_rev, CVSRevisionDelete):
      # There is no need to record a delete revision, and its token
      # will never be needed:
      return

    # FIXME: We have to decide what to do about keyword substitution
    # and eol_style here:
    fulltext = self.revision_reader.get_content(cvs_rev)

    mark = self._mark_generator.gen_id()
    self.dump_file.write('blob\n')
    self.dump_file.write('mark :%d\n' % (mark,))
    self.dump_file.write('data %d\n' % (len(fulltext),))
    self.dump_file.write(fulltext)
    self.dump_file.write('\n')
    cvs_rev.revision_reader_token = mark

  def _process_symbol(self, cvs_symbol, cvs_file_items):
    """Record the original source of CVS_SYMBOL.

    Determine the original revision source of CVS_SYMBOL, and store it
    as the symbol's revision_reader_token."""

    cvs_source = cvs_symbol.get_cvs_revision_source(cvs_file_items)
    cvs_symbol.revision_reader_token = cvs_source.revision_reader_token

  def process_file(self, cvs_file_items):
    for lod_items in cvs_file_items.iter_lods():
      for cvs_rev in lod_items.cvs_revisions:
        self._process_revision(cvs_rev)

    # Now that all CVSRevisions' revision_reader_tokens are set,
    # iterate through symbols and set their tokens to those of their
    # original source revisions:
    for lod_items in cvs_file_items.iter_lods():
      if lod_items.cvs_branch is not None:
        self._process_symbol(lod_items.cvs_branch, cvs_file_items)
      for cvs_tag in lod_items.cvs_tags:
        self._process_symbol(cvs_tag, cvs_file_items)

  def finish(self):
    self.revision_reader.finish()
    self.dump_file.close()
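The mark stored in cvs_rev.revision_reader_token is consumed later, when the revision writer emits filemodify commands inside commits that point paths at previously written blobs. A hedged illustration of that consuming side (the writer itself is not part of this excerpt; the mode, mark, and path are illustrative):

# Hypothetical consumer of the revision_reader_token marks set above:
# format a git-fast-import filemodify command that attaches a path to
# a previously emitted blob mark.
def format_filemodify(mark, git_path, executable=False):
    mode = '100755' if executable else '100644'
    return 'M %s :%d %s\n' % (mode, mark, git_path)

print(format_filemodify(17, 'proj/hello.c'))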
コード例 #33
0
class ExternalBlobGenerator(RevisionCollector):
  """Have generate_blobs.py output file revisions to a blob file."""

  def __init__(self, blob_filename=None):
    self.blob_filename = blob_filename

  def register_artifacts(self, which_pass):
    RevisionCollector.register_artifacts(self, which_pass)
    if self.blob_filename is None:
      artifact_manager.register_temp_file(
        config.GIT_BLOB_DATAFILE, which_pass,
        )

  def start(self):
    self._mark_generator = KeyGenerator()
    logger.normal('Starting generate_blobs.py...')
    if self.blob_filename is None:
      blob_filename = artifact_manager.get_temp_file(config.GIT_BLOB_DATAFILE)
    else:
      blob_filename = self.blob_filename
    self._pipe = subprocess.Popen(
        [
            sys.executable,
            os.path.join(os.path.dirname(__file__), 'generate_blobs.py'),
            blob_filename,
            ],
        stdin=subprocess.PIPE,
        )

  def _process_symbol(self, cvs_symbol, cvs_file_items):
    """Record the original source of CVS_SYMBOL.

    Determine the original revision source of CVS_SYMBOL, and store it
    as the symbol's revision_reader_token."""

    cvs_source = cvs_symbol.get_cvs_revision_source(cvs_file_items)
    cvs_symbol.revision_reader_token = cvs_source.revision_reader_token

  def process_file(self, cvs_file_items):
    marks = {}
    for lod_items in cvs_file_items.iter_lods():
      for cvs_rev in lod_items.cvs_revisions:
        if not isinstance(cvs_rev, CVSRevisionDelete):
          mark = self._mark_generator.gen_id()
          cvs_rev.revision_reader_token = mark
          marks[cvs_rev.rev] = mark

    if marks:
      # A separate pickler is used for each dump(), so that its memo
      # doesn't grow very large.  The default ASCII protocol is used so
      # that this works without changes on systems that distinguish
      # between text and binary files.
      pickle.dump((cvs_file_items.cvs_file.rcs_path, marks), self._pipe.stdin)
      self._pipe.stdin.flush()

    # Now that all CVSRevisions' revision_reader_tokens are set,
    # iterate through symbols and set their tokens to those of their
    # original source revisions:
    for lod_items in cvs_file_items.iter_lods():
      if lod_items.cvs_branch is not None:
        self._process_symbol(lod_items.cvs_branch, cvs_file_items)
      for cvs_tag in lod_items.cvs_tags:
        self._process_symbol(cvs_tag, cvs_file_items)

  def finish(self):
    self._pipe.stdin.close()
    logger.normal('Waiting for generate_blobs.py to finish...')
    returncode = self._pipe.wait()
    if returncode:
      raise FatalError(
          'generate_blobs.py failed with return code %s.' % (returncode,)
          )
    else:
      logger.normal('generate_blobs.py is done.')
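process_file() streams one pickle per CVS file down the pipe, so generate_blobs.py presumably loops over pickle.load() until EOF. A hedged, self-contained sketch of such a reading loop (generate_blobs.py is not shown in this excerpt, so its interface here is an assumption; a BytesIO stands in for the pipe so the sketch is runnable):

import pickle
from io import BytesIO

# Hedged sketch of the consumer end of the pipe created in start():
# read (rcs_path, marks) pairs, one pickle per CVS file, until EOF.
# In the real script the stream would be sys.stdin (sys.stdin.buffer
# on Python 3).
def read_blob_requests(stream):
    while True:
        try:
            (rcs_path, marks) = pickle.load(stream)
        except EOFError:
            return
        yield rcs_path, marks

stream = BytesIO()
pickle.dump(('proj/foo.c,v', {'1.1': 1, '1.2': 2}), stream)
pickle.dump(('proj/bar.c,v', {'1.1': 3}), stream)
stream.seek(0)

for rcs_path, marks in read_blob_requests(stream):
    print('%s: %d blob(s) requested' % (rcs_path, len(marks)))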
コード例 #34
0
class SVNCommitCreator:
    """This class creates and yields SVNCommits via process_changeset()."""
    def __init__(self):
        # The revision number to assign to the next new SVNCommit.
        self.revnum_generator = KeyGenerator()

        # A set containing the Projects that have already been
        # initialized:
        self._initialized_projects = set()

    def _post_commit(self, cvs_revs, motivating_revnum, timestamp):
        """Generate any SVNCommits needed to follow CVS_REVS.

    That is, handle non-trunk default branches.  A revision on a CVS
    non-trunk default branch is visible in a default CVS checkout of
    HEAD.  So we copy such commits over to Subversion's trunk so that
    checking out SVN trunk gives the same output as checking out of
    CVS's default branch."""

        cvs_revs = [
            cvs_rev for cvs_rev in cvs_revs
            if cvs_rev.ntdbr and not isinstance(cvs_rev, CVSRevisionNoop)
        ]

        if cvs_revs:
            cvs_revs.sort(
                lambda a, b: cmp(a.cvs_file.rcs_path, b.cvs_file.rcs_path))
            # Generate an SVNCommit for all of our default branch cvs_revs.
            yield SVNPostCommit(
                motivating_revnum,
                cvs_revs,
                timestamp,
                self.revnum_generator.gen_id(),
            )

    def _process_revision_changeset(self, changeset, timestamp):
        """Process CHANGESET, using TIMESTAMP as the commit time.

    Create and yield one or more SVNCommits in the process.  CHANGESET
    must be an OrderedChangeset.  TIMESTAMP is used as the timestamp
    for any resulting SVNCommits."""

        if not changeset.cvs_item_ids:
            logger.warn('Changeset has no items: %r' % changeset)
            return

        logger.verbose('-' * 60)
        logger.verbose('CVS Revision grouping:')
        logger.verbose('  Time: %s' % time.ctime(timestamp))

        # Generate an SVNCommit unconditionally.  Even if the only change in
        # this group of CVSRevisions is a deletion of an already-deleted
        # file (that is, a CVS revision in state 'dead' whose predecessor
        # was also in state 'dead'), the conversion will still generate a
        # Subversion revision containing the log message for the second dead
        # revision, because we don't want to lose that information.

        cvs_revs = list(changeset.iter_cvs_items())
        if cvs_revs:
            cvs_revs.sort(
                lambda a, b: cmp(a.cvs_file.rcs_path, b.cvs_file.rcs_path))
            svn_commit = SVNPrimaryCommit(cvs_revs, timestamp,
                                          self.revnum_generator.gen_id())

            yield svn_commit

            for cvs_rev in cvs_revs:
                Ctx()._symbolings_logger.log_revision(cvs_rev,
                                                      svn_commit.revnum)

            # Generate an SVNPostCommit if we have default branch revs.  If
            # some of the revisions in this commit happened on a non-trunk
            # default branch, then those files have to be copied into trunk
            # manually after being changed on the branch (because the RCS
            # "default branch" appears as head, i.e., trunk, in practice).
            # Unfortunately, Subversion doesn't support copies with sources
            # in the current txn.  All copies must be based in committed
            # revisions.  Therefore, we generate the copies in a new
            # revision.
            for svn_post_commit in self._post_commit(cvs_revs,
                                                     svn_commit.revnum,
                                                     timestamp):
                yield svn_post_commit

    def _process_tag_changeset(self, changeset, timestamp):
        """Process TagChangeset CHANGESET, producing a SVNTagCommit.

    Filter out CVSTagNoops.  If no CVSTags are left, don't generate a
    SVNTagCommit."""

        if Ctx().trunk_only:
            raise InternalError(
                'TagChangeset encountered during a --trunk-only conversion')

        cvs_tag_ids = [
            cvs_tag.id for cvs_tag in changeset.iter_cvs_items()
            if not isinstance(cvs_tag, CVSTagNoop)
        ]
        if cvs_tag_ids:
            yield SVNTagCommit(
                changeset.symbol,
                cvs_tag_ids,
                timestamp,
                self.revnum_generator.gen_id(),
            )
        else:
            logger.debug('Omitting %r because it contains only CVSTagNoops' %
                         (changeset, ))

    def _process_branch_changeset(self, changeset, timestamp):
        """Process BranchChangeset CHANGESET, producing a SVNBranchCommit.

    Filter out CVSBranchNoops.  If no CVSBranches are left, don't
    generate a SVNBranchCommit."""

        if Ctx().trunk_only:
            raise InternalError(
                'BranchChangeset encountered during a --trunk-only conversion')

        cvs_branches = [
            cvs_branch for cvs_branch in changeset.iter_cvs_items()
            if not isinstance(cvs_branch, CVSBranchNoop)
        ]
        if cvs_branches:
            svn_commit = SVNBranchCommit(
                changeset.symbol,
                [cvs_branch.id for cvs_branch in cvs_branches],
                timestamp,
                self.revnum_generator.gen_id(),
            )
            yield svn_commit
            for cvs_branch in cvs_branches:
                Ctx()._symbolings_logger.log_branch_revision(
                    cvs_branch, svn_commit.revnum)
        else:
            logger.debug(
                'Omitting %r because it contains only CVSBranchNoops' %
                (changeset, ))

    def process_changeset(self, changeset, timestamp):
        """Process CHANGESET, using TIMESTAMP for all of its entries.

    Return a generator that generates the resulting SVNCommits.

    The changesets must be fed to this function in proper dependency
    order."""

        # First create any new projects that might be opened by the
        # changeset:
        projects_opened = \
            changeset.get_projects_opened() - self._initialized_projects
        if projects_opened:
            if Ctx().cross_project_commits:
                yield SVNInitialProjectCommit(timestamp, projects_opened,
                                              self.revnum_generator.gen_id())
            else:
                for project in projects_opened:
                    yield SVNInitialProjectCommit(
                        timestamp, [project], self.revnum_generator.gen_id())
            self._initialized_projects.update(projects_opened)

        if isinstance(changeset, OrderedChangeset):
            for svn_commit \
                    in self._process_revision_changeset(changeset, timestamp):
                yield svn_commit
        elif isinstance(changeset, TagChangeset):
            for svn_commit in self._process_tag_changeset(
                    changeset, timestamp):
                yield svn_commit
        elif isinstance(changeset, BranchChangeset):
            for svn_commit in self._process_branch_changeset(
                    changeset, timestamp):
                yield svn_commit
        else:
            raise TypeError('Illegal changeset %r' % changeset)
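The comparator-style sorts in _post_commit() and _process_revision_changeset() rely on cmp and the two-argument form of list.sort(), both of which are Python-2-only; the same ordering by RCS path can be written with a key function that also works on Python 3. A small sketch with stand-in objects (the _StubRev class is invented purely for illustration):

class _StubRev(object):
    """Stand-in for a CVSRevision exposing cvs_file.rcs_path."""
    def __init__(self, rcs_path):
        self.cvs_file = self  # reuse the stub as its own cvs_file
        self.rcs_path = rcs_path

cvs_revs = [_StubRev('proj/b.txt,v'), _StubRev('proj/a.txt,v')]

# Key-based equivalent of:
#   cvs_revs.sort(lambda a, b: cmp(a.cvs_file.rcs_path, b.cvs_file.rcs_path))
cvs_revs.sort(key=lambda cvs_rev: cvs_rev.cvs_file.rcs_path)

print([cvs_rev.rcs_path for cvs_rev in cvs_revs])  # ['proj/a.txt,v', 'proj/b.txt,v']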
コード例 #35
0
class RepositoryMirror:
  """Mirror a repository and its history.

  Mirror a repository as it is constructed, one revision at a time.
  For each LineOfDevelopment we store a skeleton of the directory
  structure within that LOD for each revnum in which it changed.

  For each LOD that has been seen so far, an LODHistory instance is
  stored in self._lod_histories.  An LODHistory keeps track of each
  revnum in which files were added to or deleted from that LOD, as
  well as the node id of the root of the node tree describing the LOD
  contents at that revision.

  The LOD trees themselves are stored in the _node_db database, which
  maps node ids to nodes.  A node is a map from CVSPath to ids of the
  corresponding subnodes.  The _node_db is stored on disk and each
  access is expensive.

  The _node_db database only holds the nodes for old revisions.  The
  revision that is being constructed is kept in memory in the
  _new_nodes map, which is cheap to access.

  You must invoke start_commit() before each commit and end_commit()
  afterwards."""

  def register_artifacts(self, which_pass):
    """Register the artifacts that will be needed for this object."""

    artifact_manager.register_temp_file(
        config.MIRROR_NODES_INDEX_TABLE, which_pass
        )
    artifact_manager.register_temp_file(
        config.MIRROR_NODES_STORE, which_pass
        )

  def open(self):
    """Set up the RepositoryMirror and prepare it for commits."""

    self._key_generator = KeyGenerator()

    # A map from LOD to LODHistory instance for all LODs that have
    # been referenced so far:
    self._lod_histories = {}

    # This corresponds to the 'nodes' table in a Subversion fs.  (We
    # don't need a 'representations' or 'strings' table because we
    # only track file existence, not file contents.)
    self._node_db = _NodeDatabase()

    # Start at revision 0 without a root node.
    self._youngest = 0

  def start_commit(self, revnum):
    """Start a new commit."""

    assert revnum > self._youngest
    self._youngest = revnum

    # A map {node_id : _WritableMirrorDirectoryMixin}.
    self._new_nodes = {}

  def end_commit(self):
    """Called at the end of each commit.

    This method copies the newly created nodes to the on-disk nodes
    db."""

    # Copy the new nodes to the _node_db
    self._node_db.write_new_nodes([
        node
        for node in self._new_nodes.values()
        if not isinstance(node, DeletedCurrentMirrorDirectory)
        ])

    del self._new_nodes

  def _get_lod_history(self, lod):
    """Return the LODHistory instance describing LOD.

    Create a new (empty) LODHistory if it doesn't yet exist."""

    try:
      return self._lod_histories[lod]
    except KeyError:
      lod_history = LODHistory(self, lod)
      self._lod_histories[lod] = lod_history
      return lod_history

  def get_old_lod_directory(self, lod, revnum):
    """Return the directory for the root path of LOD at revision REVNUM.

    Return an instance of MirrorDirectory if the path exists;
    otherwise, raise KeyError."""

    lod_history = self._get_lod_history(lod)
    id = lod_history.get_id(revnum)
    return OldMirrorDirectory(self, id, self._node_db[id])

  def get_old_path(self, cvs_path, lod, revnum):
    """Return the node for CVS_PATH from LOD at REVNUM.

    If CVS_PATH is a CVSDirectory, then return an instance of
    OldMirrorDirectory.  If CVS_PATH is a CVSFile, return None.

    If CVS_PATH does not exist in the specified LOD and REVNUM, raise
    KeyError."""

    node = self.get_old_lod_directory(lod, revnum)

    for sub_path in cvs_path.get_ancestry()[1:]:
      node = node[sub_path]

    return node

  def get_current_lod_directory(self, lod):
    """Return the directory for the root path of LOD in the current revision.

    Return an instance of CurrentMirrorDirectory.  Raise KeyError if
    the path doesn't already exist."""

    lod_history = self._get_lod_history(lod)
    id = lod_history.get_current_id()
    try:
      return self._new_nodes[id]
    except KeyError:
      return _CurrentMirrorReadOnlyLODDirectory(
          self, id, lod, self._node_db[id]
          )

  def get_current_path(self, cvs_path, lod):
    """Return the node for CVS_PATH from LOD in the current revision.

    If CVS_PATH is a CVSDirectory, then return an instance of
    CurrentMirrorDirectory.  If CVS_PATH is a CVSFile, return None.

    If CVS_PATH does not exist in the current revision of the
    specified LOD, raise KeyError."""

    node = self.get_current_lod_directory(lod)

    for sub_path in cvs_path.get_ancestry()[1:]:
      node = node[sub_path]

    return node

  def add_lod(self, lod):
    """Create a new LOD in this repository.

    Return the CurrentMirrorDirectory that was created.  If the LOD
    already exists, raise LODExistsError."""

    lod_history = self._get_lod_history(lod)
    if lod_history.exists():
      raise LODExistsError(
          'Attempt to create %s in repository mirror when it already exists.'
          % (lod,)
          )
    new_node = _CurrentMirrorWritableLODDirectory(
        self, self._key_generator.gen_id(), lod, {}
        )
    lod_history.update(self._youngest, new_node.id)
    self._new_nodes[new_node.id] = new_node
    return new_node

  def copy_lod(self, src_lod, dest_lod, src_revnum):
    """Copy all of SRC_LOD at SRC_REVNUM to DST_LOD.

    In the youngest revision of the repository, the destination LOD
    *must not* already exist.

    Return the new node at DEST_LOD, as a CurrentMirrorDirectory."""

    # Get the node of our src_path
    src_node = self.get_old_lod_directory(src_lod, src_revnum)

    dest_lod_history = self._get_lod_history(dest_lod)
    if dest_lod_history.exists():
      raise LODExistsError(
          'Attempt to copy to %s in repository mirror when it already exists.'
          % (dest_lod,)
          )

    dest_lod_history.update(self._youngest, src_node.id)

    # Return src_node, except packaged up as a CurrentMirrorDirectory:
    return self.get_current_lod_directory(dest_lod)

  def close(self):
    """Free resources and close databases."""

    self._lod_histories = None
    self._node_db.close()
    self._node_db = None
コード例 #36
0
class GitOutputOption(OutputOption):
  """An OutputOption that outputs to a git-fast-import formatted file.

  Members:

    dump_filename -- (string) the name of the file to which the
        git-fast-import commands for defining revisions will be
        written.

    author_transforms -- a map {cvsauthor : (fullname, email)} from
        CVS author names to git full name and email address.  All of
        the contents are 8-bit strings encoded as UTF-8.

  """

  # The first mark number used for git-fast-import commit marks.  This
  # value needs to be large to avoid conflicts with blob marks.
  _first_commit_mark = 1000000000

  def __init__(
        self, dump_filename, revision_writer,
        max_merges=None, author_transforms=None,
        ):
    """Constructor.

    DUMP_FILENAME is the name of the file to which the git-fast-import
    commands for defining revisions should be written.  (Please note
    that depending on the style of revision writer, the actual file
    contents might not be written to this file.)

    REVISION_WRITER is a GitRevisionWriter that is used to output
    either the content of revisions or a mark that was previously used
    to label a blob.

    MAX_MERGES can be set to an integer telling the maximum number of
    parents that can be merged into a commit at once (aside from the
    natural parent).  If it is set to None, then there is no limit.

    AUTHOR_TRANSFORMS is a map {cvsauthor : (fullname, email)} from
    CVS author names to git full name and email address.  All of the
    contents should either be Unicode strings or 8-bit strings encoded
    as UTF-8.

    """

    self.dump_filename = dump_filename
    self.revision_writer = revision_writer
    self.max_merges = max_merges

    def to_utf8(s):
      if isinstance(s, unicode):
        return s.encode('utf8')
      else:
        return s

    self.author_transforms = {}
    if author_transforms is not None:
      for (cvsauthor, (name, email,)) in author_transforms.iteritems():
        cvsauthor = to_utf8(cvsauthor)
        name = to_utf8(name)
        email = to_utf8(email)
        self.author_transforms[cvsauthor] = (name, email,)

    self._mirror = RepositoryMirror()

    self._mark_generator = KeyGenerator(GitOutputOption._first_commit_mark)

  def register_artifacts(self, which_pass):
    # These artifacts are needed for SymbolingsReader:
    artifact_manager.register_temp_file_needed(
        config.SYMBOL_OPENINGS_CLOSINGS_SORTED, which_pass
        )
    artifact_manager.register_temp_file_needed(
        config.SYMBOL_OFFSETS_DB, which_pass
        )
    self.revision_writer.register_artifacts(which_pass)
    self._mirror.register_artifacts(which_pass)

  def check(self):
    if Ctx().cross_project_commits:
      raise FatalError(
          'Git output is not supported with cross-project commits'
          )
    if Ctx().cross_branch_commits:
      raise FatalError(
          'Git output is not supported with cross-branch commits'
          )
    if Ctx().username is None:
      raise FatalError(
          'Git output requires a default commit username'
          )

  def check_symbols(self, symbol_map):
    # FIXME: What constraints does git impose on symbols?
    pass

  def setup(self, svn_rev_count):
    self._symbolings_reader = SymbolingsReader()
    self.f = open(self.dump_filename, 'wb')

    # The youngest revnum that has been committed so far:
    self._youngest = 0

    # A map {lod : [(revnum, mark)]} giving each of the revision
    # numbers in which there was a commit to lod, and the mark active
    # at the end of the revnum.
    self._marks = {}

    self._mirror.open()
    self.revision_writer.start(self.f, self._mirror)

  def _create_commit_mark(self, lod, revnum):
    mark = self._mark_generator.gen_id()
    self._set_lod_mark(lod, revnum, mark)
    return mark

  def _set_lod_mark(self, lod, revnum, mark):
    """Record MARK as the status of LOD for REVNUM.

    If there is already an entry for REVNUM, overwrite it.  If not,
    append a new entry to the self._marks list for LOD."""

    assert revnum >= self._youngest
    entry = (revnum, mark)
    try:
      modifications = self._marks[lod]
    except KeyError:
      # This LOD hasn't appeared before; create a new list and add the
      # entry:
      self._marks[lod] = [entry]
    else:
      # A record exists, so it necessarily has at least one element:
      if modifications[-1][0] == revnum:
        modifications[-1] = entry
      else:
        modifications.append(entry)
    self._youngest = revnum

  def _get_author(self, svn_commit):
    """Return the author to be used for SVN_COMMIT.

    Return the author in the form needed by git; that is, 'foo <bar>'."""

    author = svn_commit.get_author()
    (name, email,) = self.author_transforms.get(author, (author, author,))
    return '%s <%s>' % (name, email,)

  @staticmethod
  def _get_log_msg(svn_commit):
    return svn_commit.get_log_msg()

  def process_initial_project_commit(self, svn_commit):
    self._mirror.start_commit(svn_commit.revnum)
    self._mirror.end_commit()

  def process_primary_commit(self, svn_commit):
    author = self._get_author(svn_commit)
    log_msg = self._get_log_msg(svn_commit)

    lods = set()
    for cvs_rev in svn_commit.get_cvs_items():
      lods.add(cvs_rev.lod)
    if len(lods) != 1:
      raise InternalError('Commit affects %d LODs' % (len(lods),))
    lod = lods.pop()

    self._mirror.start_commit(svn_commit.revnum)
    if isinstance(lod, Trunk):
      # FIXME: is this correct?:
      self.f.write('commit refs/heads/master\n')
    else:
      self.f.write('commit refs/heads/%s\n' % (lod.name,))
    self.f.write(
        'mark :%d\n'
        % (self._create_commit_mark(lod, svn_commit.revnum),)
        )
    self.f.write(
        'committer %s %d +0000\n' % (author, svn_commit.date,)
        )
    self.f.write('data %d\n' % (len(log_msg),))
    self.f.write('%s\n' % (log_msg,))
    for cvs_rev in svn_commit.get_cvs_items():
      self.revision_writer.process_revision(cvs_rev, post_commit=False)

    self.f.write('\n')
    self._mirror.end_commit()

  def process_post_commit(self, svn_commit):
    author = self._get_author(svn_commit)
    log_msg = self._get_log_msg(svn_commit)

    source_lods = set()
    for cvs_rev in svn_commit.cvs_revs:
      source_lods.add(cvs_rev.lod)
    if len(source_lods) != 1:
      raise InternalError('Commit is from %d LODs' % (len(source_lods),))
    source_lod = source_lods.pop()

    self._mirror.start_commit(svn_commit.revnum)
    # FIXME: is this correct?:
    self.f.write('commit refs/heads/master\n')
    self.f.write(
        'mark :%d\n'
        % (self._create_commit_mark(None, svn_commit.revnum),)
        )
    self.f.write(
        'committer %s %d +0000\n' % (author, svn_commit.date,)
        )
    self.f.write('data %d\n' % (len(log_msg),))
    self.f.write('%s\n' % (log_msg,))
    self.f.write(
        'merge :%d\n'
        % (self._get_source_mark(source_lod, svn_commit.revnum),)
        )
    for cvs_rev in svn_commit.cvs_revs:
      self.revision_writer.process_revision(cvs_rev, post_commit=True)

    self.f.write('\n')
    self._mirror.end_commit()

  def _get_source_groups(self, svn_commit):
    """Return groups of sources for SVN_COMMIT.

    SVN_COMMIT is an instance of SVNSymbolCommit.  Yield tuples
    (source_lod, svn_revnum, cvs_symbols) where source_lod is the line
    of development and svn_revnum is the revision that should serve as
    a source, and cvs_symbols is a list of CVSSymbolItems that can be
    copied from that source.  The groups are returned in arbitrary
    order."""

    # Get a map {CVSSymbol : SVNRevisionRange}:
    range_map = self._symbolings_reader.get_range_map(svn_commit)

    # range_map, split up into one map per LOD; i.e., {LOD :
    # {CVSSymbol : SVNRevisionRange}}:
    lod_range_maps = {}

    for (cvs_symbol, range) in range_map.iteritems():
      lod_range_map = lod_range_maps.get(range.source_lod)
      if lod_range_map is None:
        lod_range_map = {}
        lod_range_maps[range.source_lod] = lod_range_map
      lod_range_map[cvs_symbol] = range

    # Sort the sources so that the branch that serves most often as
    # parent is processed first:
    lod_ranges = lod_range_maps.items()
    lod_ranges.sort(
        lambda (lod1,lod_range_map1),(lod2,lod_range_map2):
        -cmp(len(lod_range_map1), len(lod_range_map2)) or cmp(lod1, lod2)
        )

    for (lod, lod_range_map) in lod_ranges:
      while lod_range_map:
        revision_scores = RevisionScores(lod_range_map.values())
        (source_lod, revnum, score) = revision_scores.get_best_revnum()
        assert source_lod == lod
        cvs_symbols = []
        for (cvs_symbol, range) in lod_range_map.items():
          if revnum in range:
            cvs_symbols.append(cvs_symbol)
            del lod_range_map[cvs_symbol]
        yield (lod, revnum, cvs_symbols)

  def _get_all_files(self, node):
    """Generate all of the CVSFiles under NODE."""

    for cvs_path in node:
      subnode = node[cvs_path]
      if subnode is None:
        yield cvs_path
      else:
        for sub_cvs_path in self._get_all_files(subnode):
          yield sub_cvs_path

  def _is_simple_copy(self, svn_commit, source_groups):
    """Return True iff SVN_COMMIT can be created as a simple copy.

    SVN_COMMIT is an SVNTagCommit.  Return True iff it can be created
    as a simple copy from an existing revision (i.e., if the fixup
    branch can be avoided for this tag creation)."""

    # The first requirement is that there be exactly one source:
    if len(source_groups) != 1:
      return False

    (source_lod, svn_revnum, cvs_symbols) = source_groups[0]

    # The second requirement is that the destination LOD not already
    # exist:
    try:
      self._mirror.get_current_lod_directory(svn_commit.symbol)
    except KeyError:
      # The LOD doesn't already exist.  This is good.
      pass
    else:
      # The LOD already exists.  It cannot be created by a copy.
      return False

    # The third requirement is that the source LOD contains exactly
    # the same files as we need to add to the symbol:
    try:
      source_node = self._mirror.get_old_lod_directory(source_lod, svn_revnum)
    except KeyError:
      raise InternalError('Source %r does not exist' % (source_lod,))
    return (
        set([cvs_symbol.cvs_file for cvs_symbol in cvs_symbols])
        == set(self._get_all_files(source_node))
        )

  def _get_source_mark(self, source_lod, revnum):
    """Return the mark active on SOURCE_LOD at the end of REVNUM."""

    modifications = self._marks[source_lod]
    i = bisect.bisect_left(modifications, (revnum + 1,)) - 1
    (revnum, mark) = modifications[i]
    return mark

  def _process_symbol_commit(
        self, svn_commit, git_branch, source_groups, mark
        ):
    author = self._get_author(svn_commit)
    log_msg = self._get_log_msg(svn_commit)

    self.f.write('commit %s\n' % (git_branch,))
    self.f.write('mark :%d\n' % (mark,))
    self.f.write('committer %s %d +0000\n' % (author, svn_commit.date,))
    self.f.write('data %d\n' % (len(log_msg),))
    self.f.write('%s\n' % (log_msg,))

    for (source_lod, source_revnum, cvs_symbols,) in source_groups:
      self.f.write(
          'merge :%d\n'
          % (self._get_source_mark(source_lod, source_revnum),)
          )

    for (source_lod, source_revnum, cvs_symbols,) in source_groups:
      for cvs_symbol in cvs_symbols:
        self.revision_writer.branch_file(cvs_symbol)

    self.f.write('\n')

  def process_branch_commit(self, svn_commit):
    self._mirror.start_commit(svn_commit.revnum)
    source_groups = list(self._get_source_groups(svn_commit))
    for groups in get_chunks(source_groups, self.max_merges):
      self._process_symbol_commit(
          svn_commit, 'refs/heads/%s' % (svn_commit.symbol.name,),
          groups,
          self._create_commit_mark(svn_commit.symbol, svn_commit.revnum),
          )
    self._mirror.end_commit()

  def _set_symbol(self, symbol, mark):
    if isinstance(symbol, Branch):
      category = 'heads'
    elif isinstance(symbol, Tag):
      category = 'tags'
    else:
      raise InternalError()
    self.f.write('reset refs/%s/%s\n' % (category, symbol.name,))
    self.f.write('from :%d\n' % (mark,))

  def process_tag_commit(self, svn_commit):
    # FIXME: For now we create a fixup branch with the same name as
    # the tag, then the tag.  We never delete the fixup branch.  Also,
    # a fixup branch is created even if the tag could be created from
    # a single source.
    self._mirror.start_commit(svn_commit.revnum)

    source_groups = list(self._get_source_groups(svn_commit))
    if self._is_simple_copy(svn_commit, source_groups):
      (source_lod, source_revnum, cvs_symbols) = source_groups[0]
      Log().debug(
          '%s will be created via a simple copy from %s:r%d'
          % (svn_commit.symbol, source_lod, source_revnum,)
          )
      mark = self._get_source_mark(source_lod, source_revnum)
      self._set_symbol(svn_commit.symbol, mark)
    else:
      Log().debug(
          '%s will be created via a fixup branch' % (svn_commit.symbol,)
          )

      # Create the fixup branch (which might involve making more than
      # one commit):
      for groups in get_chunks(source_groups, self.max_merges):
        mark = self._create_commit_mark(svn_commit.symbol, svn_commit.revnum)
        self._process_symbol_commit(
            svn_commit, FIXUP_BRANCH_NAME, groups, mark
            )

      # Store the mark of the last commit to the fixup branch as the
      # value of the tag:
      self._set_symbol(svn_commit.symbol, mark)
      self.f.write('reset %s\n' % (FIXUP_BRANCH_NAME,))
      self.f.write('\n')

    self._mirror.end_commit()

  def cleanup(self):
    self.revision_writer.finish()
    self._mirror.close()
    self.f.close()
    del self.f
    self._symbolings_reader.close()
    del self._symbolings_reader
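process_branch_commit() and process_tag_commit() above call a get_chunks() helper that is not included in this excerpt. Assuming it simply splits the source groups into batches of at most max_merges entries (with None meaning a single unlimited batch), a minimal sketch looks like this:

# Hedged sketch of the get_chunks() helper assumed by the example
# above: yield successive slices of `items` holding at most `n`
# elements; n=None means everything goes into one chunk.
def get_chunks(items, n):
    if n is None:
        yield items
        return
    for i in range(0, len(items), n):
        yield items[i:i + n]

source_groups = ['g1', 'g2', 'g3', 'g4', 'g5']
print(list(get_chunks(source_groups, 2)))     # [['g1', 'g2'], ['g3', 'g4'], ['g5']]
print(list(get_chunks(source_groups, None)))  # [['g1', 'g2', 'g3', 'g4', 'g5']]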
コード例 #37
0
class GitOutputOption(OutputOption):
    """An OutputOption that outputs to a git-fast-import formatted file.

  Members:

    dump_filename -- (string) the name of the file to which the
        git-fast-import commands for defining revisions will be
        written.

    author_transforms -- a map {cvsauthor : (fullname, email)} from
        CVS author names to git full name and email address.  All of
        the contents are 8-bit strings encoded as UTF-8.

  """

    # The first mark number used for git-fast-import commit marks.  This
    # value needs to be large to avoid conflicts with blob marks.
    _first_commit_mark = 1000000000

    def __init__(
        self,
        dump_filename,
        revision_writer,
        max_merges=None,
        author_transforms=None,
    ):
        """Constructor.

    DUMP_FILENAME is the name of the file to which the git-fast-import
    commands for defining revisions should be written.  (Please note
    that depending on the style of revision writer, the actual file
    contents might not be written to this file.)

    REVISION_WRITER is a GitRevisionWriter that is used to output
    either the content of revisions or a mark that was previously used
    to label a blob.

    MAX_MERGES can be set to an integer telling the maximum number of
    parents that can be merged into a commit at once (aside from the
    natural parent).  If it is set to None, then there is no limit.

    AUTHOR_TRANSFORMS is a map {cvsauthor : (fullname, email)} from
    CVS author names to git full name and email address.  All of the
    contents should either be Unicode strings or 8-bit strings encoded
    as UTF-8.

    """

        self.dump_filename = dump_filename
        self.revision_writer = revision_writer
        self.max_merges = max_merges

        def to_utf8(s):
            if isinstance(s, unicode):
                return s.encode('utf8')
            else:
                return s

        self.author_transforms = {}
        if author_transforms is not None:
            for (cvsauthor, (
                    name,
                    email,
            )) in author_transforms.iteritems():
                cvsauthor = to_utf8(cvsauthor)
                name = to_utf8(name)
                email = to_utf8(email)
                self.author_transforms[cvsauthor] = (
                    name,
                    email,
                )

        self._mirror = RepositoryMirror()

        self._mark_generator = KeyGenerator(GitOutputOption._first_commit_mark)

    def register_artifacts(self, which_pass):
        # These artifacts are needed for SymbolingsReader:
        artifact_manager.register_temp_file_needed(
            config.SYMBOL_OPENINGS_CLOSINGS_SORTED, which_pass)
        artifact_manager.register_temp_file_needed(config.SYMBOL_OFFSETS_DB,
                                                   which_pass)
        self.revision_writer.register_artifacts(which_pass)
        self._mirror.register_artifacts(which_pass)

    def check(self):
        if Ctx().cross_project_commits:
            raise FatalError(
                'Git output is not supported with cross-project commits')
        if Ctx().cross_branch_commits:
            raise FatalError(
                'Git output is not supported with cross-branch commits')
        if Ctx().username is None:
            raise FatalError('Git output requires a default commit username')

    def check_symbols(self, symbol_map):
        # FIXME: What constraints does git impose on symbols?
        pass

    def setup(self, svn_rev_count):
        self._symbolings_reader = SymbolingsReader()
        self.f = open(self.dump_filename, 'wb')

        # The youngest revnum that has been committed so far:
        self._youngest = 0

        # A map {lod : [(revnum, mark)]} giving each of the revision
        # numbers in which there was a commit to lod, and the mark active
        # at the end of the revnum.
        self._marks = {}

        self._mirror.open()
        self.revision_writer.start(self.f, self._mirror)

    def _create_commit_mark(self, lod, revnum):
        mark = self._mark_generator.gen_id()
        self._set_lod_mark(lod, revnum, mark)
        return mark

    def _set_lod_mark(self, lod, revnum, mark):
        """Record MARK as the status of LOD for REVNUM.

    If there is already an entry for REVNUM, overwrite it.  If not,
    append a new entry to the self._marks list for LOD."""

        assert revnum >= self._youngest
        entry = (revnum, mark)
        try:
            modifications = self._marks[lod]
        except KeyError:
            # This LOD hasn't appeared before; create a new list and add the
            # entry:
            self._marks[lod] = [entry]
        else:
            # A record exists, so it necessarily has at least one element:
            if modifications[-1][0] == revnum:
                modifications[-1] = entry
            else:
                modifications.append(entry)
        self._youngest = revnum

    def _get_author(self, svn_commit):
        """Return the author to be used for SVN_COMMIT.

    Return the author in the form needed by git; that is, 'foo <bar>'."""

        author = svn_commit.get_author()
        (
            name,
            email,
        ) = self.author_transforms.get(author, (
            author,
            author,
        ))
        return '%s <%s>' % (
            name,
            email,
        )

    @staticmethod
    def _get_log_msg(svn_commit):
        return svn_commit.get_log_msg()

    def process_initial_project_commit(self, svn_commit):
        self._mirror.start_commit(svn_commit.revnum)
        self._mirror.end_commit()

    def process_primary_commit(self, svn_commit):
        author = self._get_author(svn_commit)
        log_msg = self._get_log_msg(svn_commit)

        lods = set()
        for cvs_rev in svn_commit.get_cvs_items():
            lods.add(cvs_rev.lod)
        if len(lods) != 1:
            raise InternalError('Commit affects %d LODs' % (len(lods), ))
        lod = lods.pop()

        self._mirror.start_commit(svn_commit.revnum)
        if isinstance(lod, Trunk):
            # FIXME: is this correct?:
            self.f.write('commit refs/heads/master\n')
        else:
            self.f.write('commit refs/heads/%s\n' % (lod.name, ))
        self.f.write('mark :%d\n' %
                     (self._create_commit_mark(lod, svn_commit.revnum), ))
        self.f.write('committer %s %d +0000\n' % (
            author,
            svn_commit.date,
        ))
        self.f.write('data %d\n' % (len(log_msg), ))
        self.f.write('%s\n' % (log_msg, ))
        for cvs_rev in svn_commit.get_cvs_items():
            self.revision_writer.process_revision(cvs_rev, post_commit=False)

        self.f.write('\n')
        self._mirror.end_commit()

    def process_post_commit(self, svn_commit):
        author = self._get_author(svn_commit)
        log_msg = self._get_log_msg(svn_commit)

        source_lods = set()
        for cvs_rev in svn_commit.cvs_revs:
            source_lods.add(cvs_rev.lod)
        if len(source_lods) != 1:
            raise InternalError('Commit is from %d LODs' %
                                (len(source_lods), ))
        source_lod = source_lods.pop()

        self._mirror.start_commit(svn_commit.revnum)
        # FIXME: is this correct?:
        self.f.write('commit refs/heads/master\n')
        self.f.write('mark :%d\n' %
                     (self._create_commit_mark(None, svn_commit.revnum), ))
        self.f.write('committer %s %d +0000\n' % (
            author,
            svn_commit.date,
        ))
        self.f.write('data %d\n' % (len(log_msg), ))
        self.f.write('%s\n' % (log_msg, ))
        self.f.write('merge :%d\n' %
                     (self._get_source_mark(source_lod, svn_commit.revnum), ))
        for cvs_rev in svn_commit.cvs_revs:
            self.revision_writer.process_revision(cvs_rev, post_commit=True)

        self.f.write('\n')
        self._mirror.end_commit()

    def _get_source_groups(self, svn_commit):
        """Return groups of sources for SVN_COMMIT.

    SVN_COMMIT is an instance of SVNSymbolCommit.  Yield tuples
    (source_lod, svn_revnum, cvs_symbols) where source_lod is the line
    of development and svn_revnum is the revision that should serve as
    a source, and cvs_symbols is a list of CVSSymbolItems that can be
    copied from that source.  The groups are returned in arbitrary
    order."""

        # Get a map {CVSSymbol : SVNRevisionRange}:
        range_map = self._symbolings_reader.get_range_map(svn_commit)

        # range_map, split up into one map per LOD; i.e., {LOD :
        # {CVSSymbol : SVNRevisionRange}}:
        lod_range_maps = {}

        for (cvs_symbol, range) in range_map.iteritems():
            lod_range_map = lod_range_maps.get(range.source_lod)
            if lod_range_map is None:
                lod_range_map = {}
                lod_range_maps[range.source_lod] = lod_range_map
            lod_range_map[cvs_symbol] = range

        # Sort the sources so that the branch that serves most often as
        # parent is processed first:
        lod_ranges = lod_range_maps.items()
        lod_ranges.sort(
            lambda (lod1, lod_range_map1), (lod2, lod_range_map2): -cmp(
                len(lod_range_map1), len(lod_range_map2)) or cmp(lod1, lod2))

        for (lod, lod_range_map) in lod_ranges:
            while lod_range_map:
                revision_scores = RevisionScores(lod_range_map.values())
                (source_lod, revnum, score) = revision_scores.get_best_revnum()
                assert source_lod == lod
                cvs_symbols = []
                for (cvs_symbol, range) in lod_range_map.items():
                    if revnum in range:
                        cvs_symbols.append(cvs_symbol)
                        del lod_range_map[cvs_symbol]
                yield (lod, revnum, cvs_symbols)

    def _get_all_files(self, node):
        """Generate all of the CVSFiles under NODE."""

        for cvs_path in node:
            subnode = node[cvs_path]
            if subnode is None:
                yield cvs_path
            else:
                for sub_cvs_path in self._get_all_files(subnode):
                    yield sub_cvs_path

    def _is_simple_copy(self, svn_commit, source_groups):
        """Return True iff SVN_COMMIT can be created as a simple copy.

    SVN_COMMIT is an SVNTagCommit.  Return True iff it can be created
    as a simple copy from an existing revision (i.e., if the fixup
    branch can be avoided for this tag creation)."""

        # The first requirement is that there be exactly one source:
        if len(source_groups) != 1:
            return False

        (source_lod, svn_revnum, cvs_symbols) = source_groups[0]

        # The second requirement is that the destination LOD not already
        # exist:
        try:
            self._mirror.get_current_lod_directory(svn_commit.symbol)
        except KeyError:
            # The LOD doesn't already exist.  This is good.
            pass
        else:
            # The LOD already exists.  It cannot be created by a copy.
            return False

        # The third requirement is that the source LOD contains exactly
        # the same files as we need to add to the symbol:
        try:
            source_node = self._mirror.get_old_lod_directory(
                source_lod, svn_revnum)
        except KeyError:
            raise InternalError('Source %r does not exist' % (source_lod, ))
        return (set([cvs_symbol.cvs_file for cvs_symbol in cvs_symbols
                     ]) == set(self._get_all_files(source_node)))

    def _get_source_mark(self, source_lod, revnum):
        """Return the mark active on SOURCE_LOD at the end of REVNUM."""

        modifications = self._marks[source_lod]
        i = bisect.bisect_left(modifications, (revnum + 1, )) - 1
        (revnum, mark) = modifications[i]
        return mark

    def _process_symbol_commit(self, svn_commit, git_branch, source_groups,
                               mark):
        author = self._get_author(svn_commit)
        log_msg = self._get_log_msg(svn_commit)

        self.f.write('commit %s\n' % (git_branch, ))
        self.f.write('mark :%d\n' % (mark, ))
        self.f.write('committer %s %d +0000\n' % (
            author,
            svn_commit.date,
        ))
        self.f.write('data %d\n' % (len(log_msg), ))
        self.f.write('%s\n' % (log_msg, ))

        for (
                source_lod,
                source_revnum,
                cvs_symbols,
        ) in source_groups:
            self.f.write('merge :%d\n' %
                         (self._get_source_mark(source_lod, source_revnum), ))

        for (
                source_lod,
                source_revnum,
                cvs_symbols,
        ) in source_groups:
            for cvs_symbol in cvs_symbols:
                self.revision_writer.branch_file(cvs_symbol)

        self.f.write('\n')

    def process_branch_commit(self, svn_commit):
        self._mirror.start_commit(svn_commit.revnum)
        source_groups = list(self._get_source_groups(svn_commit))
        for groups in get_chunks(source_groups, self.max_merges):
            self._process_symbol_commit(
                svn_commit,
                'refs/heads/%s' % (svn_commit.symbol.name, ),
                groups,
                self._create_commit_mark(svn_commit.symbol, svn_commit.revnum),
            )
        self._mirror.end_commit()

    def _set_symbol(self, symbol, mark):
        if isinstance(symbol, Branch):
            category = 'heads'
        elif isinstance(symbol, Tag):
            category = 'tags'
        else:
            raise InternalError()
        self.f.write('reset refs/%s/%s\n' % (
            category,
            symbol.name,
        ))
        self.f.write('from :%d\n' % (mark, ))

    def process_tag_commit(self, svn_commit):
        # FIXME: For now we create a fixup branch with the same name as
        # the tag, then the tag.  We never delete the fixup branch.  Also,
        # a fixup branch is created even if the tag could be created from
        # a single source.
        self._mirror.start_commit(svn_commit.revnum)

        source_groups = list(self._get_source_groups(svn_commit))
        if self._is_simple_copy(svn_commit, source_groups):
            (source_lod, source_revnum, cvs_symbols) = source_groups[0]
            Log().debug('%s will be created via a simple copy from %s:r%d' % (
                svn_commit.symbol,
                source_lod,
                source_revnum,
            ))
            mark = self._get_source_mark(source_lod, source_revnum)
            self._set_symbol(svn_commit.symbol, mark)
        else:
            Log().debug('%s will be created via a fixup branch' %
                        (svn_commit.symbol, ))

            # Create the fixup branch (which might involve making more than
            # one commit):
            for groups in get_chunks(source_groups, self.max_merges):
                mark = self._create_commit_mark(svn_commit.symbol,
                                                svn_commit.revnum)
                self._process_symbol_commit(svn_commit, FIXUP_BRANCH_NAME,
                                            groups, mark)

            # Store the mark of the last commit to the fixup branch as the
            # value of the tag:
            self._set_symbol(svn_commit.symbol, mark)
            self.f.write('reset %s\n' % (FIXUP_BRANCH_NAME, ))
            self.f.write('\n')

        self._mirror.end_commit()

    def cleanup(self):
        self.revision_writer.finish()
        self._mirror.close()
        self.f.close()
        del self.f
        self._symbolings_reader.close()
        del self._symbolings_reader
コード例 #38
0
 def start(self):
     self.revision_reader.start()
     self.dump_file = open(self.blob_filename, 'wb')
     self._mark_generator = KeyGenerator()
コード例 #39
0
class SVNRepositoryMirror:
    """Mirror a Subversion repository and its history.

  Mirror a Subversion repository as it is constructed, one SVNCommit
  at a time.  For each LineOfDevelopment we store a skeleton of the
  directory structure within that LOD for each SVN revision number in
  which it changed.  The creation of a dumpfile or Subversion
  repository is handled by delegates.  See the add_delegate() method
  for how to set delegates.

  For each LOD that has been seen so far, an LODHistory instance is
  stored in self._lod_histories.  An LODHistory keeps track of each
  SVNRevision in which files were added to or deleted from that LOD,
  as well as the node id of the node tree describing the LOD contents
  at that SVN revision.

  The LOD trees themselves are stored in the _nodes_db database, which
  maps node ids to nodes.  A node is a map from CVSPath.id to ids of
  the corresponding subnodes.  The _nodes_db is stored on disk and
  each access is expensive.

  The _nodes_db database only holds the nodes for old revisions.  The
  revision that is being constructed is kept in memory in the
  _new_nodes map, which is cheap to access.

  You must invoke start_commit() before each SVNCommit and
  end_commit() afterwards.

  *** WARNING *** Path arguments to methods in this class MUST NOT
      have leading or trailing slashes."""
    class ParentMissingError(Exception):
        """The parent of a path is missing.

    Exception raised if an attempt is made to add a path to the
    repository mirror but the parent's path doesn't exist in the
    youngest revision of the repository."""

        pass

    class PathExistsError(Exception):
        """The path already exists in the repository.

    Exception raised if an attempt is made to add a path to the
    repository mirror and that path already exists in the youngest
    revision of the repository."""

        pass

    def register_artifacts(self, which_pass):
        """Register the artifacts that will be needed for this object."""

        artifact_manager.register_temp_file(
            config.SVN_MIRROR_NODES_INDEX_TABLE, which_pass)
        artifact_manager.register_temp_file(config.SVN_MIRROR_NODES_STORE,
                                            which_pass)

    def open(self):
        """Set up the SVNRepositoryMirror and prepare it for SVNCommits."""

        self._key_generator = KeyGenerator()

        self._delegates = []

        # A map from LOD to LODHistory instance for all LODs that have
        # been defined so far:
        self._lod_histories = {}

        # This corresponds to the 'nodes' table in a Subversion fs.  (We
        # don't need a 'representations' or 'strings' table because we
        # only track metadata, not file contents.)
        self._nodes_db = IndexedDatabase(
            artifact_manager.get_temp_file(config.SVN_MIRROR_NODES_STORE),
            artifact_manager.get_temp_file(
                config.SVN_MIRROR_NODES_INDEX_TABLE),
            DB_OPEN_NEW,
            serializer=_NodeSerializer())

        # Start at revision 0 without a root node.  It will be created
        # by _open_writable_lod_node.
        self._youngest = 0

    def start_commit(self, revnum, revprops):
        """Start a new commit."""

        self._youngest = revnum

        # A map {node_id : {CVSPath : node_id}}.
        self._new_nodes = {}

        self._invoke_delegates('start_commit', revnum, revprops)

    def end_commit(self):
        """Called at the end of each commit.

    This method copies the newly created nodes to the on-disk nodes
    db."""

        # Copy the new nodes to the _nodes_db
        for id, value in self._new_nodes.items():
            self._nodes_db[id] = value

        del self._new_nodes

        self._invoke_delegates('end_commit')

    def _get_lod_history(self, lod):
        """Return the LODHistory instance describing LOD.

    Create a new (empty) LODHistory if it doesn't yet exist."""

        try:
            return self._lod_histories[lod]
        except KeyError:
            lod_history = LODHistory()
            self._lod_histories[lod] = lod_history
            return lod_history

    def _create_empty_node(self):
        """Create and return a new, empty, writable node."""

        new_node = _WritableMirrorNode(self, self._key_generator.gen_id(), {})
        self._new_nodes[new_node.id] = new_node.entries
        return new_node

    def _copy_node(self, old_node):
        """Create and return a new, writable node that is a copy of OLD_NODE."""

        new_node = _WritableMirrorNode(self, self._key_generator.gen_id(),
                                       old_node.entries.copy())

        self._new_nodes[new_node.id] = new_node.entries
        return new_node

    def _get_node(self, id):
        """Return the node for id ID.

    The node might be read from either self._nodes_db or
    self._new_nodes.  Return an instance of _MirrorNode."""

        try:
            return _WritableMirrorNode(self, id, self._new_nodes[id])
        except KeyError:
            return _ReadOnlyMirrorNode(self, id, self._nodes_db[id])

    def _open_readonly_lod_node(self, lod, revnum):
        """Open a readonly node for the root path of LOD at revision REVNUM.

    Return an instance of _MirrorNode if the path exists; otherwise,
    raise KeyError."""

        lod_history = self._get_lod_history(lod)
        node_id = lod_history.get_id(revnum)
        return self._get_node(node_id)

    def _open_readonly_node(self, cvs_path, lod, revnum):
        """Open a readonly node for CVS_PATH from LOD at REVNUM.

    If cvs_path refers to a leaf node, return None.

    Raise KeyError if the node does not exist."""

        if cvs_path.parent_directory is None:
            return self._open_readonly_lod_node(lod, revnum)
        else:
            parent_node = self._open_readonly_node(cvs_path.parent_directory,
                                                   lod, revnum)
            return parent_node[cvs_path]

    def _open_writable_lod_node(self, lod, create, invoke_delegates=True):
        """Open a writable node for the root path in LOD.

    Iff CREATE is True, create the path and any missing directories.
    Return an instance of _WritableMirrorNode.  Raise KeyError if the
    path doesn't already exist and CREATE is not set."""

        lod_history = self._get_lod_history(lod)
        try:
            id = lod_history.get_id()
        except KeyError:
            if create:
                node = self._create_empty_node()
                lod_history.update(self._youngest, node.id)
                if invoke_delegates:
                    self._invoke_delegates('initialize_lod', lod)
            else:
                raise
        else:
            node = self._get_node(id)
            if not isinstance(node, _WritableMirrorNode):
                # Node was created in an earlier revision, so we have to copy
                # it to make it writable:
                node = self._copy_node(node)
                lod_history.update(self._youngest, node.id)

        return node

    def _open_writable_node(self, cvs_directory, lod, create):
        """Open a writable node for CVS_DIRECTORY in LOD.

    Iff CREATE is True, create a directory node at CVS_DIRECTORY and any
    missing directories.  Return an instance of _WritableMirrorNode.

    Raise KeyError if CVS_DIRECTORY doesn't exist and CREATE is not
    set."""

        if cvs_directory.parent_directory is None:
            return self._open_writable_lod_node(lod, create)

        parent_node = self._open_writable_node(cvs_directory.parent_directory,
                                               lod, create)

        try:
            node = parent_node[cvs_directory]
        except KeyError:
            if create:
                # The component does not exist, so we create it.
                new_node = self._create_empty_node()
                parent_node[cvs_directory] = new_node
                self._invoke_delegates('mkdir', lod, cvs_directory)
                return new_node
            else:
                raise
        else:
            if isinstance(node, _WritableMirrorNode):
                return node
            elif isinstance(node, _ReadOnlyMirrorNode):
                new_node = self._copy_node(node)
                parent_node[cvs_directory] = new_node
                return new_node
            else:
                raise InternalError('Attempt to modify file at %s in mirror' %
                                    (cvs_directory, ))

    def delete_lod(self, lod):
        """Delete the main path for LOD from the tree.

    The path must currently exist.  Silently refuse to delete trunk
    paths."""

        if isinstance(lod, Trunk):
            # Never delete a Trunk path.
            return

        lod_history = self._get_lod_history(lod)
        if not lod_history.exists():
            raise KeyError()
        lod_history.update(self._youngest, None)
        self._invoke_delegates('delete_lod', lod)

    def delete_path(self, cvs_path, lod, should_prune=False):
        """Delete CVS_PATH from LOD."""

        if cvs_path.parent_directory is None:
            self.delete_lod(lod)
            return
        else:
            parent_node = self._open_writable_node(cvs_path.parent_directory,
                                                   lod, False)
            del parent_node[cvs_path]
            self._invoke_delegates('delete_path', lod, cvs_path)

            # The following recursion makes pruning an O(n^2) operation in the
            # worst case (where n is the depth of CVS_PATH), but the worst case
            # is probably rare, and the constant cost is pretty low.  Another
            # drawback is that we issue a delete for each path and not just
            # a single delete for the topmost directory pruned.
            if should_prune and len(parent_node) == 0:
                self.delete_path(cvs_path.parent_directory, lod, True)

    def initialize_project(self, project):
        """Create the basic structure for PROJECT."""

        self._invoke_delegates('initialize_project', project)

        self._open_writable_lod_node(
            Ctx()._symbol_db.get_symbol(project.trunk_id),
            create=True,
            invoke_delegates=False,
        )

    def change_path(self, cvs_rev):
        """Register a change in self._youngest for the CVS_REV's svn_path."""

        # We do not have to update the nodes because our mirror is only
        # concerned with the presence or absence of paths, and a file
        # content change does not cause any path changes.
        self._invoke_delegates('change_path', SVNCommitItem(cvs_rev, False))

    def add_path(self, cvs_rev):
        """Add the CVS_REV's svn_path to the repository mirror."""

        cvs_file = cvs_rev.cvs_file
        parent_node = self._open_writable_node(cvs_file.parent_directory,
                                               cvs_rev.lod, True)

        if cvs_file in parent_node:
            raise self.PathExistsError(
                'Attempt to add path \'%s\' to repository mirror '
                'when it already exists in the mirror.' %
                (cvs_rev.get_svn_path(), ))

        parent_node[cvs_file] = None

        self._invoke_delegates('add_path', SVNCommitItem(cvs_rev, True))

    def copy_lod(self, src_lod, dest_lod, src_revnum):
        """Copy all of SRC_LOD at SRC_REVNUM to DST_LOD.

    In the youngest revision of the repository, the destination LOD
    *must not* already exist.

    Return the new node at DEST_LOD.  Note that this node is not
    necessarily writable, though its parent node necessarily is."""

        dest_path = dest_lod.get_path()

        # Get the node of our src_path
        src_node = self._open_readonly_lod_node(src_lod, src_revnum)

        dest_lod_history = self._get_lod_history(dest_lod)
        if dest_lod_history.exists():
            raise self.PathExistsError(
                "Attempt to add path '%s' to repository mirror "
                "when it already exists in the mirror." % dest_path)

        dest_lod_history.update(self._youngest, src_node.id)

        self._invoke_delegates('copy_lod', src_lod, dest_lod, src_revnum)

        # This is a cheap copy, so src_node has the same contents as the
        # new destination node.
        return src_node

    def copy_path(self,
                  cvs_path,
                  src_lod,
                  dest_lod,
                  src_revnum,
                  create_parent=False):
        """Copy CVS_PATH from SRC_LOD at SRC_REVNUM to DST_LOD.

    In the youngest revision of the repository, the destination's
    parent *must* exist unless CREATE_PARENT is specified.  But the
    destination itself *must not* exist.

    Return the new node at (CVS_PATH, DEST_LOD).  Note that this node
    is not necessarily writable, though its parent node necessarily
    is."""

        if cvs_path.parent_directory is None:
            return self.copy_lod(src_lod, dest_lod, src_revnum)

        # Get the node of our source, or None if it is a file:
        src_node = self._open_readonly_node(cvs_path, src_lod, src_revnum)

        # Get the parent path of the destination:
        try:
            dest_parent_node = self._open_writable_node(
                cvs_path.parent_directory, dest_lod, create_parent)
        except KeyError:
            raise self.ParentMissingError(
                'Attempt to add path \'%s\' to repository mirror, '
                'but its parent directory doesn\'t exist in the mirror.' %
                (dest_lod.get_path(cvs_path.cvs_path), ))

        if cvs_path in dest_parent_node:
            raise self.PathExistsError(
                'Attempt to add path \'%s\' to repository mirror '
                'when it already exists in the mirror.' %
                (dest_lod.get_path(cvs_path.cvs_path), ))

        dest_parent_node[cvs_path] = src_node
        self._invoke_delegates('copy_path',
                               src_lod.get_path(cvs_path.cvs_path),
                               dest_lod.get_path(cvs_path.cvs_path),
                               src_revnum)

        # This is a cheap copy, so src_node has the same contents as the
        # new destination node.
        return src_node

    def fill_symbol(self, svn_symbol_commit, fill_source):
        """Perform all copies for the CVSSymbols in SVN_SYMBOL_COMMIT.

    The symbolic name is guaranteed to exist in the Subversion
    repository by the end of this call, even if there are no paths
    under it."""

        symbol = svn_symbol_commit.symbol

        try:
            dest_node = self._open_writable_lod_node(symbol, False)
        except KeyError:
            dest_node = None
        self._fill_directory(symbol, dest_node, fill_source, None)

    def _prune_extra_entries(self, dest_cvs_path, symbol, dest_node,
                             src_entries):
        """Delete any entries in DEST_NODE that are not in SRC_ENTRIES.

    This might require creating a new writable node, so return a
    possibly-modified dest_node."""

        delete_list = [
            cvs_path for cvs_path in dest_node if cvs_path not in src_entries
        ]
        if delete_list:
            if not isinstance(dest_node, _WritableMirrorNode):
                dest_node = self._open_writable_node(dest_cvs_path, symbol,
                                                     False)
            # Sort the delete list so that the output is in a consistent
            # order:
            delete_list.sort()
            for cvs_path in delete_list:
                del dest_node[cvs_path]
                self._invoke_delegates('delete_path', symbol, cvs_path)

        return dest_node

    def _fill_directory(self, symbol, dest_node, fill_source, parent_source):
        """Fill the tag or branch SYMBOL at the path indicated by FILL_SOURCE.

    Use items from FILL_SOURCE, and recurse into the child items.

    Fill SYMBOL starting at the path FILL_SOURCE.cvs_path.  DEST_NODE
    is the node of this destination path, or None if the destination
    does not yet exist.  All directories above this path have already
    been filled.  FILL_SOURCE is a FillSource instance describing the
    items within a subtree of the repository that still need to be
    copied to the destination.

    PARENT_SOURCE is the SVNRevisionRange that was used to copy the
    parent directory, if it was copied in this commit.  We prefer to
    copy from the same source as was used for the parent, since it
    typically requires less touching-up.  If PARENT_SOURCE is None,
    then the parent directory was not copied in this commit, so no
    revision is preferable to any other."""

        copy_source = fill_source.compute_best_source(parent_source)

        # Figure out if we shall copy to this destination and delete any
        # destination path that is in the way.
        if dest_node is None:
            # The destination does not exist at all, so it definitely has to
            # be copied:
            dest_node = self.copy_path(fill_source.cvs_path,
                                       copy_source.source_lod, symbol,
                                       copy_source.opening_revnum)
        elif (parent_source is not None) and (
                copy_source.source_lod != parent_source.source_lod
                or copy_source.opening_revnum != parent_source.opening_revnum):
            # The parent path was copied from a different source than we
            # need to use, so we have to delete the version that was copied
            # with the parent then re-copy from the correct source:
            self.delete_path(fill_source.cvs_path, symbol)
            dest_node = self.copy_path(fill_source.cvs_path,
                                       copy_source.source_lod, symbol,
                                       copy_source.opening_revnum)
        else:
            copy_source = parent_source

        # Get the map {entry : FillSource} for entries within this
        # directory that need filling.
        src_entries = {}
        for (cvs_path, fill_subsource) in fill_source.get_subsources():
            src_entries[cvs_path] = fill_subsource

        if copy_source is not None:
            dest_node = self._prune_extra_entries(fill_source.cvs_path, symbol,
                                                  dest_node, src_entries)

        # Recurse into the SRC_ENTRIES ids sorted in alphabetical order.
        cvs_paths = src_entries.keys()
        cvs_paths.sort()
        for cvs_path in cvs_paths:
            if isinstance(cvs_path, CVSDirectory):
                # Path is a CVSDirectory:
                try:
                    dest_subnode = dest_node[cvs_path]
                except KeyError:
                    # Path didn't exist at all; it has to be created:
                    dest_subnode = None
                self._fill_directory(symbol, dest_subnode,
                                     src_entries[cvs_path], copy_source)
            else:
                # Path is a CVSFile:
                self._fill_file(symbol, cvs_path in dest_node,
                                src_entries[cvs_path], copy_source)

    def _fill_file(self, symbol, dest_existed, fill_source, parent_source):
        """Fill the tag or branch SYMBOL at the path indicated by FILL_SOURCE.

    Use items from FILL_SOURCE.

    Fill SYMBOL at path FILL_SOURCE.cvs_path.  DEST_EXISTED indicates
    whether the destination path already exists.  All directories
    above this path have already been filled
    as needed.  FILL_SOURCE is a FillSource instance describing the
    item that needs to be copied to the destination.

    PARENT_SOURCE is the source from which the parent directory was
    copied, or None if the parent directory was not copied during this
    commit.  We prefer to copy from PARENT_SOURCE, since it typically
    requires less touching-up.  If PARENT_SOURCE is None, then the
    parent directory was not copied in this commit, so no revision is
    preferable to any other."""

        copy_source = fill_source.compute_best_source(parent_source)

        # Figure out if we shall copy to this destination and delete any
        # destination path that is in the way.
        if not dest_existed:
            # The destination does not exist at all, so it definitely has to
            # be copied:
            self.copy_path(fill_source.cvs_path, copy_source.source_lod,
                           symbol, copy_source.opening_revnum)
        elif (parent_source is not None) and (
                copy_source.source_lod != parent_source.source_lod
                or copy_source.opening_revnum != parent_source.opening_revnum):
            # The parent path was copied from a different source than we
            # need to use, so we have to delete the version that was copied
            # with the parent and then re-copy from the correct source:
            self.delete_path(fill_source.cvs_path, symbol)
            self.copy_path(fill_source.cvs_path, copy_source.source_lod,
                           symbol, copy_source.opening_revnum)

    def add_delegate(self, delegate):
        """Adds DELEGATE to self._delegates.

    For every delegate you add, as soon as SVNRepositoryMirror
    performs a repository action method, SVNRepositoryMirror will call
    the delegate's corresponding repository action method.  Multiple
    delegates will be called in the order that they are added.  See
    SVNRepositoryMirrorDelegate for more information."""

        self._delegates.append(delegate)

    def _invoke_delegates(self, method, *args):
        """Invoke a method on each delegate.

    Iterate through each of our delegates, in the order that they were
    added, and call the delegate's method named METHOD with the
    arguments in ARGS."""

        for delegate in self._delegates:
            getattr(delegate, method)(*args)

    def close(self):
        """Call the delegate finish methods and close databases."""

        self._invoke_delegates('finish')
        self._lod_histories = None
        self._nodes_db.close()
        self._nodes_db = None
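
The add_delegate()/_invoke_delegates() pair in the class above is a straightforward observer pattern: every repository action is forwarded, in registration order, to each registered delegate. A self-contained sketch of that dispatch mechanism follows (the LoggingDelegate class is hypothetical and only prints what it receives):

class DelegateHost(object):
    def __init__(self):
        self._delegates = []

    def add_delegate(self, delegate):
        self._delegates.append(delegate)

    def _invoke_delegates(self, method, *args):
        # Call delegate.METHOD(*args) on every delegate, in the order added.
        for delegate in self._delegates:
            getattr(delegate, method)(*args)


class LoggingDelegate(object):
    def start_commit(self, revnum, revprops):
        print('start_commit r%d %r' % (revnum, revprops))

    def end_commit(self):
        print('end_commit')


host = DelegateHost()
host.add_delegate(LoggingDelegate())
host._invoke_delegates('start_commit', 7, {'svn:log': 'example'})
host._invoke_delegates('end_commit')
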
コード例 #42
0
class GitRevisionRecorder(FulltextRevisionRecorder):
  """Output file revisions to git-fast-import."""

  def __init__(self, blob_filename):
    self.blob_filename = blob_filename

  def start(self):
    self.dump_file = open(self.blob_filename, 'wb')
    self._mark_generator = KeyGenerator()

  def start_file(self, cvs_file_items):
    self._cvs_file_items = cvs_file_items

  def _get_original_source(self, cvs_rev):
    """Return the first CVSRevision with the content of CVS_REV.

    'First' here refers to deltatext order; i.e., the very first
    revision is HEAD on trunk, then backwards to the root of a branch,
    then out to the tip of a branch.

    If there is no other CVSRevision that has the same content,
    return CVS_REV itself."""

    while True:
      if cvs_rev.deltatext_exists:
        return cvs_rev
      if isinstance(cvs_rev.lod, Trunk):
        if cvs_rev.next_id is None:
          # The HEAD revision on trunk is always its own source, even
          # if its deltatext (i.e., its fulltext) is empty:
          return cvs_rev
        else:
          cvs_rev = self._cvs_file_items[cvs_rev.next_id]
      else:
        cvs_rev = self._cvs_file_items[cvs_rev.prev_id]

  def record_fulltext(self, cvs_rev, log, fulltext):
    """Write the fulltext to a blob if it is original.

    To find the 'original' revision, we follow the CVS
    delta-dependency chain backwards until we find a file that has a
    deltatext.  The reason we go to this trouble is to avoid writing
    the same file contents multiple times for a string of revisions
    that don't have deltatexts (as, for example, happens with dead
    revisions and imported revisions)."""

    source = self._get_original_source(cvs_rev)

    if source.id == cvs_rev.id:
      # Revision is its own source; write it out:
      mark = self._mark_generator.gen_id()
      self.dump_file.write('blob\n')
      self.dump_file.write('mark :%d\n' % (mark,))
      self.dump_file.write('data %d\n' % (len(fulltext),))
      self.dump_file.write(fulltext)
      self.dump_file.write('\n')
      return mark
    else:
      # Return as revision_recorder_token the CVSRevision.id of the
      # original source revision:
      return source.revision_recorder_token

  def finish_file(self, cvs_file_items):
    # Determine the original source of each CVSSymbol, and store it as
    # the symbol's revision_recorder_token.
    for cvs_item in cvs_file_items.values():
      if isinstance(cvs_item, CVSSymbol):
        cvs_source = cvs_file_items[cvs_item.source_id]
        while not isinstance(cvs_source, CVSRevision):
          cvs_source = cvs_file_items[cvs_source.source_id]
        cvs_item.revision_recorder_token = cvs_source.revision_recorder_token

    del self._cvs_file_items

  def finish(self):
    self.dump_file.close()
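
record_fulltext() above emits each original file revision as a git-fast-import blob record: a 'blob' line, a 'mark' line naming the blob, a 'data <byte count>' header, the raw contents, and a trailing newline. A minimal, standalone illustration of that record layout (the mark number and contents are invented for the example):

import sys

def write_blob(out, mark, fulltext):
    out.write('blob\n')
    out.write('mark :%d\n' % (mark,))
    out.write('data %d\n' % (len(fulltext),))
    out.write(fulltext)
    out.write('\n')

write_blob(sys.stdout, 1, 'hello, world\n')
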