def remove_unneeded_initial_trunk_delete(self, metadata_db):
    """Remove unneeded deletes for this file.

    If a file is added on a branch, then a trunk revision is added at
    the same time in the 'Dead' state.  This revision doesn't do
    anything useful, so delete it.  At most one such revision is
    removed per call (the method returns as soon as one is found)."""

    for id in self.root_ids:
        cvs_item = self[id]
        if self._is_unneeded_initial_trunk_delete(cvs_item, metadata_db):
            logger.debug('Removing unnecessary delete %s' % (cvs_item,))

            # Sever any CVSBranches rooted at cvs_item.
            # (Iterate over a copy, since _sever_branch presumably
            # mutates branch_ids -- TODO confirm.)
            for cvs_branch_id in cvs_item.branch_ids[:]:
                cvs_branch = self[cvs_branch_id]
                self._sever_branch(self.get_lod_items(cvs_branch))

            # Tagging a dead revision doesn't do anything, so remove any
            # CVSTags that refer to cvs_item:
            while cvs_item.tag_ids:
                del self[cvs_item.tag_ids.pop()]

            # Now delete cvs_item itself:
            self.root_ids.remove(cvs_item.id)
            del self[cvs_item.id]
            if cvs_item.next_id is not None:
                # The successor revision becomes a new root of the
                # revision graph:
                cvs_rev_next = self[cvs_item.next_id]
                cvs_rev_next.prev_id = None
                self.root_ids.add(cvs_rev_next.id)

            # This can only happen once per file, so we're done:
            return
def flush(self):
    """Write all dirty cached records out to self.f, then clear the cache.

    Dirty records are written in increasing index order.  Record i
    lives at byte offset i * self._record_len; any gap between the
    data already on disk (self._limit_written records) and a record
    being written is padded with self.packer.empty_value so offsets
    stay consistent."""
    logger.debug('Flushing cache for %s' % (self,))

    # Collect only the (index, serialized-record) pairs marked dirty:
    pairs = [(i, s) for (i, (dirty, s)) in self._cache.items() if dirty]
    if pairs:
        pairs.sort()
        old_i = None
        f = self.f
        for (i, s) in pairs:
            if i == old_i:
                # No seeking needed: the previous write left the file
                # position exactly at record i.
                pass
            elif i <= self._limit_written:
                # Just jump there:
                f.seek(i * self._record_len)
            else:
                # Jump to the end of the file then write _empty_values
                # until we reach the correct location:
                f.seek(self._limit_written * self._record_len)
                while self._limit_written < i:
                    f.write(self.packer.empty_value)
                    self._limit_written += 1
            f.write(s)
            # File position is now at the start of record i + 1:
            old_i = i + 1
            self._limit_written = max(self._limit_written, old_i)
        self.f.flush()
    self._cache.clear()
def write_new_nodes(self, nodes):
    """Write NODES to the database.

    NODES is an iterable of writable CurrentMirrorDirectory
    instances."""
    if len(self._cache) > self._cache_max_size:
        # The size of the cache has exceeded the threshold.  Discard the
        # old cache values (but still store the new nodes into the
        # cache):
        logger.debug('Clearing node cache')
        self._cache.clear()

    data = {}
    max_node_id = 0
    for node in nodes:
        max_node_id = max(max_node_id, node.id)
        data[node.id] = self._dump(node._entries)
        self._cache[node.id] = node._entries

    # One db record per call; its key is the number of records written
    # so far (the current length of self._max_node_ids):
    self.db[len(self._max_node_ids)] = data

    if max_node_id == 0:
        # No nodes were written this call (node ids are presumably
        # always > 0 -- TODO confirm), so carry the previous maximum
        # forward.
        # Rewrite last value:
        self._max_node_ids.append(self._max_node_ids[-1])
    else:
        self._max_node_ids.append(max_node_id)
def _process_branch_changeset(self, changeset, timestamp):
    """Process BranchChangeset CHANGESET, producing a SVNBranchCommit.

    Filter out CVSBranchNoops.  If no CVSBranches are left, don't
    generate a SVNBranchCommit.  This is a generator; it yields the
    SVNBranchCommit (if any)."""
    if Ctx().trunk_only:
        raise InternalError(
            'BranchChangeset encountered during a --trunk-only conversion')

    cvs_branches = [
        cvs_branch
        for cvs_branch in changeset.iter_cvs_items()
        if not isinstance(cvs_branch, CVSBranchNoop)
    ]
    if cvs_branches:
        svn_commit = SVNBranchCommit(
            changeset.symbol,
            [cvs_branch.id for cvs_branch in cvs_branches],
            timestamp,
            self.revnum_generator.gen_id(),
        )
        yield svn_commit
        # Record each branch against the new revision number
        # (presumably consumed later when symbols are filled --
        # TODO confirm against _symbolings_logger):
        for cvs_branch in cvs_branches:
            Ctx()._symbolings_logger.log_branch_revision(
                cvs_branch, svn_commit.revnum
            )
    else:
        logger.debug(
            'Omitting %r because it contains only CVSBranchNoops'
            % (changeset,)
        )
def _process_branch_changeset(self, changeset, timestamp):
    """Process BranchChangeset CHANGESET, producing a SVNBranchCommit.

    Filter out CVSBranchNoops.  If no CVSBranches are left, don't
    generate a SVNBranchCommit.  This is a generator; it yields the
    SVNBranchCommit (if any)."""
    if Ctx().trunk_only:
        raise InternalError(
            'BranchChangeset encountered during a --trunk-only conversion')

    cvs_branches = [
        cvs_branch
        for cvs_branch in changeset.iter_cvs_items()
        if not isinstance(cvs_branch, CVSBranchNoop)
    ]
    if cvs_branches:
        svn_commit = SVNBranchCommit(
            changeset.symbol,
            [cvs_branch.id for cvs_branch in cvs_branches],
            timestamp,
            self.revnum_generator.gen_id(),
        )
        yield svn_commit
        # Record each branch against the new revision number
        # (presumably consumed later when symbols are filled --
        # TODO confirm against _symbolings_logger):
        for cvs_branch in cvs_branches:
            Ctx()._symbolings_logger.log_branch_revision(
                cvs_branch, svn_commit.revnum)
    else:
        logger.debug(
            'Omitting %r because it contains only CVSBranchNoops'
            % (changeset,))
def remove_unneeded_initial_trunk_delete(self, metadata_db):
    """Remove unneeded deletes for this file.

    If a file is added on a branch, then a trunk revision is added at
    the same time in the 'Dead' state.  This revision doesn't do
    anything useful, so delete it.  At most one such revision is
    removed per call (the method returns as soon as one is found)."""

    for id in self.root_ids:
        cvs_item = self[id]
        if self._is_unneeded_initial_trunk_delete(cvs_item, metadata_db):
            logger.debug('Removing unnecessary delete %s' % (cvs_item,))

            # Sever any CVSBranches rooted at cvs_item.
            # (Iterate over a copy, since _sever_branch presumably
            # mutates branch_ids -- TODO confirm.)
            for cvs_branch_id in cvs_item.branch_ids[:]:
                cvs_branch = self[cvs_branch_id]
                self._sever_branch(self.get_lod_items(cvs_branch))

            # Tagging a dead revision doesn't do anything, so remove any
            # CVSTags that refer to cvs_item:
            while cvs_item.tag_ids:
                del self[cvs_item.tag_ids.pop()]

            # Now delete cvs_item itself:
            self.root_ids.remove(cvs_item.id)
            del self[cvs_item.id]
            if cvs_item.next_id is not None:
                # The successor revision becomes a new root of the
                # revision graph:
                cvs_rev_next = self[cvs_item.next_id]
                cvs_rev_next.prev_id = None
                self.root_ids.add(cvs_rev_next.id)

            # This can only happen once per file, so we're done:
            return
def flush(self):
    """Write all dirty cached records out to self.f, then clear the cache.

    Dirty records are written in increasing index order.  Record i
    lives at byte offset i * self._record_len; any gap between the
    data already on disk (self._limit_written records) and a record
    being written is padded with self.packer.empty_value so offsets
    stay consistent."""
    logger.debug('Flushing cache for %s' % (self, ))

    # Collect only the (index, serialized-record) pairs marked dirty:
    pairs = [(i, s) for (i, (dirty, s)) in self._cache.items() if dirty]
    if pairs:
        pairs.sort()
        old_i = None
        f = self.f
        for (i, s) in pairs:
            if i == old_i:
                # No seeking needed: the previous write left the file
                # position exactly at record i.
                pass
            elif i <= self._limit_written:
                # Just jump there:
                f.seek(i * self._record_len)
            else:
                # Jump to the end of the file then write _empty_values
                # until we reach the correct location:
                f.seek(self._limit_written * self._record_len)
                while self._limit_written < i:
                    f.write(self.packer.empty_value)
                    self._limit_written += 1
            f.write(s)
            # File position is now at the start of record i + 1:
            old_i = i + 1
            self._limit_written = max(self._limit_written, old_i)
        self.f.flush()
    self._cache.clear()
def process_branch_commit(self, svn_commit):
    """Create the branch for SVN_COMMIT in the mirror/output.

    If the whole branch can be created as a single copy from one
    source, do so; otherwise create it via one or more fixup
    commits."""
    self._mirror.start_commit(svn_commit.revnum)

    source_groups = self._get_source_groups(svn_commit)
    if self._is_simple_copy(svn_commit, source_groups):
        # NOTE(review): this unpacks the group as (revnum, lod,
        # symbols); a sibling version of this method unpacks
        # (lod, revnum, symbols) -- verify the element order against
        # _get_source_groups().
        (source_revnum, source_lod, cvs_symbols) = source_groups[0]
        logger.debug('%s will be created via a simple copy from %s:r%d' % (
            svn_commit.symbol, source_lod, source_revnum,
        ))
        mark = self._get_source_mark(source_lod, source_revnum)
        self._set_symbol(svn_commit.symbol, mark)
        self._mirror.copy_lod(source_lod, svn_commit.symbol, source_revnum)
        self._set_lod_mark(svn_commit.symbol, svn_commit.revnum, mark)
    else:
        logger.debug('%s will be created via fixup commit(s)'
                     % (svn_commit.symbol, ))
        self._process_symbol_commit(
            svn_commit,
            'refs/heads/%s' % (svn_commit.symbol.name, ),
            source_groups,
        )

    self._mirror.end_commit()
def call_command(command, **kw):
    """Call the specified command, checking that it exits successfully.

    COMMAND is a list of strings (the argv to run).  Raise a
    FatalError if the command cannot be executed, or if it exits with
    a non-zero exit code.  Pass KW as keyword arguments to
    subprocess.call()."""
    logger.debug('Running command %r' % (command,))
    try:
        retcode = subprocess.call(command, **kw)
        if retcode < 0:
            # A negative return code means the process was killed by
            # signal number -retcode:
            raise FatalError(
                'Command terminated by signal %d: "%s"'
                % (-retcode, ' '.join(command),)
            )
        elif retcode > 0:
            raise FatalError(
                'Command failed with return code %d: "%s"'
                % (retcode, ' '.join(command),)
            )
    # Use the 'except ... as' form (valid since Python 2.6) instead of
    # the 'except OSError, e' syntax, which is a SyntaxError in Python 3:
    except OSError as e:
        raise FatalError(
            'Command execution failed (%s): "%s"' % (e, ' '.join(command),)
        )
def check_command_runs(command, commandname):
    """Check whether the command CMD can be executed without errors.

    CMD is a list or string, as accepted by subprocess.Popen().
    CMDNAME is the name of the command as it should be included in
    exception error messages.

    This function checks three things: (1) the command can be run
    without throwing an OSError; (2) it exits with status=0; (3) it
    doesn't output anything to stderr.  If any of these conditions is
    not met, raise a CommandFailedException describing the problem."""
    logger.debug('Running command %r' % (command,))
    try:
        pipe = subprocess.Popen(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    # Use the 'except ... as' form (valid since Python 2.6) instead of
    # the 'except OSError, e' syntax, which is a SyntaxError in Python 3:
    except OSError as e:
        raise CommandFailedException('error executing %s: %s' % (
            commandname, e,
        ))
def process_branch_commit(self, svn_commit):
    """Create the branch for SVN_COMMIT in the mirror/output.

    If the whole branch can be created as a single copy from one
    source, do so; otherwise create it via one or more fixup
    commits."""
    self._mirror.start_commit(svn_commit.revnum)

    # Materialize the groups because source_groups[0] is indexed below:
    source_groups = list(self._get_source_groups(svn_commit))
    if self._is_simple_copy(svn_commit, source_groups):
        # NOTE(review): this unpacks the group as (lod, revnum,
        # symbols); a sibling version of this method unpacks
        # (revnum, lod, symbols) -- verify the element order against
        # _get_source_groups().
        (source_lod, source_revnum, cvs_symbols) = source_groups[0]
        logger.debug(
            '%s will be created via a simple copy from %s:r%d'
            % (svn_commit.symbol, source_lod, source_revnum,)
        )
        mark = self._get_source_mark(source_lod, source_revnum)
        self._set_symbol(svn_commit.symbol, mark)
        self._mirror.copy_lod(source_lod, svn_commit.symbol, source_revnum)
        self._set_lod_mark(svn_commit.symbol, svn_commit.revnum, mark)
    else:
        logger.debug(
            '%s will be created via fixup commit(s)' % (svn_commit.symbol,)
        )
        self._process_symbol_commit(
            svn_commit,
            'refs/heads/%s' % (svn_commit.symbol.name,),
            source_groups,
            self._create_commit_mark(svn_commit.symbol, svn_commit.revnum),
        )

    self._mirror.end_commit()
def remove_initial_branch_deletes(self, metadata_db):
    """If the first revision on a branch is an unnecessary delete, remove it.

    If a file is added on a branch (whether or not it already existed
    on trunk), then new versions of CVS add a first branch revision in
    the 'dead' state (to indicate that the file did not exist on the
    branch when the branch was created) followed by the second branch
    revision, which is an add.  When we encounter this situation, we
    sever the branch from trunk and delete the first branch
    revision."""

    for lod_items in self.iter_lods():
        if self._is_unneeded_initial_branch_delete(lod_items, metadata_db):
            cvs_revision = lod_items.cvs_revisions[0]
            logger.debug('Removing unnecessary initial branch delete %s'
                         % (cvs_revision, ))

            # Sever the branch from its source if necessary:
            self._sever_branch(lod_items)

            # Delete the first revision on the branch:
            self.root_ids.remove(cvs_revision.id)
            del self[cvs_revision.id]

            # If it had a successor, adjust its backreference and add it
            # to the root_ids:
            if cvs_revision.next_id is not None:
                cvs_rev_next = self[cvs_revision.next_id]
                cvs_rev_next.prev_id = None
                self.root_ids.add(cvs_rev_next.id)

            # Tagging a dead revision doesn't do anything, so remove any
            # tags that were set on it:
            for tag_id in cvs_revision.tag_ids:
                del self[tag_id]
def process_tag_commit(self, svn_commit):
    """Create the tag for SVN_COMMIT in the output stream.

    A simple copy is used when possible; otherwise a fixup branch with
    the same name as the tag is created first and the tag is pointed
    at its last commit."""
    # FIXME: For now we create a fixup branch with the same name as
    # the tag, then the tag.  We never delete the fixup branch.
    self._mirror.start_commit(svn_commit.revnum)

    # Materialize the groups because source_groups[0] is indexed below:
    source_groups = list(self._get_source_groups(svn_commit))
    if self._is_simple_copy(svn_commit, source_groups):
        (source_lod, source_revnum, cvs_symbols) = source_groups[0]
        logger.debug(
            '%s will be created via a simple copy from %s:r%d'
            % (svn_commit.symbol, source_lod, source_revnum,)
        )
        mark = self._get_source_mark(source_lod, source_revnum)
        self._set_symbol(svn_commit.symbol, mark)
        self._mirror.copy_lod(source_lod, svn_commit.symbol, source_revnum)
        self._set_lod_mark(svn_commit.symbol, svn_commit.revnum, mark)
    else:
        logger.debug(
            '%s will be created via a fixup branch' % (svn_commit.symbol,)
        )

        fixup_branch_name = self.get_tag_fixup_branch_name(svn_commit)

        # Create the fixup branch (which might involve making more than
        # one commit):
        mark = self._create_commit_mark(svn_commit.symbol, svn_commit.revnum)
        self._process_symbol_commit(
            svn_commit, fixup_branch_name, source_groups, mark
        )

        # Store the mark of the last commit to the fixup branch as the
        # value of the tag:
        self._set_symbol(svn_commit.symbol, mark)
        # git-fast-import 'reset' without a 'from' leaves the ref as-is;
        # presumably this is emitted for bookkeeping -- TODO confirm.
        self.f.write('reset %s\n' % (fixup_branch_name,))
        self.f.write('\n')

        if self.tie_tag_fixup_branches:
            # Tie the fixup branch back into the history of its source
            # line of development via an extra merge commit:
            source_lod = source_groups[0][0]
            source_lod_git_branch = \
                'refs/heads/%s' % (getattr(source_lod, 'name', 'master'),)

            mark2 = self._create_commit_mark(source_lod, svn_commit.revnum)
            author = self._map_author(Ctx().username)
            log_msg = self._get_log_msg_for_ancestry_tie(svn_commit)

            self.f.write('commit %s\n' % (source_lod_git_branch,))
            self.f.write('mark :%d\n' % (mark2,))
            self.f.write('committer %s %d +0000\n' % (author, svn_commit.date,))
            self.f.write('data %d\n' % (len(log_msg),))
            self.f.write('%s\n' % (log_msg,))

            self.f.write(
                'merge :%d\n'
                % (mark,)
            )

            self.f.write('\n')

    self._mirror.end_commit()
def add_new_changeset(self, changeset):
    """Record the new CHANGESET in the in-memory graph and on disk."""
    debugging = logger.is_on(logger.DEBUG)
    if debugging:
        logger.debug('Adding changeset %r' % (changeset,))

    # First the graph, then the backing store:
    self.add_changeset(changeset)
    self.store_changeset(changeset)
def process_tag_commit(self, svn_commit):
    """Create the tag for SVN_COMMIT in the output stream.

    A simple copy is used when possible; otherwise a fixup branch with
    the same name as the tag is created first and the tag is pointed
    at its last commit."""
    # FIXME: For now we create a fixup branch with the same name as
    # the tag, then the tag.  We never delete the fixup branch.
    self._mirror.start_commit(svn_commit.revnum)

    source_groups = self._get_source_groups(svn_commit)
    if self._is_simple_copy(svn_commit, source_groups):
        # NOTE(review): this unpacks the group as (revnum, lod,
        # symbols) while a sibling version unpacks (lod, revnum,
        # symbols) -- verify the element order against
        # _get_source_groups().
        (source_revnum, source_lod, cvs_symbols) = source_groups[0]
        logger.debug(
            '%s will be created via a simple copy from %s:r%d'
            % (svn_commit.symbol, source_lod, source_revnum,)
        )
        mark = self._get_source_mark(source_lod, source_revnum)
        self._set_symbol(svn_commit.symbol, mark)
        self._mirror.copy_lod(source_lod, svn_commit.symbol, source_revnum)
        self._set_lod_mark(svn_commit.symbol, svn_commit.revnum, mark)
    else:
        logger.debug(
            '%s will be created via a fixup branch' % (svn_commit.symbol,)
        )

        fixup_branch_name = self.get_tag_fixup_branch_name(svn_commit)

        # Create the fixup branch (which might involve making more than
        # one commit); _process_symbol_commit returns the mark of the
        # last commit it made:
        mark = self._process_symbol_commit(
            svn_commit, fixup_branch_name, source_groups
        )

        # Store the mark of the last commit to the fixup branch as the
        # value of the tag:
        self._set_symbol(svn_commit.symbol, mark)
        self.f.write('reset %s\n' % (fixup_branch_name,))
        self.f.write('\n')

        if self.tie_tag_fixup_branches:
            # Tie the fixup branch back into the history of its source
            # line of development via an extra merge commit:
            source_lod = source_groups[0][1]
            source_lod_git_branch = \
                'refs/heads/%s' % (getattr(source_lod, 'name', 'master'),)

            mark2 = self._create_commit_mark(source_lod, svn_commit.revnum)
            author = self._map_author(Ctx().username)
            log_msg = self._get_log_msg_for_ancestry_tie(svn_commit)

            self.f.write('commit %s\n' % (source_lod_git_branch,))
            self.f.write('mark :%d\n' % (mark2,))
            self.f.write('committer %s %d +0000\n' % (author, svn_commit.date,))
            self.f.write('data %d\n' % (len(log_msg),))
            self.f.write('%s\n' % (log_msg,))

            self.f.write(
                'merge :%d\n'
                % (mark,)
            )

            self.f.write('\n')

    self._mirror.end_commit()
def _adjust_branch_parents(self, cvs_branch): """Adjust the parent of CVS_BRANCH if possible and preferred. CVS_BRANCH is an instance of CVSBranch. This method must be called in leaf-to-trunk order.""" # The Symbol that cvs_branch would like to have as a parent: preferred_parent = Ctx()._symbol_db.get_symbol( cvs_branch.symbol.preferred_parent_id) if cvs_branch.source_lod == preferred_parent: # The preferred parent is already the parent. return # The CVSRevision that is its direct parent: source = self[cvs_branch.source_id] # This is always a CVSRevision because we haven't adjusted it yet: assert isinstance(source, CVSRevision) if isinstance(preferred_parent, Trunk): # It is not possible to graft *onto* Trunk: return # Try to find the preferred parent among the possible parents: for branch_id in source.branch_ids: possible_parent = self[branch_id] if possible_parent.symbol == preferred_parent: # We found it! break elif possible_parent.symbol == cvs_branch.symbol: # Only branches that precede the branch to be adjusted are # considered possible parents. Leave parentage unchanged: return else: # This point should never be reached. raise InternalError( 'Possible parent search did not terminate as expected') parent = possible_parent assert isinstance(parent, CVSBranch) logger.debug('Grafting %s from %s (on %s) onto %s' % ( cvs_branch, source, source.lod, parent, )) # Switch parent: source.branch_ids.remove(cvs_branch.id) parent.branch_ids.append(cvs_branch.id) cvs_branch.source_lod = parent.symbol cvs_branch.source_id = parent.id
def delete_changeset(self, changeset):
    """Drop CHANGESET from the graph and from the changeset database.

    The mapping self._cvs_item_to_changeset_id is deliberately left
    untouched: in practice the CVSItems in CHANGESET are always added
    again as part of a new changeset, which overwrites the stale
    entries anyway."""
    if logger.is_on(logger.DEBUG):
        logger.debug('Removing changeset %r' % (changeset,))

    changeset_id = changeset.id
    del self[changeset_id]
    del self._changeset_db[changeset_id]
def imported_remove_1_1(self, vendor_lod_items):
    """This file was imported.  Remove the 1.1 revision if possible.

    VENDOR_LOD_ITEMS is the LODItems instance for the vendor branch.
    See adjust_ntdbrs() for more information."""
    assert vendor_lod_items.cvs_revisions
    cvs_rev = vendor_lod_items.cvs_revisions[0]

    if not isinstance(cvs_rev, CVSRevisionModification) \
           or cvs_rev.deltatext_exists:
        # The first vendor-branch revision is not a plain modification
        # with empty delta text, so 1.1 cannot safely be removed:
        return

    cvs_branch = vendor_lod_items.cvs_branch
    rev_1_1 = self[cvs_branch.source_id]
    assert isinstance(rev_1_1, CVSRevision)
    if rev_1_1.prev_id:
        # That's not a revision 1.1 after all, since it has a
        # predecessor.
        return

    logger.debug('Removing unnecessary revision %s' % (rev_1_1,))

    # Delete the 1.1.1 CVSBranch and sever the vendor branch from trunk:
    self._sever_branch(vendor_lod_items)

    # Delete rev_1_1:
    self.root_ids.remove(rev_1_1.id)
    del self[rev_1_1.id]
    rev_1_2_id = rev_1_1.next_id
    if rev_1_2_id is not None:
        # Revision 1.2 now starts a line of development of its own:
        rev_1_2 = self[rev_1_2_id]
        rev_1_2.prev_id = None
        self.root_ids.add(rev_1_2.id)

    # Move any tags and branches from rev_1_1 to cvs_rev:
    cvs_rev.tag_ids.extend(rev_1_1.tag_ids)
    for id in rev_1_1.tag_ids:
        cvs_tag = self[id]
        cvs_tag.source_lod = cvs_rev.lod
        cvs_tag.source_id = cvs_rev.id
    # Prepend so that the moved branches keep their original order:
    cvs_rev.branch_ids[0:0] = rev_1_1.branch_ids
    for id in rev_1_1.branch_ids:
        cvs_branch = self[id]
        cvs_branch.source_lod = cvs_rev.lod
        cvs_branch.source_id = cvs_rev.id
    cvs_rev.branch_commit_ids[0:0] = rev_1_1.branch_commit_ids
    for id in rev_1_1.branch_commit_ids:
        cvs_rev2 = self[id]
        cvs_rev2.prev_id = cvs_rev.id
def imported_remove_1_1(self, vendor_lod_items):
    """This file was imported.  Remove the 1.1 revision if possible.

    VENDOR_LOD_ITEMS is the LODItems instance for the vendor branch.
    See adjust_ntdbrs() for more information."""
    assert vendor_lod_items.cvs_revisions
    cvs_rev = vendor_lod_items.cvs_revisions[0]

    if not isinstance(cvs_rev, CVSRevisionModification) \
           or cvs_rev.deltatext_exists:
        # The first vendor-branch revision is not a plain modification
        # with empty delta text, so 1.1 cannot safely be removed:
        return

    cvs_branch = vendor_lod_items.cvs_branch
    rev_1_1 = self[cvs_branch.source_id]
    assert isinstance(rev_1_1, CVSRevision)
    if rev_1_1.prev_id:
        # That's not a revision 1.1 after all, since it has a
        # predecessor.
        return

    logger.debug('Removing unnecessary revision %s' % (rev_1_1, ))

    # Delete the 1.1.1 CVSBranch and sever the vendor branch from trunk:
    self._sever_branch(vendor_lod_items)

    # Delete rev_1_1:
    self.root_ids.remove(rev_1_1.id)
    del self[rev_1_1.id]
    rev_1_2_id = rev_1_1.next_id
    if rev_1_2_id is not None:
        # Revision 1.2 now starts a line of development of its own:
        rev_1_2 = self[rev_1_2_id]
        rev_1_2.prev_id = None
        self.root_ids.add(rev_1_2.id)

    # Move any tags and branches from rev_1_1 to cvs_rev:
    cvs_rev.tag_ids.extend(rev_1_1.tag_ids)
    for id in rev_1_1.tag_ids:
        cvs_tag = self[id]
        cvs_tag.source_lod = cvs_rev.lod
        cvs_tag.source_id = cvs_rev.id
    # Prepend so that the moved branches keep their original order:
    cvs_rev.branch_ids[0:0] = rev_1_1.branch_ids
    for id in rev_1_1.branch_ids:
        cvs_branch = self[id]
        cvs_branch.source_lod = cvs_rev.lod
        cvs_branch.source_id = cvs_rev.id
    cvs_rev.branch_commit_ids[0:0] = rev_1_1.branch_commit_ids
    for id in rev_1_1.branch_commit_ids:
        cvs_rev2 = self[id]
        cvs_rev2.prev_id = cvs_rev.id
def _adjust_branch_parents(self, cvs_branch): """Adjust the parent of CVS_BRANCH if possible and preferred. CVS_BRANCH is an instance of CVSBranch. This method must be called in leaf-to-trunk order.""" # The Symbol that cvs_branch would like to have as a parent: preferred_parent = Ctx()._symbol_db.get_symbol( cvs_branch.symbol.preferred_parent_id) if cvs_branch.source_lod == preferred_parent: # The preferred parent is already the parent. return # The CVSRevision that is its direct parent: source = self[cvs_branch.source_id] # This is always a CVSRevision because we haven't adjusted it yet: assert isinstance(source, CVSRevision) if isinstance(preferred_parent, Trunk): # It is not possible to graft *onto* Trunk: return # Try to find the preferred parent among the possible parents: for branch_id in source.branch_ids: possible_parent = self[branch_id] if possible_parent.symbol == preferred_parent: # We found it! break elif possible_parent.symbol == cvs_branch.symbol: # Only branches that precede the branch to be adjusted are # considered possible parents. Leave parentage unchanged: return else: # This point should never be reached. raise InternalError( 'Possible parent search did not terminate as expected') parent = possible_parent assert isinstance(parent, CVSBranch) logger.debug('Grafting %s from %s (on %s) onto %s' % ( cvs_branch, source, source.lod, parent,)) # Switch parent: source.branch_ids.remove(cvs_branch.id) parent.branch_ids.append(cvs_branch.id) cvs_branch.source_lod = parent.symbol cvs_branch.source_id = parent.id
def check_for_garbage():
    """Report (at DEBUG level) any objects that only gc could reclaim.

    The cyclic garbage collector is normally turned off because the
    program should not create reference cycles; this check verifies
    that assumption."""
    gc.set_debug(gc.DEBUG_SAVEALL)
    collected = gc.collect()
    if not collected:
        return

    if logger.is_on(logger.DEBUG):
        logger.debug(
            'INTERNAL: %d unreachable object(s) were garbage collected:'
            % (collected,)
        )
        for unreachable in gc.garbage:
            logger.debug('  %s' % (unreachable,))
    # Release the saved objects so they do not accumulate:
    del gc.garbage[:]
def _generate_attic_cvs_files(self, cvs_directory, exclude_paths):
    """Generate CVSFiles for the files in Attic directory CVS_DIRECTORY.

    Also yield CVS_DIRECTORY if any files are being retained in the
    Attic.  Files whose path appears in EXCLUDE_PATHS are skipped
    entirely.

    Silently ignore subdirectories named '.svn' or 'CVS', but emit a
    warning if any other directories are found within the Attic
    directory."""

    # CVSFiles that will stay in the Attic (rather than being treated
    # as if they lived one directory up):
    retained_attic_files = []

    fnames = os.listdir(cvs_directory.rcs_path)
    fnames.sort()
    for fname in fnames:
        pathname = os.path.join(cvs_directory.rcs_path, fname)
        path_in_repository = path_join(cvs_directory.get_cvs_path(), fname)
        if path_in_repository in exclude_paths:
            logger.normal(
                "Excluding file from conversion: %s" % (path_in_repository,)
            )
        elif os.path.isdir(pathname):
            if fname == '.svn' or fname == 'CVS':
                logger.debug(
                    "Directory %s found within Attic; ignoring" % (pathname,)
                )
            else:
                logger.warn(
                    "Directory %s found within Attic; ignoring" % (pathname,)
                )
        elif fname.endswith(',v'):
            cvs_file = self._get_attic_file(cvs_directory, fname)
            if cvs_file.parent_directory == cvs_directory:
                # This file will be retained in the Attic directory.
                retained_attic_files.append(cvs_file)
            else:
                # This is a normal Attic file, which is treated as if it
                # were located one directory up:
                yield cvs_file

    if retained_attic_files:
        # There was at least one file in the attic that will be retained
        # in the attic.  First include the Attic directory itself in the
        # output, then the retained attic files:
        yield cvs_directory
        for cvs_file in retained_attic_files:
            yield cvs_file
def check_for_garbage():
    """Detect objects that only the cycle collector could free.

    Garbage collection is normally disabled on the assumption that no
    reference cycles are created; any objects reclaimed here therefore
    indicate an internal problem and are reported at DEBUG level."""
    gc.set_debug(gc.DEBUG_SAVEALL)
    n_unreachable = gc.collect()
    if not n_unreachable:
        return

    if logger.is_on(logger.DEBUG):
        logger.debug(
            'INTERNAL: %d unreachable object(s) were garbage collected:'
            % (n_unreachable,)
        )
        for obj in gc.garbage:
            logger.debug('  %s' % (obj,))
    # Drop the saved garbage so it is not kept alive:
    del gc.garbage[:]
def _adjust_tag_parent(self, cvs_tag): """Adjust the parent of CVS_TAG if possible and preferred. CVS_TAG is an instance of CVSTag. This method must be called in leaf-to-trunk order.""" # The Symbol that cvs_tag would like to have as a parent: preferred_parent = Ctx()._symbol_db.get_symbol( cvs_tag.symbol.preferred_parent_id) if cvs_tag.source_lod == preferred_parent: # The preferred parent is already the parent. return # The CVSRevision that is its direct parent: source = self[cvs_tag.source_id] assert isinstance(source, CVSRevision) if isinstance(preferred_parent, Trunk): # It is not possible to graft *onto* Trunk: return # Try to find the preferred parent among the possible parents: for branch_id in source.branch_ids: if self[branch_id].symbol == preferred_parent: # We found it! break else: # The preferred parent is not a possible parent in this file. return parent = self[branch_id] assert isinstance(parent, CVSBranch) logger.debug('Grafting %s from %s (on %s) onto %s' % ( cvs_tag, source, source.lod, parent, )) # Switch parent: source.tag_ids.remove(cvs_tag.id) parent.tag_ids.append(cvs_tag.id) cvs_tag.source_lod = parent.symbol cvs_tag.source_id = parent.id
def get_command_output(command):
    """Run COMMAND and return its stdout.

    COMMAND is a list of strings.  Run the command and return its
    stdout as a string.  If the command exits with a nonzero return
    code or writes something to stderr, raise a CommandError."""
    # (A stray, unrelated string literal that followed the docstring
    # -- "A file-like object from which revision contents can be
    # read." -- was a leftover no-op statement and has been removed.)

    logger.debug('Running command %r' % (command,))
    pipe = subprocess.Popen(
        command,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    (stdout, stderr) = pipe.communicate()
    if pipe.returncode or stderr:
        # Treat any stderr output as failure, even with exit code 0:
        raise CommandError(' '.join(command), pipe.returncode, stderr)
    return stdout
def check_for_garbage(self):
    """Check for any unreachable objects.

    Generate a DEBUG-level warning if any were found."""
    try:
        gc.set_debug(gc.DEBUG_SAVEALL)
        n_collected = gc.collect()
        if not n_collected:
            return
        if logger.is_on(logger.DEBUG):
            logger.debug(
                'INTERNAL: %d unreachable object(s) were garbage collected:'
                % (n_collected,)
            )
            for leaked in gc.garbage:
                logger.debug('  %s' % (leaked,))
        del gc.garbage[:]
    except (AttributeError, NotImplementedError):
        # Other Python implementations implement garbage collection
        # differently, so if errors occur just ignore them.
        pass
def _adjust_tag_parent(self, cvs_tag): """Adjust the parent of CVS_TAG if possible and preferred. CVS_TAG is an instance of CVSTag. This method must be called in leaf-to-trunk order.""" # The Symbol that cvs_tag would like to have as a parent: preferred_parent = Ctx()._symbol_db.get_symbol( cvs_tag.symbol.preferred_parent_id) if cvs_tag.source_lod == preferred_parent: # The preferred parent is already the parent. return # The CVSRevision that is its direct parent: source = self[cvs_tag.source_id] assert isinstance(source, CVSRevision) if isinstance(preferred_parent, Trunk): # It is not possible to graft *onto* Trunk: return # Try to find the preferred parent among the possible parents: for branch_id in source.branch_ids: if self[branch_id].symbol == preferred_parent: # We found it! break else: # The preferred parent is not a possible parent in this file. return parent = self[branch_id] assert isinstance(parent, CVSBranch) logger.debug('Grafting %s from %s (on %s) onto %s' % ( cvs_tag, source, source.lod, parent,)) # Switch parent: source.tag_ids.remove(cvs_tag.id) parent.tag_ids.append(cvs_tag.id) cvs_tag.source_lod = parent.symbol cvs_tag.source_id = parent.id
def check_for_garbage(self):
    """Look for unreachable objects and log them at DEBUG level."""
    try:
        gc.set_debug(gc.DEBUG_SAVEALL)
        unreachable_count = gc.collect()
        if not unreachable_count:
            return
        if logger.is_on(logger.DEBUG):
            logger.debug(
                'INTERNAL: %d unreachable object(s) were garbage collected:'
                % (unreachable_count,)
            )
            for obj in gc.garbage:
                logger.debug('  %s' % (obj,))
        del gc.garbage[:]
    except (AttributeError, NotImplementedError):
        # Other Python implementations implement garbage collection
        # differently, so if errors occur just ignore them.
        pass
def check_for_garbage():
    """Report objects that only cyclic garbage collection could free.

    The collector is normally turned off because the program should
    not create circular references; this check validates that and
    logs (DEBUG level) anything that slipped through."""
    try:
        gc.set_debug(gc.DEBUG_SAVEALL)
        cycle_count = gc.collect()
        if not cycle_count:
            return
        if logger.is_on(logger.DEBUG):
            logger.debug(
                'INTERNAL: %d unreachable object(s) were garbage collected:'
                % (cycle_count,)
            )
            for obj in gc.garbage:
                logger.debug('  %s' % (obj,))
        del gc.garbage[:]
    except (AttributeError, NotImplementedError):
        # Other Python implementations implement garbage collection
        # differently, so if errors occur just ignore them.
        pass
def _process_tag_changeset(self, changeset, timestamp):
    """Turn TagChangeset CHANGESET into at most one SVNTagCommit.

    CVSTagNoops are discarded first; if nothing remains, no commit is
    generated at all.  This is a generator; it yields the SVNTagCommit
    (if any)."""
    if Ctx().trunk_only:
        raise InternalError(
            'TagChangeset encountered during a --trunk-only conversion')

    tag_ids = []
    for cvs_tag in changeset.iter_cvs_items():
        if not isinstance(cvs_tag, CVSTagNoop):
            tag_ids.append(cvs_tag.id)

    if not tag_ids:
        logger.debug(
            'Omitting %r because it contains only CVSTagNoops' % (changeset,)
        )
        return

    yield SVNTagCommit(
        changeset.symbol,
        tag_ids,
        timestamp,
        self.revnum_generator.gen_id(),
    )
def _process_tag_changeset(self, changeset, timestamp):
    """Process TagChangeset CHANGESET, producing a SVNTagCommit.

    Filter out CVSTagNoops.  If no CVSTags are left, don't generate a
    SVNTagCommit.  This is a generator; it yields the SVNTagCommit
    (if any)."""
    if Ctx().trunk_only:
        raise InternalError(
            'TagChangeset encountered during a --trunk-only conversion')

    cvs_tag_ids = [
        cvs_tag.id
        for cvs_tag in changeset.iter_cvs_items()
        if not isinstance(cvs_tag, CVSTagNoop)
    ]
    if cvs_tag_ids:
        yield SVNTagCommit(
            changeset.symbol, cvs_tag_ids, timestamp,
            self.revnum_generator.gen_id(),
        )
    else:
        logger.debug('Omitting %r because it contains only CVSTagNoops'
                     % (changeset, ))
def remove_initial_branch_deletes(self, metadata_db):
    """If the first revision on a branch is an unnecessary delete, remove it.

    If a file is added on a branch (whether or not it already existed
    on trunk), then new versions of CVS add a first branch revision in
    the 'dead' state (to indicate that the file did not exist on the
    branch when the branch was created) followed by the second branch
    revision, which is an add.  When we encounter this situation, we
    sever the branch from trunk and delete the first branch
    revision."""

    for lod_items in self.iter_lods():
        if self._is_unneeded_initial_branch_delete(lod_items, metadata_db):
            cvs_revision = lod_items.cvs_revisions[0]
            logger.debug(
                'Removing unnecessary initial branch delete %s'
                % (cvs_revision,)
            )

            # Sever the branch from its source if necessary:
            self._sever_branch(lod_items)

            # Delete the first revision on the branch:
            self.root_ids.remove(cvs_revision.id)
            del self[cvs_revision.id]

            # If it had a successor, adjust its backreference and add it
            # to the root_ids:
            if cvs_revision.next_id is not None:
                cvs_rev_next = self[cvs_revision.next_id]
                cvs_rev_next.prev_id = None
                self.root_ids.add(cvs_rev_next.id)

            # Tagging a dead revision doesn't do anything, so remove any
            # tags that were set on it:
            for tag_id in cvs_revision.tag_ids:
                del self[tag_id]
def _add_pattern(self, pattern, props):
    """Parse one auto-props rule and append it to self.patterns.

    PATTERN is the file-matching pattern; PROPS is the raw property
    string, with individual properties separated by ';'.  Each
    property is matched against three regexps declared elsewhere on
    this class (property_unset_re, property_set_re,
    property_novalue_re); anything that matches none of them is
    reported and ignored."""
    propdict = {}
    if self.quoted_re.match(pattern):
        # Quoted patterns are not supported; warn, but keep the
        # pattern verbatim (quotes included):
        logger.warn(
            '%s: Quoting is not supported in auto-props; please verify rule\n'
            'for %r.  (Using pattern including quotation marks.)\n'
            % (warning_prefix, pattern,)
        )
    for prop in props.split(';'):
        prop = prop.strip()
        m = self.property_unset_re.match(prop)
        if m:
            name = m.group('name')
            logger.debug(
                'auto-props: For %r, leaving %r unset.' % (pattern, name,)
            )
            # None is the marker for "leave this property unset":
            propdict[name] = None
            continue

        m = self.property_set_re.match(prop)
        if m:
            name = m.group('name')
            value = m.group('value')
            if self.quoted_re.match(value):
                logger.warn(
                    '%s: Quoting is not supported in auto-props; please verify\n'
                    'rule %r for pattern %r.  (Using value\n'
                    'including quotation marks.)\n'
                    % (warning_prefix, prop, pattern,)
                )
            logger.debug(
                'auto-props: For %r, setting %r to %r.'
                % (pattern, name, value,)
            )
            propdict[name] = value
            continue

        m = self.property_novalue_re.match(prop)
        if m:
            name = m.group('name')
            logger.debug(
                'auto-props: For %r, setting %r to the empty string'
                % (pattern, name,)
            )
            propdict[name] = ''
            continue

        logger.warn(
            '%s: in auto-props line for %r, value %r cannot be parsed '
            '(ignored)' % (warning_prefix, pattern, prop,)
        )

    self.patterns.append(
        self.Pattern(self.transform_case(pattern), propdict))
def generate_cvs_paths(self, cvs_directory): """Generate the CVSPaths under non-Attic directory CVS_DIRECTORY. Yield CVSDirectory and CVSFile instances as they are found. Process directories recursively, including Attic directories. Also look for conflicts between the filenames that will result from files, attic files, and subdirectories. Silently ignore subdirectories named '.svn', as these don't make much sense in a real conversion, but they are present in our test suite.""" yield cvs_directory # Map {cvs_file.basename : cvs_file.filename} for files directly # in cvs_directory: rcsfiles = {} attic_dir = None # Non-Attic subdirectories of cvs_directory (to be recursed into): dirs = [] fnames = os.listdir(cvs_directory.filename) fnames.sort() for fname in fnames: pathname = os.path.join(cvs_directory.filename, fname) if os.path.isdir(pathname): if fname == 'Attic': attic_dir = fname elif fname == '.svn': logger.debug("Directory %s ignored" % (pathname,)) else: dirs.append(fname) elif fname.endswith(',v'): cvs_file = self._get_cvs_file(cvs_directory, fname) rcsfiles[cvs_file.basename] = cvs_file.filename yield cvs_file else: # Silently ignore other files: pass # Map {cvs_file.basename : cvs_file.filename} for files in an # Attic directory within cvs_directory: attic_rcsfiles = {} if attic_dir is not None: attic_directory = CVSDirectory( self.file_key_generator.gen_id(), cvs_directory.project, cvs_directory, 'Attic', ) for cvs_path in self._generate_attic_cvs_files(attic_directory): if isinstance(cvs_path, CVSFile) \ and cvs_path.parent_directory == cvs_directory: attic_rcsfiles[cvs_path.basename] = cvs_path.filename yield cvs_path alldirs = dirs + [attic_dir] else: alldirs = dirs # Check for conflicts between directory names and the filenames # that will result from the rcs files (both in this directory and # in attic). (We recurse into the subdirectories nevertheless, to # try to detect more problems.) 
for fname in alldirs: for rcsfile_list in [rcsfiles, attic_rcsfiles]: if fname in rcsfile_list: self.error_handler( 'Directory name conflicts with filename. Please remove or ' 'rename one\n' 'of the following:\n' ' "%s"\n' ' "%s"' % ( os.path.join(cvs_directory.filename, fname), rcsfile_list[fname], ) ) # Now recurse into the other subdirectories: for fname in dirs: dirname = os.path.join(cvs_directory.filename, fname) # Verify that the directory name does not contain any illegal # characters: try: verify_svn_filename_legal(fname) except IllegalSVNPathError, e: raise FatalError( 'Directory %r would result in an illegal SVN path name: %s' % (dirname, e,) ) sub_directory = CVSDirectory( self.file_key_generator.gen_id(), cvs_directory.project, cvs_directory, fname, ) for cvs_path in self.generate_cvs_paths(sub_directory): yield cvs_path
def generate_cvs_paths(self, cvs_directory, exclude_paths): """Generate the CVSPaths under non-Attic directory CVS_DIRECTORY. Yield CVSDirectory and CVSFile instances as they are found. Process directories recursively, including Attic directories. Also look for conflicts between the filenames that will result from files, attic files, and subdirectories. Silently ignore subdirectories named 'CVS', as these are used by CVS to store metadata that are irrelevant to the conversion. Silently ignore subdirectories named '.svn', as these don't make much sense in a real conversion, but they are present in our test suite.""" yield cvs_directory # Map {cvs_file.rcs_basename : cvs_file.rcs_path} for files # directly in cvs_directory: rcsfiles = {} attic_dir = None # Non-Attic subdirectories of cvs_directory (to be recursed into): dirs = [] fnames = os.listdir(cvs_directory.rcs_path) fnames.sort() for fname in fnames: pathname = os.path.join(cvs_directory.rcs_path, fname) path_in_repository = path_join(cvs_directory.get_cvs_path(), fname) if path_in_repository in exclude_paths: logger.normal( "Excluding file from conversion: %s" % (path_in_repository,) ) pass elif os.path.isdir(pathname): if fname == 'Attic': attic_dir = fname elif fname == '.svn' or fname == 'CVS': logger.debug("Directory %s ignored" % (pathname,)) else: dirs.append(fname) elif fname.endswith(',v'): cvs_file = self._get_cvs_file(cvs_directory, fname) rcsfiles[cvs_file.rcs_basename] = cvs_file.rcs_path yield cvs_file else: # Silently ignore other files: pass # Map {cvs_file.rcs_basename : cvs_file.rcs_path} for files in an # Attic directory within cvs_directory: attic_rcsfiles = {} if attic_dir is not None: attic_directory = CVSDirectory( self.file_key_generator.gen_id(), cvs_directory.project, cvs_directory, 'Attic', ) for cvs_path in self._generate_attic_cvs_files(attic_directory, exclude_paths): if isinstance(cvs_path, CVSFile) \ and cvs_path.parent_directory == cvs_directory: 
attic_rcsfiles[cvs_path.rcs_basename] = cvs_path.rcs_path yield cvs_path alldirs = dirs + [attic_dir] else: alldirs = dirs # Check for conflicts between directory names and the filenames # that will result from the rcs files (both in this directory and # in attic). (We recurse into the subdirectories nevertheless, to # try to detect more problems.) for fname in alldirs: for rcsfile_list in [rcsfiles, attic_rcsfiles]: if fname in rcsfile_list: self.error_handler( 'Directory name conflicts with filename. Please remove or ' 'rename one\n' 'of the following:\n' ' "%s"\n' ' "%s"' % ( os.path.join(cvs_directory.rcs_path, fname), rcsfile_list[fname], ) ) # Now recurse into the other subdirectories: for fname in dirs: dirname = os.path.join(cvs_directory.rcs_path, fname) # Verify that the directory name does not contain any illegal # characters: try: Ctx().output_option.verify_filename_legal(fname) except IllegalSVNPathError, e: raise FatalError( 'Directory %r would result in an illegal SVN path name: %s' % (dirname, e,) ) sub_directory = CVSDirectory( self.file_key_generator.gen_id(), cvs_directory.project, cvs_directory, fname, ) for cvs_path in self.generate_cvs_paths(sub_directory, exclude_paths): yield cvs_path
def _add_pattern(self, pattern, props):
    """Record an auto-props rule mapping PATTERN to the properties in PROPS.

    PROPS is a ';'-separated list of property rules; unparsable rules
    are reported and ignored."""

    parsed = {}
    if self.quoted_re.match(pattern):
      logger.warn(
          '%s: Quoting is not supported in auto-props; please verify rule\n'
          'for %r. (Using pattern including quotation marks.)\n'
          % (warning_prefix, pattern,))
    for raw_prop in props.split(';'):
      raw_prop = raw_prop.strip()

      # Rule that leaves a property explicitly unset:
      mo = self.property_unset_re.match(raw_prop)
      if mo:
        prop_name = mo.group('name')
        logger.debug(
            'auto-props: For %r, leaving %r unset.' % (pattern, prop_name,))
        parsed[prop_name] = None
        continue

      # Rule that sets a property to an explicit value:
      mo = self.property_set_re.match(raw_prop)
      if mo:
        prop_name = mo.group('name')
        prop_value = mo.group('value')
        if self.quoted_re.match(prop_value):
          logger.warn(
              '%s: Quoting is not supported in auto-props; please verify\n'
              'rule %r for pattern %r. (Using value\n'
              'including quotation marks.)\n'
              % (warning_prefix, raw_prop, pattern,))
        logger.debug(
            'auto-props: For %r, setting %r to %r.'
            % (pattern, prop_name, prop_value,))
        parsed[prop_name] = prop_value
        continue

      # Rule naming a property with no value; it is set to '':
      mo = self.property_novalue_re.match(raw_prop)
      if mo:
        prop_name = mo.group('name')
        logger.debug(
            'auto-props: For %r, setting %r to the empty string'
            % (pattern, prop_name,))
        parsed[prop_name] = ''
        continue

      logger.warn(
          '%s: in auto-props line for %r, value %r cannot be parsed (ignored)'
          % (warning_prefix, pattern, raw_prop,))

    self.patterns.append(
        self.Pattern(self.transform_case(pattern), parsed))