def _process_branch_changeset(self, changeset, timestamp):
    """Turn BranchChangeset CHANGESET into at most one SVNBranchCommit.

    CVSBranchNoop items are dropped first.  If nothing is left, no
    commit is generated and a debug message is logged instead.
    Otherwise the SVNBranchCommit (stamped with TIMESTAMP and a freshly
    generated revnum) is yielded; once the consumer resumes this
    generator, every surviving branch is recorded with the symbolings
    logger under the new commit's revnum.

    Raise InternalError if invoked during a --trunk-only conversion."""

    if Ctx().trunk_only:
        raise InternalError(
            'BranchChangeset encountered during a --trunk-only conversion')

    # Keep only the branches that actually do something:
    real_branches = []
    for item in changeset.iter_cvs_items():
        if not isinstance(item, CVSBranchNoop):
            real_branches.append(item)

    if not real_branches:
        logger.debug(
            'Omitting %r because it contains only CVSBranchNoops'
            % (changeset,))
        return

    symbol = changeset.symbol
    branch_ids = [branch.id for branch in real_branches]
    svn_commit = SVNBranchCommit(
        symbol, branch_ids, timestamp, self.revnum_generator.gen_id())
    yield svn_commit

    # The logging deliberately happens after the commit was handed out:
    for branch in real_branches:
        Ctx()._symbolings_logger.log_branch_revision(
            branch, svn_commit.revnum)
def __init__(self):
    """Open a fresh node database and size its in-memory cache.

    The cache limit scales with the number of CVSDirectory entries in
    the CVS file database, but never drops below MIN_CACHE_LIMIT."""

    self.cvs_file_db = Ctx()._cvs_file_db

    store_filename = artifact_manager.get_temp_file(config.MIRROR_NODES_STORE)
    index_filename = artifact_manager.get_temp_file(
        config.MIRROR_NODES_INDEX_TABLE)
    self.db = IndexedDatabase(
        store_filename, index_filename, DB_OPEN_NEW,
        serializer=MarshalSerializer(),
    )

    # The largest node_id written by each write_new_nodes() call so far:
    self._max_node_ids = [0]

    # Maps {node_id : {cvs_path : node_id}}:
    self._cache = {}

    # Count the directories in the repository:
    num_dirs = 0
    for cvs_path in self.cvs_file_db.itervalues():
        if isinstance(cvs_path, CVSDirectory):
            num_dirs += 1

    scaled_limit = int(self.CACHE_SIZE_MULTIPLIER * num_dirs)
    self._cache_max_size = max(scaled_limit, self.MIN_CACHE_LIMIT)
def process_output_options(self):
    """Configure the context for fastimport (Bazaar) output.

    Pick the revision reader from --use-rcs / --use-cvs, insist on
    either '--dry-run' or '--dumpfile', and install the matching output
    option on the context.  Raise FatalError if neither was given."""

    ctx = Ctx()
    options = self.options

    # --use-cvs is the default:
    if not options.use_rcs:
        revision_reader = CVSRevisionReader(
            cvs_executable=options.cvs_executable)
    else:
        revision_reader = RCSRevisionReader(
            co_executable=options.co_executable)

    if not (ctx.dry_run or options.dumpfile):
        raise FatalError("must pass '--dry-run' or '--dumpfile' option.")

    # See cvs2bzr-example.options for explanations of these
    ctx.revision_collector = NullRevisionCollector()
    ctx.revision_reader = None

    if ctx.dry_run:
        ctx.output_option = NullOutputOption()
        return

    ctx.output_option = BzrOutputOption(
        options.dumpfile,
        GitRevisionInlineWriter(revision_reader),
        # Optional map from CVS author names to bzr author names:
        author_transforms={},  # FIXME
    )
def main():
    """Dump one of the conversion's temporary databases.

    Exactly one option out of "RNr:mlfcCiIp:" must be given on the
    command line, optionally followed by the directory holding the
    temporary files (stored in Ctx().tmpdir).  Anything else is
    reported through usage()."""

    try:
        opts, args = getopt.getopt(sys.argv[1:], "RNr:mlfcCiIp:")
    except getopt.GetoptError:
        # NOTE(review): this relies on usage() not returning; otherwise
        # opts/args are undefined below -- confirm.
        usage()

    if len(args) > 1 or len(opts) != 1:
        usage()

    if len(args) == 1:
        Ctx().tmpdir = args[0]

    for o, a in opts:
        if o == "-R":
            show_int2str_db(config.SVN_MIRROR_REVISIONS_TABLE)
        elif o == "-N":
            show_str2marshal_db(
                config.SVN_MIRROR_NODES_STORE,
                config.SVN_MIRROR_NODES_INDEX_TABLE)
        elif o == "-r":
            try:
                revnum = int(a)
            except ValueError:
                sys.stderr.write(
                    'Option -r requires a valid revision number\n')
                sys.exit(1)
            # Close each database even if the lookup or the dump fails:
            db = anydbm.open(config.SVN_MIRROR_REVISIONS_TABLE, 'r')
            try:
                key = db[str(revnum)]
            finally:
                db.close()
            db = anydbm.open(config.SVN_MIRROR_NODES_STORE, 'r')
            try:
                print_node_tree(db, key, "Revision %d" % revnum)
            finally:
                db.close()
        elif o == "-m":
            show_str2marshal_db(config.METADATA_DB)
        elif o == "-f":
            prime_ctx()
            for cvs_file in sorted(Ctx()._cvs_path_db.itervalues()):
                print('%6x: %s' % (cvs_file.id, cvs_file,))
        elif o == "-c":
            prime_ctx()
            show_str2ppickle_db(
                config.SVN_COMMITS_INDEX_TABLE, config.SVN_COMMITS_STORE)
        elif o == "-C":
            show_str2marshal_db(config.CVS_REVS_TO_SVN_REVNUMS)
        elif o == "-i":
            prime_ctx()
            show_cvsitemstore()
        elif o == "-I":
            prime_ctx()
            show_filtered_cvs_item_store()
        elif o == "-p":
            # SECURITY: unpickling executes arbitrary code; only point
            # this at pickle files that you created yourself.
            # Pickles are binary data, so read them in binary mode and
            # make sure the file handle is released.
            f = open(a, 'rb')
            try:
                obj = pickle.load(f)
            finally:
                f.close()
            print(repr(obj))
            print(obj)
        else:
            usage()
            sys.exit(2)
def process_extraction_options(self):
    """Choose how file contents are extracted from the CVS repository.

    At most one of --use-rcs, --use-cvs and --use-internal-co may be
    given (not_both() is used to enforce that pairwise); the internal
    checkout code is the default.  The matching revision recorder,
    excluder and reader are installed on the context."""

    ctx = Ctx()
    options = self.options

    exclusive_pairs = [
        (options.use_rcs, '--use-rcs', options.use_cvs, '--use-cvs'),
        (options.use_rcs, '--use-rcs',
         options.use_internal_co, '--use-internal-co'),
        (options.use_cvs, '--use-cvs',
         options.use_internal_co, '--use-internal-co'),
    ]
    for pair in exclusive_pairs:
        not_both(*pair)

    if options.use_rcs or options.use_cvs:
        # The external tools need neither recording nor excluding:
        ctx.revision_recorder = NullRevisionRecorder()
        ctx.revision_excluder = NullRevisionExcluder()
        if options.use_rcs:
            ctx.revision_reader = RCSRevisionReader(options.co_executable)
        else:
            ctx.revision_reader = CVSRevisionReader(options.cvs_executable)
    else:
        # --use-internal-co is the default:
        ctx.revision_recorder = InternalRevisionRecorder(compress=True)
        ctx.revision_excluder = InternalRevisionExcluder()
        ctx.revision_reader = InternalRevisionReader(compress=True)
def get_log_msg(self):
    """Build the artificial log message for this commit.

    The context's symbol_commit_message template is filled in with this
    commit's symbol type and symbol name, then passed through the
    context's text wrapper."""

    ctx = Ctx()
    wrap = ctx.text_wrapper.fill
    template = ctx.symbol_commit_message
    substitutions = {
        'symbol_type': self._get_symbol_type(),
        'symbol_name': self.symbol.name,
    }
    return wrap(template % substitutions)
def __setstate__(self, data):
    """Restore this item from the tuple DATA.

    The file and line of development are stored in DATA as ids; they
    are resolved back to objects through the CVS file database and the
    symbol database."""

    (
        self.id,
        cvs_file_id,
        self.timestamp,
        self.metadata_id,
        self.prev_id,
        self.next_id,
        self.rev,
        self.deltatext_exists,
        lod_id,
        self.first_on_branch_id,
        self.ntdbr,
        self.ntdbr_prev_id,
        self.ntdbr_next_id,
        self.tag_ids,
        self.branch_ids,
        self.branch_commit_ids,
        self.opened_symbols,
        self.closed_symbols,
        self.revision_recorder_token,
    ) = data

    ctx = Ctx()
    self.cvs_file = ctx._cvs_file_db.get_file(cvs_file_id)
    self.lod = ctx._symbol_db.get_symbol(lod_id)
def check(self):
    """Verify that the current settings are usable for git output.

    Raise FatalError if cross-project or cross-branch commits are
    enabled, or if no default commit username has been configured."""

    ctx = Ctx()

    # Context flags that must be off, with the complaint for each:
    forbidden = (
        ('cross_project_commits',
         'Git output is not supported with cross-project commits'),
        ('cross_branch_commits',
         'Git output is not supported with cross-branch commits'),
    )
    for (flag_name, complaint) in forbidden:
        if getattr(ctx, flag_name):
            raise FatalError(complaint)

    if ctx.username is None:
        raise FatalError('Git output requires a default commit username')
def process_output_options(self):
    """Install a GitOutputOption that writes to the --dumpfile file.

    Revisions are referenced through marks (GitRevisionMarkWriter)."""

    ctx = Ctx()
    dumpfile = self.options.dumpfile
    mark_writer = GitRevisionMarkWriter()
    ctx.output_option = GitOutputOption(
        dumpfile,
        mark_writer,
        # Optional map from CVS author names to git author names:
        author_transforms={},  # FIXME
    )
def __init__(self, *args, **kwargs):
    """Set the message templates on the context, then defer to
    DVCSRunOptions.__init__() with the unchanged arguments."""

    # Override some default values
    overrides = (
        ('symbol_commit_message',
         "artificial changeset to create "
         "%(symbol_type)s '%(symbol_name)s'"),
        ('post_commit_message',
         "artificial changeset: compensate for changes in %(revnum)s "
         "(on non-trunk default branch in CVS)"),
    )
    ctx = Ctx()
    for (attribute, template) in overrides:
        setattr(ctx, attribute, template)

    DVCSRunOptions.__init__(self, *args, **kwargs)
def main(progname, run_options, pass_manager): # Disable garbage collection, as we do not not create any circular # data structures. To verify this assumption, the function # check_for_garbage() in pass_manager.py is run after every pass. # It verifies that no unreachable objects have been created (or # reports any that were found): try: gc.disable() except (AttributeError, NotImplementedError): # Other Python implementations implement garbage collection # differently, so if an error occurs just ignore it. pass # Convenience var, so we don't have to keep instantiating this Borg. ctx = Ctx() # Make sure the tmp directory exists. Note that we don't check if # it's empty -- we want to be able to use, for example, "." to hold # tempfiles. if ctx.tmpdir is None: ctx.tmpdir = tempfile.mkdtemp(prefix=('%s-' % (progname,))) erase_tmpdir = True logger.quiet( 'Writing temporary files to %r\n' 'Be sure to use --tmpdir=%r if you need to resume this conversion.' % (ctx.tmpdir, ctx.tmpdir,), ) elif not os.path.exists(ctx.tmpdir): os.mkdir(ctx.tmpdir) erase_tmpdir = True elif not os.path.isdir(ctx.tmpdir): raise FatalError( "cvs2svn tried to use '%s' for temporary files, but that path\n" " exists and is not a directory. Please make it be a directory,\n" " or specify some other directory for temporary files." % (ctx.tmpdir,)) else: erase_tmpdir = False # But do lock the tmpdir, to avoid process clash. try: os.mkdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock')) except OSError, e: if e.errno == errno.EACCES: raise FatalError("Permission denied:" + " No write access to directory '%s'." % ctx.tmpdir) if e.errno == errno.EEXIST: raise FatalError( "cvs2svn is using directory '%s' for temporary files, but\n" " subdirectory '%s/cvs2svn.lock' exists, indicating that another\n" " cvs2svn process is currently using '%s' as its temporary\n" " workspace. If you are certain that is not the case,\n" " then remove the '%s/cvs2svn.lock' subdirectory." 
% (ctx.tmpdir, ctx.tmpdir, ctx.tmpdir, ctx.tmpdir,)) raise
def __init__(self, *args, **kwargs):
    """Replace two message templates on the context, then run the
    DVCSRunOptions initializer with the arguments passed in."""

    symbol_template = (
        "artificial changeset to create "
        "%(symbol_type)s '%(symbol_name)s'"
    )
    post_commit_template = (
        "artificial changeset: compensate for changes in %(revnum)s "
        "(on non-trunk default branch in CVS)"
    )

    # Override some default values
    ctx = Ctx()
    ctx.symbol_commit_message = symbol_template
    ctx.post_commit_message = post_commit_template

    DVCSRunOptions.__init__(self, *args, **kwargs)
def __setstate__(self, state):
    """Rebuild this instance from STATE.

    STATE is (cvs_file_id, trunk_id, cvs_items).  The two ids are
    resolved through the CVS file and symbol databases and the result
    is handed to CVSFileItems.__init__()."""

    cvs_file_id, trunk_id, cvs_items = state

    ctx = Ctx()
    cvs_file = ctx._cvs_file_db.get_file(cvs_file_id)
    trunk = ctx._symbol_db.get_symbol(trunk_id)

    CVSFileItems.__init__(self, cvs_file, trunk, cvs_items)
def __setstate__(self, data):
    """Restore this item from the tuple DATA.

    The file, the symbol and the source line of development are stored
    as ids and are looked up again in the CVS file database and the
    symbol database."""

    (self.id, cvs_file_id, symbol_id, source_lod_id,
     self.source_id, self.revision_recorder_token) = data

    ctx = Ctx()
    symbol_db = ctx._symbol_db
    self.cvs_file = ctx._cvs_file_db.get_file(cvs_file_id)
    self.symbol = symbol_db.get_symbol(symbol_id)
    self.source_lod = symbol_db.get_symbol(source_lod_id)
def process_output_options(self):
    """Install the output option for fastimport (git) output.

    A dry run gets a NullOutputOption; otherwise a GitOutputOption that
    writes to the --dumpfile file is used."""

    ctx = Ctx()
    if not ctx.dry_run:
        ctx.output_option = GitOutputOption(
            GitRevisionMarkWriter(),
            dump_filename=self.options.dumpfile,
            # Optional map from CVS author names to git author names:
            author_transforms={},  # FIXME
        )
    else:
        ctx.output_option = NullOutputOption()
def main(progname, run_options, pass_manager): # Convenience var, so we don't have to keep instantiating this Borg. ctx = Ctx() # Make sure the tmp directory exists. Note that we don't check if # it's empty -- we want to be able to use, for example, "." to hold # tempfiles. if ctx.tmpdir is None: ctx.tmpdir = tempfile.mkdtemp(prefix=('%s-' % (progname, ))) erase_tmpdir = True logger.quiet( 'Writing temporary files to %r\n' 'Be sure to use --tmpdir=%r if you need to resume this conversion.' % ( ctx.tmpdir, ctx.tmpdir, ), ) elif not os.path.exists(ctx.tmpdir): os.mkdir(ctx.tmpdir) erase_tmpdir = True elif not os.path.isdir(ctx.tmpdir): raise FatalError( "cvs2svn tried to use '%s' for temporary files, but that path\n" " exists and is not a directory. Please make it be a directory,\n" " or specify some other directory for temporary files." % (ctx.tmpdir, )) else: erase_tmpdir = False # But do lock the tmpdir, to avoid process clash. try: os.mkdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock')) except OSError, e: if e.errno == errno.EACCES: raise FatalError("Permission denied:" + " No write access to directory '%s'." % ctx.tmpdir) if e.errno == errno.EEXIST: raise FatalError( "cvs2svn is using directory '%s' for temporary files, but\n" " subdirectory '%s/cvs2svn.lock' exists, indicating that another\n" " cvs2svn process is currently using '%s' as its temporary\n" " workspace. If you are certain that is not the case,\n" " then remove the '%s/cvs2svn.lock' subdirectory." % ( ctx.tmpdir, ctx.tmpdir, ctx.tmpdir, ctx.tmpdir, )) raise
def _register_empty_subdirectories(self):
    """Fill in the CVSDirectory.empty_subdirectory_ids members.

    Start with every CVSDirectory in the path database, then strike out
    each directory that has a CVSFile somewhere beneath it.  Each
    directory that is left over has its id appended to its parent's
    empty_subdirectory_ids list (if it has a parent)."""

    path_db = Ctx()._cvs_path_db

    empty_dirs = set()
    for path in path_db.itervalues():
        if isinstance(path, CVSDirectory):
            empty_dirs.add(path)

    for path in path_db.itervalues():
        if not isinstance(path, CVSFile):
            continue
        # Walk upwards; stop at the root or at the first ancestor that
        # has already been struck out:
        ancestor = path.parent_directory
        while ancestor is not None and ancestor in empty_dirs:
            empty_dirs.remove(ancestor)
            ancestor = ancestor.parent_directory

    for directory in empty_dirs:
        parent = directory.parent_directory
        if parent is not None:
            parent.empty_subdirectory_ids.append(directory.id)
def process_project(self, project):
    """Collect the data for every file generated for PROJECT.

    PROJECT is registered in the context, gets a root CVSDirectory with
    a fresh id, and each generated CVSFile is fed to a
    _ProjectDataCollector.  A fatal error is recorded if no file at all
    was generated."""

    Ctx()._projects[project.id] = project

    root_id = self.file_key_generator.gen_id()
    root_cvs_directory = CVSDirectory(root_id, project, None, '')
    project.root_cvs_directory_id = root_cvs_directory.id

    pdc = _ProjectDataCollector(self, project)

    files_seen = 0
    for cvs_file in self._generate_cvs_files(root_cvs_directory):
        pdc.process_file(cvs_file)
        files_seen += 1

    if files_seen == 0:
        complaint = (
            'No RCS files found under %r!\n'
            'Are you absolutely certain you are pointing cvs2svn\n'
            'at a CVS repository?\n'
            % (project.project_cvs_repos_path,)
        )
        self.record_fatal_error(complaint)

    pdc.summarize_symbol_transforms()

    self.num_files += pdc.num_files
    Log().verbose('Processed', self.num_files, 'files')
def __setstate__(self, state):
    """Restore this object from STATE = (id, project_id, base_path).

    The project is stored by id and is looked up again in the
    context's project table."""

    self.id, project_id, self.base_path = state
    known_projects = Ctx()._projects
    self.project = known_projects[project_id]
def __setstate__(self, state):
    """Rebuild this instance from STATE.

    STATE is (cvs_file_id, cvs_items, original_ids).  The file is
    looked up in the CVS path database, the trunk is taken from the
    file's project, and everything is handed to
    CVSFileItems.__init__()."""

    cvs_file_id, cvs_items, original_ids = state

    cvs_file = Ctx()._cvs_path_db.get_path(cvs_file_id)
    trunk = cvs_file.project.get_trunk()

    CVSFileItems.__init__(
        self, cvs_file, trunk, cvs_items, original_ids=original_ids)
def process_common_options(self):
    """Handle the options that may be combined with --options.

    These are the verbosity switches, the pass selection (either a
    single pass or a START:END range), --dry-run and --profile."""

    # Every occurrence of a verbosity switch moves the level one step:
    for (_opt, _value) in self.get_options('--verbose', '-v'):
        Log().increase_verbosity()

    for (_opt, _value) in self.get_options('--quiet', '-q'):
        Log().decrease_verbosity()

    passes = self.pass_manager
    for (_opt, value) in self.get_options('--pass', '--passes', '-p'):
        if ':' in value:
            # A range; either end falls back to the first/last pass:
            (first, last) = value.split(':')
            self.start_pass = passes.get_pass_number(first, 1)
            self.end_pass = passes.get_pass_number(last, passes.num_passes)
        else:
            # A single pass is both the start and the end:
            only_pass = passes.get_pass_number(value)
            self.end_pass = only_pass
            self.start_pass = only_pass

    if self.get_options('--dry-run'):
        Ctx().dry_run = True

    if self.get_options('--profile'):
        self.profiling = True
def process_property_setter_options(self):
    """Run the inherited processing, then append a 'collapsed'
    KeywordHandlingPropertySetter to the file property setters."""

    super(DVCSRunOptions, self).process_property_setter_options()

    # Property setters for internal use:
    file_setters = Ctx().file_property_setters
    file_setters.append(KeywordHandlingPropertySetter('collapsed'))
def get_content(self, cvs_rev):
    """Return the content of CVS_REV after all requested fix-ups.

    The '_eol_fix' and '_keyword_handling' properties of CVS_REV select
    the checkout option and the post-processing (AppleSingle decoding
    if enabled in the context, keyword expansion or collapsing, and EOL
    canonicalization).  Raise FatalError if the combination of the two
    properties is not present in self._text_options."""

    # Empty property values are treated like missing ones:
    eol_fix = cvs_rev.get_property('_eol_fix') or None
    keyword_handling = cvs_rev.get_property('_keyword_handling') or None

    lookup_key = (bool(eol_fix), keyword_handling)
    try:
        (k_option, explicit_keyword_handling) = self._text_options[lookup_key]
    except KeyError:
        raise FatalError(
            'Undefined _keyword_handling property (%r) for %s'
            % (keyword_handling, cvs_rev,))

    command = self.get_pipe_command(cvs_rev, k_option)
    data = get_command_output(command)

    if Ctx().decode_apple_single:
        # Insert a filter to decode any files that are in AppleSingle
        # format:
        data = get_maybe_apple_single(data)

    if explicit_keyword_handling == 'expanded':
        data = expand_keywords(data, cvs_rev)
    elif explicit_keyword_handling == 'collapsed':
        data = collapse_keywords(data)

    if not eol_fix:
        return data

    return canonicalize_eol(data, eol_fix)
def process_extraction_options(self):
    """Handle the options that select how CVS data is extracted.

    --use-rcs and --use-cvs are passed to not_both() as a mutually
    exclusive pair."""

    options = self.options
    ctx = Ctx()

    rcs_switch = (options.use_rcs, '--use-rcs')
    cvs_switch = (options.use_cvs, '--use-cvs')
    not_both(*(rcs_switch + cvs_switch))
def __setstate__(self, state):
    """Restore this path from STATE.

    STATE is (id, project_id, parent_directory, basename, ordinal); the
    project is looked up by id in the context's project table."""

    (self.id, project_id, self.parent_directory,
     self.basename, self.ordinal) = state
    known_projects = Ctx()._projects
    self.project = known_projects[project_id]
def register_artifacts(self, which_pass):
    """Register the artifacts needed by WHICH_PASS.

    In addition to what GitRevisionWriter registers, the temporary git
    blob data file is requested when the revision collector has no
    blob_filename of its own."""

    GitRevisionWriter.register_artifacts(self, which_pass)

    collector = Ctx().revision_collector
    if collector.blob_filename is not None:
        return

    artifact_manager.register_temp_file_needed(
        config.GIT_BLOB_DATAFILE, which_pass)
class _NodeSerializer(MarshalSerializer):
    """A MarshalSerializer for nodes, i.e. maps keyed by path objects.

    Before marshalling, a node is flattened to a list of
    (cvs_path.id, value) pairs; after unmarshalling, the ids are
    resolved back to objects through the CVS file database."""

    def __init__(self):
        self.cvs_file_db = Ctx()._cvs_file_db

    def _dump(self, node):
        """Return NODE as a list of (cvs_path.id, value) pairs."""

        pairs = []
        for (cvs_path, value) in node.iteritems():
            pairs.append((cvs_path.id, value))
        return pairs

    def dumpf(self, f, node):
        flattened = self._dump(node)
        MarshalSerializer.dumpf(self, f, flattened)

    def dumps(self, node):
        flattened = self._dump(node)
        return MarshalSerializer.dumps(self, flattened)

    def _load(self, items):
        """Return the node described by ITEMS, a sequence of
        (file id, value) pairs."""

        get_file = self.cvs_file_db.get_file
        return dict([
            (get_file(file_id), value)
            for (file_id, value) in items
        ])

    def loadf(self, f):
        flattened = MarshalSerializer.loadf(self, f)
        return self._load(flattened)

    def loads(self, s):
        flattened = MarshalSerializer.loads(self, s)
        return self._load(flattened)
def _get_cvs_file( self, parent_directory, basename, file_in_attic=False, leave_in_attic=False, ): """Return a CVSFile describing the file with name BASENAME. PARENT_DIRECTORY is the CVSDirectory instance describing the directory that physically holds this file in the filesystem. BASENAME must be the base name of a *,v file within PARENT_DIRECTORY. FILE_IN_ATTIC is a boolean telling whether the specified file is in an Attic subdirectory. If FILE_IN_ATTIC is True, then: - If LEAVE_IN_ATTIC is True, then leave the 'Attic' component in the filename. - Otherwise, raise FileInAndOutOfAtticException if a file with the same filename appears outside of Attic. The CVSFile is assigned a new unique id. All of the CVSFile information is filled in except mode (which can only be determined by parsing the file). Raise FatalError if the resulting filename would not be legal in SVN.""" filename = os.path.join(parent_directory.rcs_path, basename) try: Ctx().output_option.verify_filename_legal(basename[:-2]) except IllegalSVNPathError, e: raise FatalError( 'File %r would result in an illegal SVN filename: %s' % (filename, e,) )
def process_property_setter_options(self):
    """Build the chain of SVN property setters from the options.

    The setters are appended to the context's svn_property_setters in a
    fixed order: auto-props files, mime-types files, the binary-file
    setters, the optional EOL-from-mime-type setter, the default EOL
    style, the keyword setters and finally the executable setter."""

    options = self.options
    setters = Ctx().svn_property_setters

    for auto_props_file in options.auto_props_files:
        setters.append(AutoPropsPropertySetter(
            auto_props_file, options.auto_props_ignore_case))

    for mime_types_file in options.mime_types_files:
        setters.append(MimeMapper(mime_types_file))

    setters.append(CVSBinaryFileEOLStyleSetter())
    setters.append(CVSBinaryFileDefaultMimeTypeSetter())

    if options.eol_from_mime_type:
        setters.append(EOLStyleFromMimeTypeSetter())

    setters.append(DefaultEOLStyleSetter(options.default_eol))
    setters.append(SVNBinaryFileKeywordsPropertySetter())

    if not options.keywords_off:
        setters.append(KeywordsPropertySetter(config.SVN_KEYWORDS_VALUE))

    setters.append(ExecutablePropertySetter())
def delete_path(self, lod, cvs_path):
    """Write the dumpfile records for deleting CVS_PATH from LOD.

    Deleting a '.cvsignore' file first writes a property change for the
    containing directory whose property block is just 'PROPS-END'.  The
    delete record itself is then skipped for '.cvsignore' files unless
    the context's keep_cvsignore flag is set."""

    svn_path = lod.get_path(cvs_path.get_cvs_path())
    (dir_path, basename) = path_split(svn_path)

    if basename == '.cvsignore':
        # When a .cvsignore file is deleted, the directory's svn:ignore
        # property needs to be deleted.
        props = 'PROPS-END\n'
        props_len = len(props)

        # write headers, then props
        dir_record = (
            'Node-path: %s\n'
            'Node-kind: dir\n'
            'Node-action: change\n'
            'Prop-content-length: %d\n'
            'Content-length: %d\n'
            '\n'
            '%s'
            % (self._utf8_path(dir_path), props_len, props_len, props)
        )
        self._dumpfile.write(dir_record)

        if not Ctx().keep_cvsignore:
            return

    delete_record = (
        'Node-path: %s\n'
        'Node-action: delete\n'
        '\n'
        % (self._utf8_path(lod.get_path(cvs_path.cvs_path)),)
    )
    self._dumpfile.write(delete_record)
def get_root_cvs_directory(self):
    """Look up and return this project's root CVSDirectory.

    Only usable once CollectRevsPass has initialized
    self.root_cvs_directory_id."""

    path_db = Ctx()._cvs_path_db
    return path_db.get_path(self.root_cvs_directory_id)
def get_trunk(self):
    """Look up and return this project's Trunk instance.

    Only usable once CollectRevsPass has initialized self.trunk_id."""

    symbol_db = Ctx()._symbol_db
    return symbol_db.get_symbol(self.trunk_id)
def _get_attic_file(self, parent_directory, basename): """Return a CVSFile object for the Attic file at BASENAME. PARENT_DIRECTORY is the CVSDirectory that physically contains the file on the filesystem (i.e., the Attic directory). It is not necessarily the parent_directory of the CVSFile that will be returned. Return CVSFile, whose parent directory is usually PARENT_DIRECTORY.parent_directory, but might be PARENT_DIRECTORY iff CVSFile will remain in the Attic directory.""" try: return self._get_cvs_file( parent_directory, basename, file_in_attic=True, ) except FileInAndOutOfAtticException, e: if Ctx().retain_conflicting_attic_files: logger.warn( "%s: %s;\n" " storing the latter into 'Attic' subdirectory.\n" % (warning_prefix, e) ) else: self.error_handler(str(e)) # Either way, return a CVSFile object so that the rest of the # file processing can proceed: return self._get_cvs_file( parent_directory, basename, file_in_attic=True, leave_in_attic=True, )
def copy_path(self, cvs_path, src_lod, dest_lod, src_revnum):
    """Write a dumpfile record copying CVS_PATH between two LODs.

    The node is added on DEST_LOD with its copy source being the same
    path on SRC_LOD in revision SRC_REVNUM.  A '.cvsignore' file is
    skipped unless the context's keep_cvsignore flag is set.  Raise
    InternalError if CVS_PATH is neither a CVSFile nor a
    CVSDirectory."""

    if isinstance(cvs_path, CVSFile):
        node_kind = 'file'
        # FIXME: Here we have to adjust the containing directory's
        # svn:ignore property to reflect the addition of the
        # .cvsignore file to the LOD!  This is awkward because we
        # don't have the contents of the .cvsignore file available.
        if cvs_path.rcs_basename == '.cvsignore' and not Ctx().keep_cvsignore:
            return
    elif isinstance(cvs_path, CVSDirectory):
        node_kind = 'dir'
    else:
        raise InternalError()

    dest_path = self._utf8_path(dest_lod.get_path(cvs_path.cvs_path))
    src_path = self._utf8_path(src_lod.get_path(cvs_path.cvs_path))

    record = (
        'Node-path: %s\n'
        'Node-kind: %s\n'
        'Node-action: add\n'
        'Node-copyfrom-rev: %d\n'
        'Node-copyfrom-path: %s\n'
        '\n'
        % (dest_path, node_kind, src_revnum, src_path)
    )
    self._dumpfile.write(record)
def __init__(self, target):
    """Start 'svnadmin load -q TARGET' with all three streams piped.

    The child's stdout is closed right away; stdin and stderr stay
    available through self.loader_pipe."""

    command = [Ctx().svnadmin_executable, 'load', '-q', target]
    pipe = subprocess.PIPE
    self.loader_pipe = subprocess.Popen(
        command, stdin=pipe, stdout=pipe, stderr=pipe)
    self.loader_pipe.stdout.close()
def __setstate__(self, state):
    """Restore this path from STATE.

    STATE is (id, project_id, parent_directory, rcs_basename, ordinal).
    The project is looked up by id in the context's project table, and
    rcs_path is not stored but recomputed and normalized."""

    (self.id, project_id, self.parent_directory,
     self.rcs_basename, self.ordinal) = state

    known_projects = Ctx()._projects
    self.project = known_projects[project_id]

    raw_rcs_path = self._calculate_rcs_path()
    self.rcs_path = os.path.normpath(raw_rcs_path)
def process_output_options(self):
    """Validate the SVN output options and install the output option.

    Depending on the options, the conversion writes to an existing
    repository, to a newly created repository, or to a dumpfile.
    Raise FatalError for missing or contradictory options."""

    ctx = Ctx()
    options = self.options

    if options.dump_only and not options.dumpfile:
        raise FatalError("'--dump-only' requires '--dumpfile' to be specified.")

    if not (options.svnrepos or options.dumpfile or ctx.dry_run):
        raise FatalError("must pass one of '-s' or '--dumpfile'.")

    # Pairs of options that must not be given together:
    conflicts = [
        (options.svnrepos, '-s',
         options.dumpfile, '--dumpfile'),
        (options.dumpfile, '--dumpfile',
         options.existing_svnrepos, '--existing-svnrepos'),
        (options.bdb_txn_nosync, '--bdb-txn-nosync',
         options.existing_svnrepos, '--existing-svnrepos'),
        (options.dumpfile, '--dumpfile',
         options.bdb_txn_nosync, '--bdb-txn-nosync'),
        (options.fs_type, '--fs-type',
         options.existing_svnrepos, '--existing-svnrepos'),
    ]
    for conflict in conflicts:
        not_both(*conflict)

    # --bdb-txn-nosync only makes sense for a bdb filesystem:
    if options.bdb_txn_nosync:
        if options.fs_type and options.fs_type != 'bdb':
            raise FatalError("cannot pass --bdb-txn-nosync with --fs-type=%s."
                             % options.fs_type)

    if not options.svnrepos:
        ctx.output_option = DumpfileOutputOption(options.dumpfile)
    elif options.existing_svnrepos:
        ctx.output_option = ExistingRepositoryOutputOption(options.svnrepos)
    else:
        ctx.output_option = NewRepositoryOutputOption(
            options.svnrepos,
            fs_type=options.fs_type,
            bdb_txn_nosync=options.bdb_txn_nosync,
            create_options=options.create_options,
        )
def main(progname, run_options, pass_manager): # Convenience var, so we don't have to keep instantiating this Borg. ctx = Ctx() # Make sure the tmp directory exists. Note that we don't check if # it's empty -- we want to be able to use, for example, "." to hold # tempfiles. if ctx.tmpdir is None: ctx.tmpdir = tempfile.mkdtemp(prefix=('%s-' % (progname,))) erase_tmpdir = True logger.quiet( 'Writing temporary files to %r\n' 'Be sure to use --tmpdir=%r if you need to resume this conversion.' % (ctx.tmpdir, ctx.tmpdir,), ) elif not os.path.exists(ctx.tmpdir): os.mkdir(ctx.tmpdir) erase_tmpdir = True elif not os.path.isdir(ctx.tmpdir): raise FatalError( "cvs2svn tried to use '%s' for temporary files, but that path\n" " exists and is not a directory. Please make it be a directory,\n" " or specify some other directory for temporary files." % (ctx.tmpdir,)) else: erase_tmpdir = False # But do lock the tmpdir, to avoid process clash. try: os.mkdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock')) except OSError, e: if e.errno == errno.EACCES: raise FatalError("Permission denied:" + " No write access to directory '%s'." % ctx.tmpdir) if e.errno == errno.EEXIST: raise FatalError( "cvs2svn is using directory '%s' for temporary files, but\n" " subdirectory '%s/cvs2svn.lock' exists, indicating that another\n" " cvs2svn process is currently using '%s' as its temporary\n" " workspace. If you are certain that is not the case,\n" " then remove the '%s/cvs2svn.lock' subdirectory." % (ctx.tmpdir, ctx.tmpdir, ctx.tmpdir, ctx.tmpdir,)) raise
def process_extraction_options(self):
    """Choose the revision collector that produces the git blobs.

    --use-rcs, --use-cvs and --use-external-blob-generator are checked
    pairwise with not_both().  A dry run collects nothing; otherwise
    both '--blobfile' and '--dumpfile' are required (FatalError if not)
    and the blobs come either from the external blob generator or from
    a GitRevisionCollector fed by the RCS or CVS reader."""

    ctx = Ctx()
    options = self.options

    conflicts = [
        (options.use_rcs, '--use-rcs',
         options.use_cvs, '--use-cvs'),
        (options.use_external_blob_generator, '--use-external-blob-generator',
         options.use_cvs, '--use-cvs'),
        (options.use_external_blob_generator, '--use-external-blob-generator',
         options.use_rcs, '--use-rcs'),
    ]
    for conflict in conflicts:
        not_both(*conflict)

    # cvs2git never needs a revision reader:
    ctx.revision_reader = None

    if ctx.dry_run:
        ctx.revision_collector = NullRevisionCollector()
        return

    if not options.blobfile or not options.dumpfile:
        raise FatalError("must pass '--blobfile' and '--dumpfile' options.")

    if options.use_external_blob_generator:
        ctx.revision_collector = ExternalBlobGenerator(options.blobfile)
        return

    # --use-cvs is the default:
    if not options.use_rcs:
        revision_reader = CVSRevisionReader(
            cvs_executable=options.cvs_executable)
    else:
        revision_reader = RCSRevisionReader(
            co_executable=options.co_executable)

    ctx.revision_collector = GitRevisionCollector(
        options.blobfile, revision_reader)
def process_io_options(self):
    """Process the input/output options for Bazaar output.

    Select the RCS or CVS revision reader (the two options are
    mutually exclusive), require '--dry-run' or '--dumpfile', and
    install an output option that writes file contents inline into the
    fast-import stream.  Raise FatalError if neither was given."""

    ctx = Ctx()
    options = self.options

    not_both(options.use_rcs, '--use-rcs', options.use_cvs, '--use-cvs')

    # --use-cvs is the default:
    if not options.use_rcs:
        revision_reader = CVSRevisionReader(
            cvs_executable=options.cvs_executable)
    else:
        revision_reader = RCSRevisionReader(
            co_executable=options.co_executable)

    if not (ctx.dry_run or options.dumpfile):
        raise FatalError("must pass '--dry-run' or '--dumpfile' option.")

    # The reader is used by the inline writer, not via the context:
    ctx.revision_recorder = NullRevisionRecorder()
    ctx.revision_excluder = NullRevisionExcluder()
    ctx.revision_reader = None

    inline_writer = GitRevisionInlineWriter(revision_reader)
    ctx.output_option = GitOutputOption(
        options.dumpfile,
        inline_writer,
        max_merges=None,
        # Optional map from CVS author names to bzr author names:
        author_transforms={},  # FIXME
    )
def process_all_extraction_options(self):
    """Choose the revision collector and reader from the options.

    --use-rcs, --use-cvs and --use-internal-co are checked pairwise
    with not_both(); the internal checkout code is the default."""

    ctx = Ctx()
    options = self.options

    exclusive_pairs = [
        (options.use_rcs, "--use-rcs", options.use_cvs, "--use-cvs"),
        (options.use_rcs, "--use-rcs",
         options.use_internal_co, "--use-internal-co"),
        (options.use_cvs, "--use-cvs",
         options.use_internal_co, "--use-internal-co"),
    ]
    for pair in exclusive_pairs:
        not_both(*pair)

    if options.use_rcs or options.use_cvs:
        # The external tools do not need anything collected up front:
        ctx.revision_collector = NullRevisionCollector()
        if options.use_rcs:
            ctx.revision_reader = RCSRevisionReader(options.co_executable)
        else:
            ctx.revision_reader = CVSRevisionReader(options.cvs_executable)
    else:
        # --use-internal-co is the default:
        ctx.revision_collector = InternalRevisionCollector(compress=True)
        ctx.revision_reader = InternalRevisionReader(compress=True)
def process_io_options(self):
    """Process the input/output options for git output.

    Select the RCS or CVS revision reader (the two options are
    mutually exclusive).  Unless this is a dry run, '--blobfile' and
    '--dumpfile' are both required (FatalError if not) and file
    contents are recorded into the blob file.  The output option then
    writes 'git fast-import' data that refers to the blobs by mark."""

    ctx = Ctx()
    options = self.options

    not_both(options.use_rcs, '--use-rcs', options.use_cvs, '--use-cvs')

    # --use-cvs is the default:
    if not options.use_rcs:
        revision_reader = CVSRevisionReader(
            cvs_executable=options.cvs_executable)
    else:
        revision_reader = RCSRevisionReader(
            co_executable=options.co_executable)

    if not ctx.dry_run:
        if not options.blobfile or not options.dumpfile:
            raise FatalError("must pass '--blobfile' and '--dumpfile' options.")
        blob_recorder = GitRevisionRecorder(options.blobfile)
        ctx.revision_recorder = SimpleFulltextRevisionRecorderAdapter(
            revision_reader, blob_recorder)
    else:
        ctx.revision_recorder = NullRevisionRecorder()

    ctx.revision_excluder = NullRevisionExcluder()
    ctx.revision_reader = None

    ctx.output_option = GitOutputOption(
        options.dumpfile,
        GitRevisionMarkWriter(),
        max_merges=None,
        # Optional map from CVS author names to git author names:
        author_transforms={},  # FIXME
    )
class _NodeDatabase(object):
    """A database storing all of the directory nodes.

    Nodes are written in groups, one group per call to
    write_new_nodes().  Each group is stored as a dictionary

        {node_id : [(cvs_path.id, node_id),...]}

    whose keys are the node_ids of the new nodes.

    Reading one node reads and caches its whole group, on the
    assumption that the other nodes of the group are likely to be
    needed soon.  The cache is kept across revisions and is cleared
    when it has grown beyond _cache_max_size.

    The dictionaries held in the _cache member (keyed by node_id) are
    *not* copied when they are handed out.  To avoid cross-talk between
    distinct MirrorDirectory instances that have the same node_id,
    users of these dictionaries have to copy them before
    modification."""

    # How many cache entries to allow for each CVSDirectory in the
    # repository.  (Very roughly, this is the number of complete lines
    # of development that fit into the cache at one time.)
    CACHE_SIZE_MULTIPLIER = 5

    # But the cache will never be limited to less than this number:
    MIN_CACHE_LIMIT = 5000

    def __init__(self):
        self.cvs_file_db = Ctx()._cvs_file_db

        store_filename = artifact_manager.get_temp_file(
            config.MIRROR_NODES_STORE)
        index_filename = artifact_manager.get_temp_file(
            config.MIRROR_NODES_INDEX_TABLE)
        self.db = IndexedDatabase(
            store_filename, index_filename, DB_OPEN_NEW,
            serializer=MarshalSerializer(),
        )

        # The largest node_id written by each write_new_nodes() call:
        self._max_node_ids = [0]

        # Maps {node_id : {cvs_path : node_id}}:
        self._cache = {}

        # Count the directories in the repository:
        num_dirs = 0
        for cvs_path in self.cvs_file_db.itervalues():
            if isinstance(cvs_path, CVSDirectory):
                num_dirs += 1

        scaled_limit = int(self.CACHE_SIZE_MULTIPLIER * num_dirs)
        self._cache_max_size = max(scaled_limit, self.MIN_CACHE_LIMIT)

    def _load(self, items):
        """Turn stored (file id, value) pairs back into a node
        dictionary keyed by the objects from the CVS file database."""

        get_file = self.cvs_file_db.get_file
        return dict([
            (get_file(file_id), value)
            for (file_id, value) in items
        ])

    def _dump(self, node):
        """Flatten NODE into a list of (cvs_path.id, value) pairs."""

        pairs = []
        for (cvs_path, value) in node.iteritems():
            pairs.append((cvs_path.id, value))
        return pairs

    def _determine_index(self, id):
        """Return the index of the record holding the node with ID."""

        # _max_node_ids[i] is the largest node_id in record i, so the
        # first entry that is >= ID identifies the record:
        return bisect.bisect_left(self._max_node_ids, id)

    def __getitem__(self, id):
        if id not in self._cache:
            # Cache miss: pull the node's whole group into the cache.
            group = self.db[self._determine_index(id)]
            for (node_id, stored_items) in group.items():
                self._cache[node_id] = self._load(stored_items)

        return self._cache[id]

    def write_new_nodes(self, nodes):
        """Write NODES to the database.

        NODES is an iterable of writable CurrentMirrorDirectory
        instances."""

        if len(self._cache) > self._cache_max_size:
            # The cache has outgrown its limit.  Throw away the old
            # entries (the new nodes are still cached below):
            Log().debug('Clearing node cache')
            self._cache.clear()

        group = {}
        highest_id = 0
        for node in nodes:
            if node.id > highest_id:
                highest_id = node.id
            entries = node._entries
            group[node.id] = self._dump(entries)
            self._cache[node.id] = entries

        self.db[len(self._max_node_ids)] = group

        if highest_id == 0:
            # Nothing raised the maximum; repeat the last value:
            highest_id = self._max_node_ids[-1]

        self._max_node_ids.append(highest_id)

    def close(self):
        self._cache.clear()
        self.db.close()
        self.db = None
def process_remaining_options(self):
    """Process the options that are not compatible with --options.

    Walk the (option, value) pairs in self.opts.  Some options are
    stored directly on the Ctx() instance; the others are collected in
    local variables, checked for consistency, and then used to set
    ctx.output_option, ctx.revision_recorder, ctx.revision_excluder,
    ctx.revision_reader and the three ctx.cvs_*_decoder attributes.

    Unless self.args holds exactly one argument, print the usage
    message and exit with status 1.

    Raise FatalError if an option value is invalid, if mutually
    exclusive options are combined, if no output target was specified
    (and ctx.dry_run is not set), or if an encoding is unknown."""

    # Convenience var, so we don't have to keep instantiating this Borg.
    ctx = Ctx()

    # NOTE(review): several of the values gathered below (for example
    # trunk_base, branches_base, tags_base, symbol_strategy_default,
    # symbol_transforms, symbol_strategy_rules, mime_types_file,
    # auto_props_file, default_eol, keywords_off and cvsroot) are not
    # consumed anywhere in this method as it stands.  Confirm whether
    # the code that used them is missing before removing them.
    target = None
    existing_svnrepos = False
    fs_type = None
    bdb_txn_nosync = False
    dump_only = False
    dumpfile = None
    use_rcs = False
    use_cvs = False
    use_internal_co = False
    symbol_strategy_default = 'heuristic'
    mime_types_file = None
    auto_props_file = None
    auto_props_ignore_case = True
    eol_from_mime_type = False
    default_eol = None
    keywords_off = False
    co_executable = config.CO_EXECUTABLE
    cvs_executable = config.CVS_EXECUTABLE
    trunk_base = config.DEFAULT_TRUNK_BASE
    branches_base = config.DEFAULT_BRANCHES_BASE
    tags_base = config.DEFAULT_TAGS_BASE
    encodings = ['ascii']
    fallback_encoding = None
    force_branch = False
    force_tag = False
    symbol_transforms = []
    symbol_strategy_rules = []

    for opt, value in self.opts:
        if opt in ['-s', '--svnrepos']:
            target = value
        elif opt == '--existing-svnrepos':
            existing_svnrepos = True
        elif opt == '--dumpfile':
            dumpfile = value
        elif opt == '--use-rcs':
            use_rcs = True
        elif opt == '--use-cvs':
            use_cvs = True
        elif opt == '--use-internal-co':
            use_internal_co = True
        elif opt == '--trunk-only':
            ctx.trunk_only = True
        elif opt == '--trunk':
            trunk_base = value
        elif opt == '--branches':
            branches_base = value
        elif opt == '--tags':
            tags_base = value
        elif opt == '--no-prune':
            ctx.prune = False
        elif opt == '--encoding':
            # Insert before the last element so that 'ascii' stays at
            # the end of the list.
            encodings.insert(-1, value)
        elif opt == '--fallback-encoding':
            fallback_encoding = value
        elif opt == '--symbol-hints':
            symbol_strategy_rules.append(SymbolHintsFileRule(value))
        elif opt == '--force-branch':
            symbol_strategy_rules.append(ForceBranchRegexpStrategyRule(value))
            force_branch = True
        elif opt == '--force-tag':
            symbol_strategy_rules.append(ForceTagRegexpStrategyRule(value))
            force_tag = True
        elif opt == '--exclude':
            symbol_strategy_rules.append(ExcludeRegexpStrategyRule(value))
        elif opt == '--symbol-default':
            if value not in ['branch', 'tag', 'heuristic', 'strict']:
                raise FatalError(
                    '%r is not a valid option for --symbol-default.'
                    % (value,))
            symbol_strategy_default = value
        elif opt == '--no-cross-branch-commits':
            ctx.cross_branch_commits = False
        elif opt == '--retain-conflicting-attic-files':
            ctx.retain_conflicting_attic_files = True
        elif opt == '--symbol-transform':
            # The value must consist of exactly two colon-separated
            # parts.  Report anything else as a FatalError rather than
            # letting the unpacking fail with a bare ValueError.
            transform_parts = value.split(":")
            if len(transform_parts) != 2:
                raise FatalError(
                    "'%s' is not a valid symbol transform; "
                    "it must have the form 'pattern:replacement'."
                    % (value,))
            [pattern, replacement] = transform_parts
            try:
                symbol_transforms.append(
                    RegexpSymbolTransform(pattern, replacement))
            except re.error:
                raise FatalError("'%s' is not a valid regexp." % (pattern,))
        elif opt == '--username':
            ctx.username = value
        elif opt == '--fs-type':
            fs_type = value
        elif opt == '--bdb-txn-nosync':
            bdb_txn_nosync = True
        elif opt == '--cvs-revnums':
            ctx.svn_property_setters.append(CVSRevisionNumberSetter())
        elif opt == '--mime-types':
            mime_types_file = value
        elif opt == '--auto-props':
            auto_props_file = value
        elif opt == '--auto-props-ignore-case':
            # "ignore case" is now the default, so this option doesn't
            # affect anything.
            auto_props_ignore_case = True
        elif opt == '--eol-from-mime-type':
            eol_from_mime_type = True
        elif opt == '--default-eol':
            try:
                # Check that value is valid, and translate it to the
                # proper case
                default_eol = {
                    'binary' : None,
                    'native' : 'native',
                    'crlf' : 'CRLF',
                    'lf' : 'LF',
                    'cr' : 'CR',
                    }[value.lower()]
            except KeyError:
                raise FatalError(
                    'Illegal value specified for --default-eol: %s'
                    % (value,)
                    )
        elif opt == '--no-default-eol':
            # For backwards compatibility:
            default_eol = None
        elif opt == '--keywords-off':
            keywords_off = True
        elif opt == '--tmpdir':
            ctx.tmpdir = value
        elif opt == '--write-symbol-info':
            ctx.symbol_info_filename = value
        elif opt == '--skip-cleanup':
            ctx.skip_cleanup = True
        elif opt == '--svnadmin':
            ctx.svnadmin_executable = value
        elif opt == '--co':
            co_executable = value
        elif opt == '--cvs':
            cvs_executable = value
        elif opt == '--sort':
            ctx.sort_executable = value
        elif opt == '--dump-only':
            dump_only = True
            Log().error(
                warning_prefix +
                ': The --dump-only option is deprecated (it is implied\n'
                'by --dumpfile).\n'
                )
        elif opt == '--create':
            Log().error(
                warning_prefix +
                ': The behaviour produced by the --create option is now the '
                'default,\nand passing the option is deprecated.\n'
                )

    # Consistency check for options and arguments.
    if len(self.args) == 0:
        self.usage()
        sys.exit(1)

    if len(self.args) > 1:
        Log().error(error_prefix + ": must pass only one CVS repository.\n")
        self.usage()
        sys.exit(1)

    cvsroot = self.args[0]

    if dump_only and not dumpfile:
        raise FatalError(
            "'--dump-only' requires '--dumpfile' to be specified.")

    if (not target) and (not dumpfile) and (not ctx.dry_run):
        raise FatalError("must pass one of '-s' or '--dumpfile'.")

    def not_both(opt1val, opt1name, opt2val, opt2name):
        """Raise FatalError if OPT1VAL and OPT2VAL are both true."""
        if opt1val and opt2val:
            raise FatalError("cannot pass both '%s' and '%s'."
                             % (opt1name, opt2name,))

    not_both(target, '-s',
             dumpfile, '--dumpfile')

    not_both(dumpfile, '--dumpfile',
             existing_svnrepos, '--existing-svnrepos')

    not_both(bdb_txn_nosync, '--bdb-txn-nosync',
             existing_svnrepos, '--existing-svnrepos')

    not_both(dumpfile, '--dumpfile',
             bdb_txn_nosync, '--bdb-txn-nosync')

    not_both(fs_type, '--fs-type',
             existing_svnrepos, '--existing-svnrepos')

    not_both(use_rcs, '--use-rcs',
             use_cvs, '--use-cvs')

    not_both(use_rcs, '--use-rcs',
             use_internal_co, '--use-internal-co')

    not_both(use_cvs, '--use-cvs',
             use_internal_co, '--use-internal-co')

    not_both(ctx.trunk_only, '--trunk-only',
             force_branch, '--force-branch')

    not_both(ctx.trunk_only, '--trunk-only',
             force_tag, '--force-tag')

    if fs_type and fs_type != 'bdb' and bdb_txn_nosync:
        raise FatalError("cannot pass --bdb-txn-nosync with --fs-type=%s."
                         % fs_type)

    # Select where the output goes:
    if target:
        if existing_svnrepos:
            ctx.output_option = ExistingRepositoryOutputOption(target)
        else:
            ctx.output_option = NewRepositoryOutputOption(
                target, fs_type=fs_type, bdb_txn_nosync=bdb_txn_nosync)
    else:
        ctx.output_option = DumpfileOutputOption(dumpfile)

    # Select how file revision contents are obtained:
    if use_rcs:
        ctx.revision_recorder = NullRevisionRecorder()
        ctx.revision_excluder = NullRevisionExcluder()
        ctx.revision_reader = RCSRevisionReader(co_executable)
    elif use_cvs:
        ctx.revision_recorder = NullRevisionRecorder()
        ctx.revision_excluder = NullRevisionExcluder()
        ctx.revision_reader = CVSRevisionReader(cvs_executable)
    else:
        # --use-internal-co is the default:
        ctx.revision_recorder = InternalRevisionRecorder(compress=True)
        ctx.revision_excluder = InternalRevisionExcluder()
        ctx.revision_reader = InternalRevisionReader(compress=True)

    try:
        ctx.cvs_author_decoder = CVSTextDecoder(encodings, fallback_encoding)
        ctx.cvs_log_decoder = CVSTextDecoder(encodings, fallback_encoding)
        # Don't use fallback_encoding for filenames:
        ctx.cvs_filename_decoder = CVSTextDecoder(encodings)
    except LookupError:
        # Fetch the exception through sys.exc_info() so that this
        # clause needs no version-specific "except ..., e" / "as e"
        # syntax.
        e = sys.exc_info()[1]
        raise FatalError(str(e))
def __init__(self): self.cvs_file_db = Ctx()._cvs_file_db