def __init__(self):
  self.cvs_file_db = Ctx()._cvs_file_db
  self.db = IndexedDatabase(
      artifact_manager.get_temp_file(config.MIRROR_NODES_STORE),
      artifact_manager.get_temp_file(config.MIRROR_NODES_INDEX_TABLE),
      DB_OPEN_NEW,
      serializer=MarshalSerializer(),
      )
  # A list of the maximum node_id stored by each call to
  # write_new_nodes():
  self._max_node_ids = [0]
  # A map {node_id : {cvs_path : node_id}}:
  self._cache = {}
  # The number of directories in the repository:
  num_dirs = len([
      cvs_path
      for cvs_path in self.cvs_file_db.itervalues()
      if isinstance(cvs_path, CVSDirectory)
      ])
  self._cache_max_size = max(
      int(self.CACHE_SIZE_MULTIPLIER * num_dirs),
      self.MIN_CACHE_LIMIT,
      )

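# A worked example of the cache sizing above.  The constant values are
# assumptions for illustration; only the max()-of-floor-and-multiple
# shape is taken from the code:
CACHE_SIZE_MULTIPLIER = 5      # hypothetical value
MIN_CACHE_LIMIT = 5000         # hypothetical value
for num_dirs in (100, 10000):
  print max(int(CACHE_SIZE_MULTIPLIER * num_dirs), MIN_CACHE_LIMIT)
# With 100 directories the floor wins (5000); with 10000 directories
# the proportional term wins (50000).
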
def __init__(self, revision_recorder, stats_keeper):
  self.revision_recorder = revision_recorder
  self._cvs_item_store = NewCVSItemStore(
      artifact_manager.get_temp_file(config.CVS_ITEMS_STORE))
  self.metadata_db = MetadataDatabase(
      artifact_manager.get_temp_file(config.METADATA_STORE),
      artifact_manager.get_temp_file(config.METADATA_INDEX_TABLE),
      DB_OPEN_NEW,
      )
  self.metadata_logger = MetadataLogger(self.metadata_db)
  self.fatal_errors = []
  self.num_files = 0
  self.symbol_stats = SymbolStatisticsCollector()
  self.stats_keeper = stats_keeper
  # Key generator for CVSFiles:
  self.file_key_generator = KeyGenerator()
  # Key generator for CVSItems:
  self.item_key_generator = KeyGenerator()
  # Key generator for Symbols:
  self.symbol_key_generator = KeyGenerator()
  self.revision_recorder.start()

def start(self):
  self._delta_db = IndexedDatabase(
      artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
      artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
      DB_OPEN_READ,
      )
  # Make deletions from the read-only delta database silent no-ops:
  self._delta_db.__delitem__ = lambda id: None
  self._tree_db = IndexedDatabase(
      artifact_manager.get_temp_file(config.RCS_TREES_STORE),
      artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
      DB_OPEN_READ,
      )
  serializer = MarshalSerializer()
  if self._compress:
    serializer = CompressingSerializer(serializer)
  self._co_db = self._Database(
      artifact_manager.get_temp_file(config.CVS_CHECKOUT_DB),
      DB_OPEN_NEW,
      serializer,
      )
  # The set of CVSFile instances whose TextRecords have already been
  # read:
  self._loaded_files = set()
  # A map { CVSFILE : _FileTree } for files that currently have live
  # revisions:
  self._text_record_db = TextRecordDatabase(self._delta_db, self._co_db)

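# A standalone sketch of the __delitem__ override used in start() above.
# Assigning a no-op to the instance makes later `del db[id]` statements
# silently do nothing.  This relies on Python 2 classic-class semantics,
# where special methods are looked up on the instance (new-style classes
# look them up on the type, so the trick would not work there):
class ReadOnlyStore:
  def __init__(self):
    self._data = {1: 'delta'}
  def __getitem__(self, id):
    return self._data[id]
  def __delitem__(self, id):
    del self._data[id]

store = ReadOnlyStore()
store.__delitem__ = lambda id: None   # disable deletion on this instance
del store[1]                          # silently ignored
assert store[1] == 'delta'
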
def run(self, run_options, stats_keeper):
  self.cvs_item_store = IndexedCVSItemStore(
      artifact_manager.get_temp_file(self.cvs_items_store_file),
      artifact_manager.get_temp_file(self.cvs_items_store_index_file),
      DB_OPEN_READ)
  CheckDependenciesPass.run(self, run_options, stats_keeper)
  self.cvs_item_store.close()
  self.cvs_item_store = None

def start(self):
  self._tree_db = IndexedDatabase(
      artifact_manager.get_temp_file(config.RCS_TREES_STORE),
      artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
      DB_OPEN_READ)
  primer = (FullTextRecord, DeltaTextRecord)
  self._new_tree_db = IndexedDatabase(
      artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_STORE),
      artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_INDEX_TABLE),
      DB_OPEN_NEW,
      PrimedPickleSerializer(primer))

def show_filtered_cvs_item_store():
  from cvs2svn_lib.cvs_item_database import IndexedCVSItemStore
  db = IndexedCVSItemStore(
      artifact_manager.get_temp_file(config.CVS_ITEMS_FILTERED_STORE),
      artifact_manager.get_temp_file(config.CVS_ITEMS_FILTERED_INDEX_TABLE),
      DB_OPEN_READ)
  ids = list(db.iterkeys())
  ids.sort()
  for id in ids:
    cvs_item = db[id]
    print "%6x: %r" % (cvs_item.id, cvs_item,)

def start(self):
  ser = MarshalSerializer()
  if self._compress:
    ser = CompressingSerializer(ser)
  self._rcs_deltas = IndexedDatabase(
      artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
      artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
      DB_OPEN_NEW, ser)
  primer = (FullTextRecord, DeltaTextRecord)
  self._rcs_trees = IndexedDatabase(
      artifact_manager.get_temp_file(config.RCS_TREES_STORE),
      artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
      DB_OPEN_NEW, PrimedPickleSerializer(primer))

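# The snippets above wrap a base serializer in a CompressingSerializer
# when compression is requested.  A hedged sketch of that decorator
# pattern (toy classes for illustration, not the cvs2svn
# implementations):
import marshal
import zlib

class ToyMarshalSerializer:
  def dumps(self, obj):
    return marshal.dumps(obj)
  def loads(self, s):
    return marshal.loads(s)

class ToyCompressingSerializer:
  def __init__(self, wrapee):
    self.wrapee = wrapee
  def dumps(self, obj):
    return zlib.compress(self.wrapee.dumps(obj), 9)
  def loads(self, s):
    return self.wrapee.loads(zlib.decompress(s))

ser = ToyCompressingSerializer(ToyMarshalSerializer())
assert ser.loads(ser.dumps({'rev': '1.1'})) == {'rev': '1.1'}
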
def close(self): """Store the stats database to the SYMBOL_STATISTICS file.""" f = open(artifact_manager.get_temp_file(config.SYMBOL_STATISTICS), 'wb') cPickle.dump(self._stats.values(), f, -1) f.close() self._stats = None
def close(self):
  if self.mode == DB_OPEN_NEW:
    f = open(artifact_manager.get_temp_file(config.CVS_FILES_DB), 'wb')
    cPickle.dump(self._cvs_files, f, -1)
    f.close()

  self._cvs_files = None

def __init__(self): """Opens the SYMBOL_OPENINGS_CLOSINGS_SORTED for reading, and reads the offsets database into memory.""" self.symbolings = open( artifact_manager.get_temp_file( config.SYMBOL_OPENINGS_CLOSINGS_SORTED), 'r') # The offsets_db is really small, and we need to read and write # from it a fair bit, so suck it into memory offsets_db = file( artifact_manager.get_temp_file(config.SYMBOL_OFFSETS_DB), 'rb') # A map from symbol_id to offset. The values of this map are # incremented as the openings and closings for a symbol are # consumed. self.offsets = cPickle.load(offsets_db) offsets_db.close()
def close(self):
  if self.mode == DB_OPEN_NEW:
    f = open(artifact_manager.get_temp_file(config.CVS_PATHS_DB), 'wb')
    cPickle.dump(self._cvs_paths.values(), f, -1)
    f.close()

  self._cvs_paths = None

def close(self):
  if self.mode == DB_OPEN_NEW:
    self.set_cvs_path_ordinals()
    f = open(artifact_manager.get_temp_file(config.CVS_PATHS_DB), 'wb')
    cPickle.dump(self._cvs_paths.values(), f, -1)
    f.close()

  self._cvs_paths = None

def __init__(self, mode):
  self.mode = mode
  if mode not in (DB_OPEN_NEW, DB_OPEN_READ):
    raise RuntimeError("Invalid 'mode' argument to PersistenceManager")
  primer = (
      SVNInitialProjectCommit, SVNPrimaryCommit, SVNPostCommit,
      SVNBranchCommit, SVNTagCommit,
      )
  serializer = PrimedPickleSerializer(primer)
  self.svn_commit_db = IndexedDatabase(
      artifact_manager.get_temp_file(config.SVN_COMMITS_INDEX_TABLE),
      artifact_manager.get_temp_file(config.SVN_COMMITS_STORE),
      mode, serializer,
      )
  self.cvs2svn_db = RecordTable(
      artifact_manager.get_temp_file(config.CVS_REVS_TO_SVN_REVNUMS),
      mode, SignedIntegerPacker(SVN_INVALID_REVNUM),
      )

def create_symbol_database(symbols):
  """Create and fill a symbol database.

  Record each symbol that is listed in SYMBOLS, which is an iterable
  containing Trunk and TypedSymbol objects."""

  f = open(artifact_manager.get_temp_file(config.SYMBOL_DB), 'wb')
  cPickle.dump(symbols, f, -1)
  f.close()

def __init__(self):
  # A map { id : TypedSymbol }
  self._symbols = {}

  f = open(artifact_manager.get_temp_file(config.SYMBOL_DB), 'rb')
  symbols = cPickle.load(f)
  f.close()
  for symbol in symbols:
    self._symbols[symbol.id] = symbol

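# The two snippets above form a write/read pair over one pickle file.
# A self-contained sketch of the same round trip; Symbol is a toy
# stand-in for Trunk/TypedSymbol:
import cPickle
from cStringIO import StringIO

class Symbol:
  def __init__(self, id, name):
    self.id = id
    self.name = name

f = StringIO()
cPickle.dump([Symbol(1, 'trunk'), Symbol(2, 'v1_0')], f, -1)   # -1: newest protocol

f.seek(0)
symbols = dict((s.id, s) for s in cPickle.load(f))
assert symbols[2].name == 'v1_0'
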
def start(self):
  self.revision_reader.start()
  if self.blob_filename is None:
    self.dump_file = open(
        artifact_manager.get_temp_file(config.GIT_BLOB_DATAFILE), 'wb',
        )
  else:
    self.dump_file = open(self.blob_filename, 'wb')
  self._mark_generator = KeyGenerator()

def __init__(self, stats_keeper):
  self._cvs_item_store = NewCVSItemStore(
      artifact_manager.get_temp_file(config.CVS_ITEMS_STORE))
  self.metadata_db = MetadataDatabase(
      artifact_manager.get_temp_file(config.METADATA_STORE),
      artifact_manager.get_temp_file(config.METADATA_INDEX_TABLE),
      DB_OPEN_NEW,
      )
  self.metadata_logger = MetadataLogger(self.metadata_db)
  self.fatal_errors = []
  self.num_files = 0
  self.symbol_stats = SymbolStatisticsCollector()
  self.stats_keeper = stats_keeper
  # Key generator for CVSItems:
  self.item_key_generator = KeyGenerator()
  # Key generator for Symbols:
  self.symbol_key_generator = KeyGenerator()

def start(self, mirror, f):
  GitRevisionWriter.start(self, mirror, f)
  if Ctx().revision_collector.blob_filename is None:
    # The revision collector wrote the blobs to a temporary file;
    # copy them into f:
    logger.normal('Copying blob data to output')
    blobf = open(
        artifact_manager.get_temp_file(config.GIT_BLOB_DATAFILE), 'rb',
        )
    shutil.copyfileobj(blobf, f)
    blobf.close()

def read_stats_keeper():
  """Factory function: Return a _StatsKeeper instance.

  Read the instance out of the STATISTICS_FILE pickle, which must
  already exist."""

  filename = artifact_manager.get_temp_file(config.STATISTICS_FILE)
  f = open(filename, 'rb')
  retval = cPickle.load(f)
  f.close()
  return retval

def iter_cvs_items(self):
  cvs_item_store = OldCVSItemStore(
      artifact_manager.get_temp_file(self.cvs_items_store_file))

  for cvs_file_items in cvs_item_store.iter_cvs_file_items():
    self.current_cvs_file_items = cvs_file_items
    for cvs_item in cvs_file_items.values():
      yield cvs_item

  del self.current_cvs_file_items

  cvs_item_store.close()

def open(self): """Set up the SVNRepositoryMirror and prepare it for SVNCommits.""" self._key_generator = KeyGenerator() self._delegates = [ ] # A map from LOD to LODHistory instance for all LODs that have # been defines so far: self._lod_histories = {} # This corresponds to the 'nodes' table in a Subversion fs. (We # don't need a 'representations' or 'strings' table because we # only track metadata, not file contents.) self._nodes_db = IndexedDatabase( artifact_manager.get_temp_file(config.SVN_MIRROR_NODES_STORE), artifact_manager.get_temp_file(config.SVN_MIRROR_NODES_INDEX_TABLE), DB_OPEN_NEW, serializer=_NodeSerializer() ) # Start at revision 0 without a root node. It will be created # by _open_writable_root_node. self._youngest = 0
def start(self):
  self._mark_generator = KeyGenerator()
  logger.normal('Starting generate_blobs.py...')
  if self.blob_filename is None:
    blob_filename = artifact_manager.get_temp_file(config.GIT_BLOB_DATAFILE)
  else:
    blob_filename = self.blob_filename
  self._pipe = subprocess.Popen(
      [
          sys.executable,
          os.path.join(os.path.dirname(__file__), 'generate_blobs.py'),
          blob_filename,
          ],
      stdin=subprocess.PIPE,
      )

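# A generic sketch of the helper-process pattern used above: spawn a
# Python script with subprocess and stream work to it over stdin.
# helper.py and the request format are hypothetical; the real protocol
# between the collector and generate_blobs.py is not shown here:
import subprocess
import sys

pipe = subprocess.Popen(
    [sys.executable, 'helper.py', 'blobs.dat'],
    stdin=subprocess.PIPE,
    )
pipe.stdin.write('one request per line\n')
pipe.stdin.close()   # signal EOF so the helper can finish
pipe.wait()
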
def open(self): """Set up the SVNRepositoryMirror and prepare it for SVNCommits.""" self._key_generator = KeyGenerator() self._delegates = [] # A map from LOD to LODHistory instance for all LODs that have # been defines so far: self._lod_histories = {} # This corresponds to the 'nodes' table in a Subversion fs. (We # don't need a 'representations' or 'strings' table because we # only track metadata, not file contents.) self._nodes_db = IndexedDatabase( artifact_manager.get_temp_file(config.SVN_MIRROR_NODES_STORE), artifact_manager.get_temp_file( config.SVN_MIRROR_NODES_INDEX_TABLE), DB_OPEN_NEW, serializer=_NodeSerializer()) # Start at revision 0 without a root node. It will be created # by _open_writable_root_node. self._youngest = 0
def __init__(self, mode): """Initialize an instance, opening database in MODE (like the MODE argument to Database or anydbm.open()).""" self.mode = mode if self.mode == DB_OPEN_NEW: # A list of CVSFile instances where _cvs_files[cvs_file.id] == # cvs_file. If there are any gaps in the numerical sequence, # the corresponding array positions are None. self._cvs_files = [] elif self.mode == DB_OPEN_READ: f = open(artifact_manager.get_temp_file(config.CVS_FILES_DB), 'rb') self._cvs_files = cPickle.load(f) f.close() else: raise RuntimeError('Invalid mode %r' % self.mode)
def __init__(self, mode): """Initialize an instance, opening database in MODE (where MODE is either DB_OPEN_NEW or DB_OPEN_READ).""" self.mode = mode # A map { id : CVSPath } self._cvs_paths = {} if self.mode == DB_OPEN_NEW: pass elif self.mode == DB_OPEN_READ: f = open(artifact_manager.get_temp_file(config.CVS_PATHS_DB), 'rb') cvs_paths = cPickle.load(f) for cvs_path in cvs_paths: self._cvs_paths[cvs_path.id] = cvs_path else: raise RuntimeError('Invalid mode %r' % self.mode)
def run(self, run_options, stats_keeper):
  Ctx()._projects = read_projects(
      artifact_manager.get_temp_file(config.PROJECTS))
  Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
  self.symbol_db = SymbolDatabase()
  Ctx()._symbol_db = self.symbol_db

  logger.quiet("Checking dependency consistency...")

  fatal_errors = []
  for cvs_item in self.iter_cvs_items():
    # Check that the pred_ids and succ_ids are mutually consistent:
    for pred_id in cvs_item.get_pred_ids():
      pred = self.get_cvs_item(pred_id)
      if cvs_item.id not in pred.get_succ_ids():
        fatal_errors.append(
            '%s lists pred=%s, but not vice versa.' % (cvs_item, pred,))
    for succ_id in cvs_item.get_succ_ids():
      succ = self.get_cvs_item(succ_id)
      if cvs_item.id not in succ.get_pred_ids():
        fatal_errors.append(
            '%s lists succ=%s, but not vice versa.' % (cvs_item, succ,))

  if fatal_errors:
    raise FatalException(
        'Dependencies inconsistent:\n'
        '%s\n'
        'Exited due to fatal error(s).' % ('\n'.join(fatal_errors),))

  self.symbol_db.close()
  self.symbol_db = None
  Ctx()._cvs_path_db.close()
  logger.quiet("Done")

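# The invariant that run() above verifies, in miniature: every pred
# edge must be mirrored by a succ edge and vice versa.  Toy data for
# illustration; the real items come from the CVS item store:
preds = {1: set(), 2: set([1])}   # item id -> ids of predecessors
succs = {1: set([2]), 2: set()}   # item id -> ids of successors
for item_id, pred_ids in preds.items():
  for pred_id in pred_ids:
    assert item_id in succs[pred_id], \
        '%s lists pred=%s, but not vice versa.' % (item_id, pred_id)
for item_id, succ_ids in succs.items():
  for succ_id in succ_ids:
    assert item_id in preds[succ_id], \
        '%s lists succ=%s, but not vice versa.' % (item_id, succ_id)
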
def prime_ctx():
  def rf(filename):
    artifact_manager.register_temp_file(filename, None)

  from cvs2svn_lib.common import DB_OPEN_READ
  from cvs2svn_lib.symbol_database import SymbolDatabase
  from cvs2svn_lib.cvs_path_database import CVSPathDatabase
  rf(config.CVS_PATHS_DB)
  rf(config.SYMBOL_DB)

  from cvs2svn_lib.cvs_item_database import OldCVSItemStore
  from cvs2svn_lib.metadata_database import MetadataDatabase
  rf(config.METADATA_DB)
  rf(config.CVS_ITEMS_STORE)
  rf(config.CVS_ITEMS_FILTERED_STORE)
  rf(config.CVS_ITEMS_FILTERED_INDEX_TABLE)

  artifact_manager.pass_started(None)

  Ctx()._projects = ProjectList()
  Ctx()._symbol_db = SymbolDatabase()
  Ctx()._cvs_path_db = CVSPathDatabase(DB_OPEN_READ)
  Ctx()._cvs_items_db = OldCVSItemStore(
      artifact_manager.get_temp_file(config.CVS_ITEMS_STORE))
  Ctx()._metadata_db = MetadataDatabase(DB_OPEN_READ)

def archive(self):
  filename = artifact_manager.get_temp_file(config.STATISTICS_FILE)
  f = open(filename, 'wb')
  cPickle.dump(self, f)
  f.close()

def __init__(self):
  self.symbolings = open(
      artifact_manager.get_temp_file(config.SYMBOL_OPENINGS_CLOSINGS), 'w')

def run(self, run_options): """Run the specified passes, one after another. RUN_OPTIONS will be passed to the Passes' run() methods. RUN_OPTIONS.start_pass is the number of the first pass that should be run. RUN_OPTIONS.end_pass is the number of the last pass that should be run. It must be that 1 <= RUN_OPTIONS.start_pass <= RUN_OPTIONS.end_pass <= self.num_passes.""" # Convert start_pass and end_pass into the indices of the passes # to execute, using the Python index range convention (i.e., first # pass executed and first pass *after* the ones that should be # executed). index_start = run_options.start_pass - 1 index_end = run_options.end_pass # Inform the artifact manager when artifacts are created and used: for (i, the_pass) in enumerate(self.passes): the_pass.register_artifacts() # Each pass creates a new version of the statistics file: artifact_manager.register_temp_file( config.STATISTICS_FILE % (i + 1, ), the_pass) if i != 0: # Each pass subsequent to the first reads the statistics file # from the preceding pass: artifact_manager.register_temp_file_needed( config.STATISTICS_FILE % (i + 1 - 1, ), the_pass) # Tell the artifact manager about passes that are being skipped this run: for the_pass in self.passes[0:index_start]: artifact_manager.pass_skipped(the_pass) start_time = time.time() for i in range(index_start, index_end): the_pass = self.passes[i] logger.quiet('----- pass %d (%s) -----' % ( i + 1, the_pass.name, )) artifact_manager.pass_started(the_pass) if i == 0: stats_keeper = StatsKeeper() else: stats_keeper = read_stats_keeper( artifact_manager.get_temp_file(config.STATISTICS_FILE % (i + 1 - 1, ))) the_pass.run(run_options, stats_keeper) end_time = time.time() stats_keeper.log_duration_for_pass(end_time - start_time, i + 1, the_pass.name) logger.normal(stats_keeper.single_pass_timing(i + 1)) stats_keeper.archive( artifact_manager.get_temp_file(config.STATISTICS_FILE % (i + 1, ))) start_time = end_time Ctx().clean() # Allow the artifact manager to clean up artifacts that are no # longer needed: artifact_manager.pass_done(the_pass, Ctx().skip_cleanup) self.garbage_collection_policy.check_for_garbage() # Tell the artifact manager about passes that are being deferred: for the_pass in self.passes[index_end:]: artifact_manager.pass_deferred(the_pass) logger.quiet(stats_keeper) logger.normal(stats_keeper.timings()) # Consistency check: artifact_manager.check_clean()
def run(self, run_options): """Run the specified passes, one after another. RUN_OPTIONS will be passed to the Passes' run() methods. RUN_OPTIONS.start_pass is the number of the first pass that should be run. RUN_OPTIONS.end_pass is the number of the last pass that should be run. It must be that 1 <= RUN_OPTIONS.start_pass <= RUN_OPTIONS.end_pass <= self.num_passes.""" # Convert start_pass and end_pass into the indices of the passes # to execute, using the Python index range convention (i.e., first # pass executed and first pass *after* the ones that should be # executed). index_start = run_options.start_pass - 1 index_end = run_options.end_pass # Inform the artifact manager when artifacts are created and used: for (i, the_pass) in enumerate(self.passes): the_pass.register_artifacts() # Each pass creates a new version of the statistics file: artifact_manager.register_temp_file( config.STATISTICS_FILE % (i + 1,), the_pass ) if i != 0: # Each pass subsequent to the first reads the statistics file # from the preceding pass: artifact_manager.register_temp_file_needed( config.STATISTICS_FILE % (i + 1 - 1,), the_pass ) # Tell the artifact manager about passes that are being skipped this run: for the_pass in self.passes[0:index_start]: artifact_manager.pass_skipped(the_pass) start_time = time.time() for i in range(index_start, index_end): the_pass = self.passes[i] logger.quiet('----- pass %d (%s) -----' % (i + 1, the_pass.name,)) artifact_manager.pass_started(the_pass) if i == 0: stats_keeper = StatsKeeper() else: stats_keeper = read_stats_keeper( artifact_manager.get_temp_file( config.STATISTICS_FILE % (i + 1 - 1,) ) ) the_pass.run(run_options, stats_keeper) end_time = time.time() stats_keeper.log_duration_for_pass( end_time - start_time, i + 1, the_pass.name ) logger.normal(stats_keeper.single_pass_timing(i + 1)) stats_keeper.archive( artifact_manager.get_temp_file(config.STATISTICS_FILE % (i + 1,)) ) start_time = end_time Ctx().clean() # Allow the artifact manager to clean up artifacts that are no # longer needed: artifact_manager.pass_done(the_pass, Ctx().skip_cleanup) check_for_garbage() # Tell the artifact manager about passes that are being deferred: for the_pass in self.passes[index_end:]: artifact_manager.pass_deferred(the_pass) logger.quiet(stats_keeper) logger.normal(stats_keeper.timings()) # Consistency check: artifact_manager.check_clean()