def tree_completed(self):
    """Link up the parsed revision tree and build its CVSFileItems.

    This is a callback method declared in Sink, invoked once the
    revision tree has been parsed.  It resolves the recorded
    dependencies, gathers every revision, branch and tag item into a
    CVSFileItems stored in self._cvs_file_items, checks that
    collection for link consistency, and finally hands it to the
    revision recorder."""

    # Connect the loose ends left over from parsing.
    self._resolve_primary_dependencies()
    self._resolve_branch_dependencies()
    self._sort_branches()
    self._resolve_tag_dependencies()

    # Gather the preliminary items: revisions first, then branches,
    # then tags.
    collected = list(self._get_cvs_revisions())
    collected += self._get_cvs_branches()
    collected += self._get_cvs_tags()

    file_items = CVSFileItems(self.cvs_file, self.pdc.trunk, collected)
    self._cvs_file_items = file_items
    file_items.check_link_consistency()

    # Tell the revision recorder about the file dependency tree.
    recorder = self.collect_data.revision_recorder
    recorder.start_file(file_items)
def tree_completed(self):
    """Link up the parsed revision tree and build its CVSFileItems.

    This is a callback method declared in Sink, invoked once the
    revision tree has been parsed.  After the recorded dependencies
    have been resolved, every revision, branch and tag item is
    collected into a CVSFileItems that is stored in
    self._cvs_file_items and checked for link consistency."""

    # Connect the loose ends left over from parsing.
    self._resolve_primary_dependencies()
    self._resolve_branch_dependencies()
    self._sort_branches()
    self._resolve_tag_dependencies()

    # Preliminary items for this file, in the order revisions,
    # branches, tags.  Each source is drained before the next is asked
    # for.
    all_items = (
        list(self._get_cvs_revisions())
        + list(self._get_cvs_branches())
        + list(self._get_cvs_tags())
    )

    file_items = CVSFileItems(self.cvs_file, self.pdc.trunk, all_items)
    self._cvs_file_items = file_items
    file_items.check_link_consistency()
class _FileDataCollector(cvs2svn_rcsparse.Sink):
    """Class responsible for collecting RCS data for a particular file.

    Any collected data that need to be remembered are stored into the
    referenced CollectData instance.

    The Sink callbacks below fill in the per-file state.  Note that
    tree_completed() creates self._cvs_file_items, which
    set_revision_info(), parse_completed() and get_cvs_file_items()
    all read, so it has to run before them."""

    def __init__(self, pdc, cvs_file):
        """Create an object that is prepared to receive data for CVS_FILE.

        PDC is the object that supplies the CollectData instance (as
        PDC.collect_data) used to store the information collected about
        the file; its trunk attribute is read later, in
        tree_completed().  CVS_FILE is a CVSFile instance."""

        self.pdc = pdc
        self.cvs_file = cvs_file

        self.collect_data = self.pdc.collect_data
        self.project = self.cvs_file.project

        # A place to store information about the symbols in this file:
        self.sdc = _SymbolDataCollector(self, self.cvs_file)

        # { revision : _RevisionData instance }
        self._rev_data = {}

        # Lists [ (parent, child) ] of revision number pairs indicating
        # that revision child depends on revision parent along the main
        # line of development.
        self._primary_dependencies = []

        # If set, this is an RCS branch number -- rcsparse calls this the
        # "principal branch", but CVS and RCS refer to it as the "default
        # branch", so that's what we call it, even though the rcsparse API
        # setter method is still 'set_principal_branch'.
        self.default_branch = None

        # True iff revision 1.1 of the file appears to have been imported
        # (as opposed to added normally).
        self._file_imported = False

    def _get_rev_id(self, revision):
        """Return the cvs_rev_id recorded for REVISION.

        None is passed through unchanged.  A revision number that was
        never defined raises KeyError."""

        if revision is None:
            return None
        return self._rev_data[revision].cvs_rev_id

    def set_principal_branch(self, branch):
        """Record BRANCH as the file's default branch, if it is usable.

        This is a callback method declared in Sink.  A BRANCH that is
        malformed or that is not a top-level branch is reported as a
        fatal error and otherwise ignored."""

        if '.' not in branch:
            # This just sets the default branch to trunk.  Normally this
            # shouldn't occur, but it has been seen in at least one CVS
            # repository.  Just ignore it.
            return

        m = _branch_revision_re.match(branch)
        if not m:
            self.collect_data.record_fatal_error(
                'The default branch %s in file %r is not a valid branch number'
                % (branch, self.cvs_file.rcs_path,)
            )
            return

        branch = m.group(1) + m.group(2)
        if branch.count('.') != 2:
            # We don't know how to deal with a non-top-level default
            # branch (what does CVS do?).  So if this case is detected,
            # punt:
            self.collect_data.record_fatal_error(
                'The default branch %s in file %r is not a top-level branch'
                % (branch, self.cvs_file.rcs_path,)
            )
            return

        self.default_branch = branch

    def define_tag(self, name, revision):
        """Remember the symbol name and revision, but don't process them yet.

        This is a callback method declared in Sink."""

        self.sdc.define_symbol(name, revision)

    def set_expansion(self, mode):
        """Store MODE as the mode of the CVSFile.

        This is a callback method declared in Sink."""

        self.cvs_file.mode = mode

    def admin_completed(self):
        """Process the symbols that define_tag() has remembered.

        This is a callback method declared in Sink."""

        self.sdc.process_symbols()

    def define_revision(self, revision, timestamp, author, state,
                        branches, next):
        """Record REVISION together with its branch and 'next' links.

        This is a callback method declared in Sink.  TIMESTAMP is
        converted with int().  BRANCHES lists the first revisions of
        the branches sprouting from REVISION, and NEXT is the RCS
        'next' pointer (possibly empty).  Raise RuntimeError if
        REVISION was already defined."""

        for branch in branches:
            try:
                branch_data = self.sdc.rev_to_branch_data(branch)
            except KeyError:
                # Normally we learn about the branches from the branch names
                # and numbers parsed from the symbolic name header.  But this
                # must have been an unlabeled branch that slipped through the
                # net.  Generate a name for it and create a _BranchData record
                # for it now.
                branch_data = self.sdc._add_unlabeled_branch(
                    self.sdc.rev_to_branch_number(branch))

            assert branch_data.child is None
            branch_data.child = branch

        if revision in self._rev_data:
            # This revision has already been seen.
            logger.error(
                'File %r contains duplicate definitions of revision %s.'
                % (self.cvs_file.rcs_path, revision,)
            )
            raise RuntimeError()

        # Record basic information about the revision:
        rev_data = _RevisionData(
            self.collect_data.item_key_generator.gen_id(),
            revision, int(timestamp), author, state)
        self._rev_data[revision] = rev_data

        # When on trunk, the RCS 'next' revision number points to what
        # humans might consider to be the 'previous' revision number.  For
        # example, 1.3's RCS 'next' is 1.2.
        #
        # However, on a branch, the RCS 'next' revision number really does
        # point to what humans would consider to be the 'next' revision
        # number.  For example, 1.1.2.1's RCS 'next' would be 1.1.2.2.
        #
        # In other words, in RCS, 'next' always means "where to find the
        # next deltatext that you need this revision to retrieve".
        #
        # That said, we don't *want* RCS's behavior here, so we determine
        # whether we're on trunk or a branch and set the dependencies
        # accordingly.
        if next:
            if is_trunk_revision(revision):
                self._primary_dependencies.append((next, revision,))
            else:
                self._primary_dependencies.append((revision, next,))

    def tree_completed(self):
        """The revision tree has been parsed.

        Analyze it for consistency and connect some loose ends, then
        build self._cvs_file_items from the collected revisions,
        branches and tags.

        This is a callback method declared in Sink."""

        self._resolve_primary_dependencies()
        self._resolve_branch_dependencies()
        self._sort_branches()
        self._resolve_tag_dependencies()

        # Compute the preliminary CVSFileItems for this file:
        cvs_items = []
        cvs_items.extend(self._get_cvs_revisions())
        cvs_items.extend(self._get_cvs_branches())
        cvs_items.extend(self._get_cvs_tags())
        self._cvs_file_items = CVSFileItems(
            self.cvs_file, self.pdc.trunk, cvs_items
        )

        self._cvs_file_items.check_link_consistency()

    def _resolve_primary_dependencies(self):
        """Resolve the dependencies listed in self._primary_dependencies.

        Each (parent, child) pair is written into the child attribute
        of the parent's _RevisionData and the parent attribute of the
        child's.  A pair naming an undefined revision raises
        KeyError."""

        for (parent, child,) in self._primary_dependencies:
            parent_data = self._rev_data[parent]
            assert parent_data.child is None
            parent_data.child = child

            child_data = self._rev_data[child]
            assert child_data.parent is None
            child_data.parent = parent

    def _resolve_branch_dependencies(self):
        """Resolve dependencies involving branches.

        A branch whose parent revision was never defined is reported
        with a warning and removed from self.sdc.branches_data."""

        # Iterate over a snapshot, because entries may be deleted from
        # branches_data inside the loop.
        for branch_data in list(self.sdc.branches_data.values()):
            # The branch_data's parent has the branch as a child regardless
            # of whether the branch had any subsequent commits:
            try:
                parent_data = self._rev_data[branch_data.parent]
            except KeyError:
                logger.warn(
                    'In %r:\n'
                    ' branch %r references non-existing revision %s\n'
                    ' and will be ignored.'
                    % (self.cvs_file.rcs_path, branch_data.symbol.name,
                       branch_data.parent,)
                )
                del self.sdc.branches_data[branch_data.branch_number]
            else:
                parent_data.branches_data.append(branch_data)

                # If the branch has a child (i.e., something was committed on
                # the branch), then we store a reference to the branch_data
                # there, define the child's parent to be the branch's parent,
                # and list the child in the branch parent's
                # branches_revs_data:
                if branch_data.child is not None:
                    child_data = self._rev_data[branch_data.child]
                    assert child_data.parent_branch_data is None
                    child_data.parent_branch_data = branch_data
                    assert child_data.parent is None
                    child_data.parent = branch_data.parent
                    parent_data.branches_revs_data.append(branch_data.child)

    def _sort_branches(self):
        """Sort the branches sprouting from each revision in creation order.

        Creation order is taken to be the reverse of the order that they
        are listed in the symbols part of the RCS file.  (If a branch is
        created then deleted, a later branch can be assigned the recycled
        branch number; therefore branch numbers are not an indication of
        creation order.)"""

        for rev_data in self._rev_data.values():
            # Descending id order.
            rev_data.branches_data.sort(
                key=lambda branch_data: branch_data.id, reverse=True
            )

    def _resolve_tag_dependencies(self):
        """Resolve dependencies involving tags.

        Tags that refer to a revision that was never defined are
        reported with a warning and removed from self.sdc.tags_data."""

        # Iterate over a snapshot, because entries may be deleted from
        # tags_data inside the loop.
        for (rev, tag_data_list) in list(self.sdc.tags_data.items()):
            try:
                parent_data = self._rev_data[rev]
            except KeyError:
                logger.warn(
                    'In %r:\n'
                    ' the following tag(s) reference non-existing revision %s\n'
                    ' and will be ignored:\n'
                    ' %s' % (
                        self.cvs_file.rcs_path, rev,
                        ', '.join([repr(tag_data.symbol.name)
                                   for tag_data in tag_data_list]),)
                )
                del self.sdc.tags_data[rev]
            else:
                for tag_data in tag_data_list:
                    assert tag_data.rev == rev
                    # The tag_data's rev has the tag as a child:
                    parent_data.tags_data.append(tag_data)

    def _get_cvs_branches(self):
        """Generate the CVSBranches present in this file."""

        for branch_data in self.sdc.branches_data.values():
            yield CVSBranch(
                branch_data.id, self.cvs_file, branch_data.symbol,
                branch_data.branch_number,
                self.sdc.rev_to_lod(branch_data.parent),
                self._get_rev_id(branch_data.parent),
                self._get_rev_id(branch_data.child),
                None,
            )

    def _get_cvs_tags(self):
        """Generate the CVSTags present in this file."""

        for tags_data in self.sdc.tags_data.values():
            for tag_data in tags_data:
                yield CVSTag(
                    tag_data.id, self.cvs_file, tag_data.symbol,
                    self.sdc.rev_to_lod(tag_data.rev),
                    self._get_rev_id(tag_data.rev),
                    None,
                )

    def set_description(self, description):
        """Store DESCRIPTION on the CVSFile and determine its properties.

        This is a callback method declared in Sink."""

        self.cvs_file.description = description
        self.cvs_file.determine_file_properties(Ctx().file_property_setters)

    def set_revision_info(self, revision, log, text):
        """Record the log message and deltatext presence for REVISION.

        This is a callback method declared in Sink.  A second deltatext
        block for the same revision is ignored with a warning.  For
        revision 1.1, LOG is also used to decide whether the file
        appears to have been imported."""

        rev_data = self._rev_data[revision]
        cvs_rev = self._cvs_file_items[rev_data.cvs_rev_id]

        if cvs_rev.metadata_id is not None:
            # Users have reported problems with repositories in which the
            # deltatext block for revision 1.1 appears twice.  It is not
            # known whether this results from a CVS/RCS bug, or from botched
            # hand-editing of the repository.  In any case, empirically, cvs
            # and rcs both use the first version when checking out data, so
            # that's what we will do.  (For the record: "cvs log" fails on
            # such a file; "rlog" prints the log message from the first
            # block and ignores the second one.)
            logger.warn(
                "%s: in '%s':\n"
                " Deltatext block for revision %s appeared twice;\n"
                " ignoring the second occurrence.\n"
                % (warning_prefix, self.cvs_file.rcs_path, revision,)
            )
            return

        if is_trunk_revision(revision):
            branch_name = None
        else:
            branch_name = self.sdc.rev_to_branch_data(revision).symbol.name

        cvs_rev.metadata_id = self.collect_data.metadata_logger.store(
            self.project, branch_name, rev_data.author, log
        )
        cvs_rev.deltatext_exists = bool(text)

        # If this is revision 1.1, determine whether the file appears to
        # have been created via 'cvs add' instead of 'cvs import'.  The
        # test is that the log message CVS uses for 1.1 in imports is
        # "Initial revision\n" with no period.  (This fact helps determine
        # whether this file might have had a default branch in the past.)
        if revision == '1.1':
            self._file_imported = (log == 'Initial revision\n')

    def parse_completed(self):
        """Finish the processing of this file.

        This is a callback method declared in Sink.  Every CVSRevision
        that never received a metadata_id is reported as a fatal
        error."""

        # Make sure that there was an info section for each revision:
        for cvs_item in self._cvs_file_items.values():
            if (isinstance(cvs_item, CVSRevision)
                    and cvs_item.metadata_id is None):
                self.collect_data.record_fatal_error(
                    '%r has no deltatext section for revision %s'
                    % (self.cvs_file.rcs_path, cvs_item.rev,)
                )

    def _determine_operation(self, rev_data):
        """Return the CVSRevision type to use for REV_DATA.

        The type is looked up in cvs_revision_type_map, keyed on
        whether this revision and its parent revision (if there is
        one) are in a state other than 'dead'."""

        prev_rev_data = self._rev_data.get(rev_data.parent)
        return cvs_revision_type_map[(
            rev_data.state != 'dead',
            prev_rev_data is not None and prev_rev_data.state != 'dead',
        )]

    def _get_cvs_revisions(self):
        """Generate the CVSRevisions present in this file."""

        for rev_data in self._rev_data.values():
            yield self._get_cvs_revision(rev_data)

    def _get_cvs_revision(self, rev_data):
        """Create and return a CVSRevision for REV_DATA."""

        branch_ids = [
            branch_data.id
            for branch_data in rev_data.branches_data
        ]

        branch_commit_ids = [
            self._get_rev_id(rev)
            for rev in rev_data.branches_revs_data
        ]

        tag_ids = [
            tag_data.id
            for tag_data in rev_data.tags_data
        ]

        revision_type = self._determine_operation(rev_data)

        return revision_type(
            self._get_rev_id(rev_data.rev), self.cvs_file,
            rev_data.timestamp, None,
            self._get_rev_id(rev_data.parent),
            self._get_rev_id(rev_data.child),
            rev_data.rev,
            True,
            self.sdc.rev_to_lod(rev_data.rev),
            rev_data.get_first_on_branch_id(),
            False, None, None,
            tag_ids, branch_ids, branch_commit_ids,
            rev_data.revision_reader_token
        )

    def get_cvs_file_items(self):
        """Finish up and return a CVSFileItems instance for this file.

        This method must only be called once (it deletes self.sdc)."""

        self._process_ntdbrs()

        # Break a circular reference loop, allowing the memory for self
        # and sdc to be freed.
        del self.sdc

        return self._cvs_file_items

    def _process_ntdbrs(self):
        """Fix up any non-trunk default branch revisions (if present).

        Nothing is returned; all changes are made to
        self._cvs_file_items.

        There are two cases to handle:

        One case is simple.  The RCS file lists a default branch
        explicitly in its header, such as '1.1.1'.  In this case, we know
        that every revision on the vendor branch is to be treated as head
        of trunk at that point in time.

        But there's also a degenerate case.  The RCS file does not
        currently have a default branch, yet we can deduce that for some
        period in the past it probably *did* have one.  For example, the
        file has vendor revisions 1.1.1.1 -> 1.1.1.96, all of which are
        dated before 1.2, and then it has 1.1.1.97 -> 1.1.1.100 dated
        after 1.2.  In this case, we should record 1.1.1.96 as the last
        vendor revision to have been the head of the default branch.

        If any non-trunk default branch revisions are found:

        - Set their ntdbr members to True.

        - Connect the last one with revision 1.2.

        - Remove revision 1.1 if it is not needed.

        A default branch that is named in the header but has no entry in
        self.sdc.branches_data is reported with a warning and ignored.
        A VendorBranchError is recorded as a fatal error."""

        try:
            if self.default_branch:
                try:
                    vendor_cvs_branch_id = self.sdc.branches_data[
                        self.default_branch].id
                except KeyError:
                    # The branch named as default has no entry (for
                    # example because _resolve_branch_dependencies()
                    # dropped it); there is nothing to fix up.
                    logger.warn(
                        '%s: In %s:\n'
                        ' vendor branch %r is not present in file and will be ignored.'
                        % (warning_prefix, self.cvs_file.rcs_path,
                           self.default_branch,)
                    )
                    self.default_branch = None
                    return
                vendor_lod_items = self._cvs_file_items.get_lod_items(
                    self._cvs_file_items[vendor_cvs_branch_id]
                )
                if not self._cvs_file_items.process_live_ntdb(
                        vendor_lod_items):
                    return
            elif self._file_imported:
                vendor_branch_data = self.sdc.branches_data.get('1.1.1')
                if vendor_branch_data is None:
                    return
                else:
                    vendor_lod_items = self._cvs_file_items.get_lod_items(
                        self._cvs_file_items[vendor_branch_data.id]
                    )
                    if not self._cvs_file_items.process_historical_ntdb(
                            vendor_lod_items):
                        return
            else:
                return
        except VendorBranchError as e:
            self.collect_data.record_fatal_error(str(e))
            return

        if self._file_imported:
            self._cvs_file_items.imported_remove_1_1(vendor_lod_items)

        self._cvs_file_items.check_link_consistency()
class _FileDataCollector(Sink):
    """Class responsible for collecting RCS data for a particular file.

    Any collected data that need to be remembered are stored into the
    referenced CollectData instance.

    The Sink callbacks below fill in the per-file state.  Note that
    tree_completed() creates self._cvs_file_items, which
    set_revision_info(), parse_completed() and get_cvs_file_items()
    all read, so it has to run before them."""

    def __init__(self, pdc, cvs_file):
        """Create an object that is prepared to receive data for CVS_FILE.

        PDC is the object that supplies the CollectData instance (as
        PDC.collect_data) used to store the information collected about
        the file; its trunk attribute is read later, in
        tree_completed().  CVS_FILE is a CVSFile instance."""

        self.pdc = pdc
        self.cvs_file = cvs_file

        self.collect_data = self.pdc.collect_data
        self.project = self.cvs_file.project

        # A place to store information about the symbols in this file:
        self.sdc = _SymbolDataCollector(self, self.cvs_file)

        # { revision : _RevisionData instance }
        self._rev_data = {}

        # Lists [ (parent, child) ] of revision number pairs indicating
        # that revision child depends on revision parent along the main
        # line of development.
        self._primary_dependencies = []

        # If set, this is an RCS branch number -- rcsparse calls this the
        # "principal branch", but CVS and RCS refer to it as the "default
        # branch", so that's what we call it, even though the rcsparse API
        # setter method is still 'set_principal_branch'.
        self.default_branch = None

        # True iff revision 1.1 of the file appears to have been imported
        # (as opposed to added normally).
        self._file_imported = False

    def _get_rev_id(self, revision):
        """Return the cvs_rev_id recorded for REVISION.

        None is passed through unchanged.  A revision number that was
        never defined raises KeyError."""

        if revision is None:
            return None
        return self._rev_data[revision].cvs_rev_id

    def set_principal_branch(self, branch):
        """Record BRANCH as the file's default branch, if it is usable.

        This is a callback method declared in Sink.  A BRANCH that is
        malformed or that is not a top-level branch is reported as a
        fatal error and otherwise ignored."""

        if '.' not in branch:
            # This just sets the default branch to trunk.  Normally this
            # shouldn't occur, but it has been seen in at least one CVS
            # repository.  Just ignore it.
            return

        m = _branch_revision_re.match(branch)
        if not m:
            self.collect_data.record_fatal_error(
                'The default branch %s in file %r is not a valid branch number'
                % (branch, self.cvs_file.rcs_path,)
            )
            return

        branch = m.group(1) + m.group(2)
        if branch.count('.') != 2:
            # We don't know how to deal with a non-top-level default
            # branch (what does CVS do?).  So if this case is detected,
            # punt:
            self.collect_data.record_fatal_error(
                'The default branch %s in file %r is not a top-level branch'
                % (branch, self.cvs_file.rcs_path,)
            )
            return

        self.default_branch = branch

    def define_tag(self, name, revision):
        """Remember the symbol name and revision, but don't process them yet.

        This is a callback method declared in Sink."""

        self.sdc.define_symbol(name, revision)

    def set_expansion(self, mode):
        """Store MODE as the mode of the CVSFile.

        This is a callback method declared in Sink."""

        self.cvs_file.mode = mode

    def admin_completed(self):
        """Process the symbols that define_tag() has remembered.

        This is a callback method declared in Sink."""

        self.sdc.process_symbols()

    def define_revision(self, revision, timestamp, author, state,
                        branches, next):
        """Record REVISION together with its branch and 'next' links.

        This is a callback method declared in Sink.  TIMESTAMP is
        converted with int().  BRANCHES lists the first revisions of
        the branches sprouting from REVISION, and NEXT is the RCS
        'next' pointer (possibly empty).  Raise RuntimeError if
        REVISION was already defined."""

        for branch in branches:
            try:
                branch_data = self.sdc.rev_to_branch_data(branch)
            except KeyError:
                # Normally we learn about the branches from the branch names
                # and numbers parsed from the symbolic name header.  But this
                # must have been an unlabeled branch that slipped through the
                # net.  Generate a name for it and create a _BranchData record
                # for it now.
                branch_data = self.sdc._add_unlabeled_branch(
                    self.sdc.rev_to_branch_number(branch))

            assert branch_data.child is None
            branch_data.child = branch

        if revision in self._rev_data:
            # This revision has already been seen.
            logger.error(
                'File %r contains duplicate definitions of revision %s.'
                % (self.cvs_file.rcs_path, revision,)
            )
            raise RuntimeError()

        # Record basic information about the revision:
        rev_data = _RevisionData(
            self.collect_data.item_key_generator.gen_id(),
            revision, int(timestamp), author, state)
        self._rev_data[revision] = rev_data

        # When on trunk, the RCS 'next' revision number points to what
        # humans might consider to be the 'previous' revision number.  For
        # example, 1.3's RCS 'next' is 1.2.
        #
        # However, on a branch, the RCS 'next' revision number really does
        # point to what humans would consider to be the 'next' revision
        # number.  For example, 1.1.2.1's RCS 'next' would be 1.1.2.2.
        #
        # In other words, in RCS, 'next' always means "where to find the
        # next deltatext that you need this revision to retrieve".
        #
        # That said, we don't *want* RCS's behavior here, so we determine
        # whether we're on trunk or a branch and set the dependencies
        # accordingly.
        if next:
            if is_trunk_revision(revision):
                self._primary_dependencies.append((next, revision,))
            else:
                self._primary_dependencies.append((revision, next,))

    def tree_completed(self):
        """The revision tree has been parsed.

        Analyze it for consistency and connect some loose ends, then
        build self._cvs_file_items from the collected revisions,
        branches and tags.

        This is a callback method declared in Sink."""

        self._resolve_primary_dependencies()
        self._resolve_branch_dependencies()
        self._sort_branches()
        self._resolve_tag_dependencies()

        # Compute the preliminary CVSFileItems for this file:
        cvs_items = []
        cvs_items.extend(self._get_cvs_revisions())
        cvs_items.extend(self._get_cvs_branches())
        cvs_items.extend(self._get_cvs_tags())
        self._cvs_file_items = CVSFileItems(
            self.cvs_file, self.pdc.trunk, cvs_items
        )

        self._cvs_file_items.check_link_consistency()

    def _resolve_primary_dependencies(self):
        """Resolve the dependencies listed in self._primary_dependencies.

        Each (parent, child) pair is written into the child attribute
        of the parent's _RevisionData and the parent attribute of the
        child's.  A pair naming an undefined revision raises
        KeyError."""

        for (parent, child,) in self._primary_dependencies:
            parent_data = self._rev_data[parent]
            assert parent_data.child is None
            parent_data.child = child

            child_data = self._rev_data[child]
            assert child_data.parent is None
            child_data.parent = parent

    def _resolve_branch_dependencies(self):
        """Resolve dependencies involving branches.

        A branch whose parent revision was never defined is reported
        with a warning and removed from self.sdc.branches_data."""

        # Iterate over a snapshot, because entries may be deleted from
        # branches_data inside the loop.
        for branch_data in list(self.sdc.branches_data.values()):
            # The branch_data's parent has the branch as a child regardless
            # of whether the branch had any subsequent commits:
            try:
                parent_data = self._rev_data[branch_data.parent]
            except KeyError:
                logger.warn(
                    'In %r:\n'
                    ' branch %r references non-existing revision %s\n'
                    ' and will be ignored.'
                    % (self.cvs_file.rcs_path, branch_data.symbol.name,
                       branch_data.parent,)
                )
                del self.sdc.branches_data[branch_data.branch_number]
            else:
                parent_data.branches_data.append(branch_data)

                # If the branch has a child (i.e., something was committed on
                # the branch), then we store a reference to the branch_data
                # there, define the child's parent to be the branch's parent,
                # and list the child in the branch parent's
                # branches_revs_data:
                if branch_data.child is not None:
                    child_data = self._rev_data[branch_data.child]
                    assert child_data.parent_branch_data is None
                    child_data.parent_branch_data = branch_data
                    assert child_data.parent is None
                    child_data.parent = branch_data.parent
                    parent_data.branches_revs_data.append(branch_data.child)

    def _sort_branches(self):
        """Sort the branches sprouting from each revision in creation order.

        Creation order is taken to be the reverse of the order that they
        are listed in the symbols part of the RCS file.  (If a branch is
        created then deleted, a later branch can be assigned the recycled
        branch number; therefore branch numbers are not an indication of
        creation order.)"""

        for rev_data in self._rev_data.values():
            # Descending id order.
            rev_data.branches_data.sort(
                key=lambda branch_data: branch_data.id, reverse=True
            )

    def _resolve_tag_dependencies(self):
        """Resolve dependencies involving tags.

        Tags that refer to a revision that was never defined are
        reported with a warning and removed from self.sdc.tags_data."""

        # Iterate over a snapshot, because entries may be deleted from
        # tags_data inside the loop.
        for (rev, tag_data_list) in list(self.sdc.tags_data.items()):
            try:
                parent_data = self._rev_data[rev]
            except KeyError:
                logger.warn(
                    'In %r:\n'
                    ' the following tag(s) reference non-existing revision %s\n'
                    ' and will be ignored:\n'
                    ' %s' % (
                        self.cvs_file.rcs_path, rev,
                        ', '.join([repr(tag_data.symbol.name)
                                   for tag_data in tag_data_list]),)
                )
                del self.sdc.tags_data[rev]
            else:
                for tag_data in tag_data_list:
                    assert tag_data.rev == rev
                    # The tag_data's rev has the tag as a child:
                    parent_data.tags_data.append(tag_data)

    def _get_cvs_branches(self):
        """Generate the CVSBranches present in this file."""

        for branch_data in self.sdc.branches_data.values():
            yield CVSBranch(
                branch_data.id, self.cvs_file, branch_data.symbol,
                branch_data.branch_number,
                self.sdc.rev_to_lod(branch_data.parent),
                self._get_rev_id(branch_data.parent),
                self._get_rev_id(branch_data.child),
                None,
            )

    def _get_cvs_tags(self):
        """Generate the CVSTags present in this file."""

        for tags_data in self.sdc.tags_data.values():
            for tag_data in tags_data:
                yield CVSTag(
                    tag_data.id, self.cvs_file, tag_data.symbol,
                    self.sdc.rev_to_lod(tag_data.rev),
                    self._get_rev_id(tag_data.rev),
                    None,
                )

    def set_description(self, description):
        """Store DESCRIPTION on the CVSFile and determine its properties.

        This is a callback method declared in Sink."""

        self.cvs_file.description = description
        self.cvs_file.determine_file_properties(Ctx().file_property_setters)

    def set_revision_info(self, revision, log, text):
        """Record the log message and deltatext presence for REVISION.

        This is a callback method declared in Sink.  A second deltatext
        block for the same revision is ignored with a warning.  For
        revision 1.1, LOG is also used to decide whether the file
        appears to have been imported."""

        rev_data = self._rev_data[revision]
        cvs_rev = self._cvs_file_items[rev_data.cvs_rev_id]

        if cvs_rev.metadata_id is not None:
            # Users have reported problems with repositories in which the
            # deltatext block for revision 1.1 appears twice.  It is not
            # known whether this results from a CVS/RCS bug, or from botched
            # hand-editing of the repository.  In any case, empirically, cvs
            # and rcs both use the first version when checking out data, so
            # that's what we will do.  (For the record: "cvs log" fails on
            # such a file; "rlog" prints the log message from the first
            # block and ignores the second one.)
            logger.warn(
                "%s: in '%s':\n"
                " Deltatext block for revision %s appeared twice;\n"
                " ignoring the second occurrence.\n"
                % (warning_prefix, self.cvs_file.rcs_path, revision,)
            )
            return

        if is_trunk_revision(revision):
            branch_name = None
        else:
            branch_name = self.sdc.rev_to_branch_data(revision).symbol.name

        cvs_rev.metadata_id = self.collect_data.metadata_logger.store(
            self.project, branch_name, rev_data.author, log
        )
        cvs_rev.deltatext_exists = bool(text)

        # If this is revision 1.1, determine whether the file appears to
        # have been created via 'cvs add' instead of 'cvs import'.  The
        # test is that the log message CVS uses for 1.1 in imports is
        # "Initial revision\n" with no period.  (This fact helps determine
        # whether this file might have had a default branch in the past.)
        if revision == '1.1':
            self._file_imported = (log == 'Initial revision\n')

    def parse_completed(self):
        """Finish the processing of this file.

        This is a callback method declared in Sink.  Every CVSRevision
        that never received a metadata_id is reported as a fatal
        error."""

        # Make sure that there was an info section for each revision:
        for cvs_item in self._cvs_file_items.values():
            if (isinstance(cvs_item, CVSRevision)
                    and cvs_item.metadata_id is None):
                self.collect_data.record_fatal_error(
                    '%r has no deltatext section for revision %s'
                    % (self.cvs_file.rcs_path, cvs_item.rev,)
                )

    def _determine_operation(self, rev_data):
        """Return the CVSRevision type to use for REV_DATA.

        The type is looked up in cvs_revision_type_map, keyed on
        whether this revision and its parent revision (if there is
        one) are in a state other than 'dead'."""

        prev_rev_data = self._rev_data.get(rev_data.parent)
        return cvs_revision_type_map[(
            rev_data.state != 'dead',
            prev_rev_data is not None and prev_rev_data.state != 'dead',
        )]

    def _get_cvs_revisions(self):
        """Generate the CVSRevisions present in this file."""

        for rev_data in self._rev_data.values():
            yield self._get_cvs_revision(rev_data)

    def _get_cvs_revision(self, rev_data):
        """Create and return a CVSRevision for REV_DATA."""

        branch_ids = [
            branch_data.id
            for branch_data in rev_data.branches_data
        ]

        branch_commit_ids = [
            self._get_rev_id(rev)
            for rev in rev_data.branches_revs_data
        ]

        tag_ids = [
            tag_data.id
            for tag_data in rev_data.tags_data
        ]

        revision_type = self._determine_operation(rev_data)

        return revision_type(
            self._get_rev_id(rev_data.rev), self.cvs_file,
            rev_data.timestamp, None,
            self._get_rev_id(rev_data.parent),
            self._get_rev_id(rev_data.child),
            rev_data.rev,
            True,
            self.sdc.rev_to_lod(rev_data.rev),
            rev_data.get_first_on_branch_id(),
            False, None, None,
            tag_ids, branch_ids, branch_commit_ids,
            rev_data.revision_reader_token
        )

    def get_cvs_file_items(self):
        """Finish up and return a CVSFileItems instance for this file.

        This method must only be called once (it deletes self.sdc)."""

        self._process_ntdbrs()

        # Break a circular reference loop, allowing the memory for self
        # and sdc to be freed.
        del self.sdc

        return self._cvs_file_items

    def _process_ntdbrs(self):
        """Fix up any non-trunk default branch revisions (if present).

        Nothing is returned; all changes are made to
        self._cvs_file_items.

        There are two cases to handle:

        One case is simple.  The RCS file lists a default branch
        explicitly in its header, such as '1.1.1'.  In this case, we know
        that every revision on the vendor branch is to be treated as head
        of trunk at that point in time.

        But there's also a degenerate case.  The RCS file does not
        currently have a default branch, yet we can deduce that for some
        period in the past it probably *did* have one.  For example, the
        file has vendor revisions 1.1.1.1 -> 1.1.1.96, all of which are
        dated before 1.2, and then it has 1.1.1.97 -> 1.1.1.100 dated
        after 1.2.  In this case, we should record 1.1.1.96 as the last
        vendor revision to have been the head of the default branch.

        If any non-trunk default branch revisions are found:

        - Set their ntdbr members to True.

        - Connect the last one with revision 1.2.

        - Remove revision 1.1 if it is not needed.

        A default branch that is named in the header but has no entry in
        self.sdc.branches_data is reported with a warning and ignored.
        A VendorBranchError is recorded as a fatal error."""

        try:
            if self.default_branch:
                try:
                    vendor_cvs_branch_id = self.sdc.branches_data[
                        self.default_branch].id
                except KeyError:
                    # The branch named as default has no entry in
                    # branches_data; there is nothing to fix up.
                    logger.warn(
                        '%s: In %s:\n'
                        ' vendor branch %r is not present in file and will be ignored.'
                        % (warning_prefix, self.cvs_file.rcs_path,
                           self.default_branch,)
                    )
                    self.default_branch = None
                    return
                vendor_lod_items = self._cvs_file_items.get_lod_items(
                    self._cvs_file_items[vendor_cvs_branch_id]
                )
                if not self._cvs_file_items.process_live_ntdb(
                        vendor_lod_items):
                    return
            elif self._file_imported:
                vendor_branch_data = self.sdc.branches_data.get('1.1.1')
                if vendor_branch_data is None:
                    return
                else:
                    vendor_lod_items = self._cvs_file_items.get_lod_items(
                        self._cvs_file_items[vendor_branch_data.id]
                    )
                    if not self._cvs_file_items.process_historical_ntdb(
                            vendor_lod_items):
                        return
            else:
                return
        except VendorBranchError as e:
            self.collect_data.record_fatal_error(str(e))
            return

        if self._file_imported:
            self._cvs_file_items.imported_remove_1_1(vendor_lod_items)

        self._cvs_file_items.check_link_consistency()