def find_all_orfs(record: Record, cluster: Optional[Cluster] = None) -> List[CDSFeature]:
    """ Build CDS features for every ORF that does not overlap a CDS feature
        that already exists.

        Candidate ORF locations come from scan_orfs, run once per strand. The
        minimum ORF length (documented as 60 bases) is not checked here;
        presumably scan_orfs enforces it.

        The search can (and should) be restricted to a single cluster. When it
        is, candidates with an open-ended (before/after) start or end position
        are discarded, as are features not contained by the cluster.

        Arguments:
            record: the record to search
            cluster: the specific Cluster to search within, or None to search
                     the whole record

        Returns:
            a list of CDSFeatures, one for each ORF that was kept
    """
    # by default the whole record is searched and all of its CDSs count as existing
    search_seq = record.seq
    start_offset = 0
    existing = record.get_cds_features()
    if cluster:
        # narrow both the sequence and the set of existing CDSs to the cluster
        search_seq = record.seq[cluster.location.start:cluster.location.end]
        start_offset = cluster.location.start
        existing = tuple(cluster.cds_children)

    # scan the forward strand first, then the reverse strand
    candidates = (scan_orfs(search_seq, 1, start_offset)
                  + scan_orfs(search_seq.reverse_complement(), -1, start_offset))

    open_ended = (BeforePosition, AfterPosition)
    kept = []
    for candidate in candidates:
        # within a cluster, ORFs running off either edge are not usable
        if cluster and (isinstance(candidate.start, open_ended)
                        or isinstance(candidate.end, open_ended)):
            continue

        # a throwaway feature gives access to the overlap test
        probe = Feature(candidate, feature_type="dummy")
        clashes = False
        for cds in existing:
            if probe.overlaps_with(cds):
                clashes = True
                break
        if clashes:
            continue

        # numbering only advances for kept ORFs, so the next number is always
        # one more than the count kept so far
        orf = create_feature_from_location(record, candidate, len(kept) + 1)

        # anything not wholly inside the cluster is dropped
        if cluster and not orf.is_contained_by(cluster):
            continue

        kept.append(orf)

    return kept
def find_all_orfs(record: Record, area: Optional[CDSCollection] = None) -> List[CDSFeature]:
    """ Build CDS features for every ORF that does not overlap a CDS feature
        that already exists.

        Candidate ORF locations come from scan_orfs, run once per strand. The
        minimum ORF length (documented as 60 bases) is not checked here;
        presumably scan_orfs enforces it.

        The search can (and should) be restricted to a specific section of the
        record. When it is, candidates with an open-ended (before/after) start
        or end position are discarded, as are features not contained by the
        area.

        Arguments:
            record: the record to search
            area: the specific CDSCollection to search within, or None to
                  search the whole record

        Returns:
            a list of CDSFeatures, one for each ORF that was kept
    """
    # by default the whole record is searched and all of its CDSs count as existing
    search_seq = record.seq
    start_offset = 0
    existing: Iterable[CDSFeature] = record.get_cds_features()
    if area:
        # narrow the sequence to the area and only consider CDSs within or
        # overlapping the area's location
        search_seq = area.extract(search_seq)
        start_offset = area.location.start
        existing = record.get_cds_features_within_location(area.location,
                                                           with_overlapping=True)

    # scan the forward strand first, then the reverse strand
    candidates = (scan_orfs(search_seq, 1, start_offset)
                  + scan_orfs(search_seq.reverse_complement(), -1, start_offset))

    open_ended = (BeforePosition, AfterPosition)
    kept = []
    for candidate in candidates:
        # within an area, ORFs running off either edge are not usable
        if area and (isinstance(candidate.start, open_ended)
                     or isinstance(candidate.end, open_ended)):
            continue

        # a throwaway feature gives access to the overlap test
        probe = Feature(candidate, feature_type="dummy")
        if any(map(probe.overlaps_with, existing)):
            continue

        orf = create_feature_from_location(record, candidate)

        # keep the ORF only when unrestricted or wholly inside the area
        if not area or orf.is_contained_by(area):
            kept.append(orf)

    return kept