Exemple #1
0
def find_all_orfs(record: Record,
                  cluster: Optional[Cluster] = None) -> List[CDSFeature]:
    """ Scan for ORFs that do not overlap any CDS feature already present.

        The whole record is searched unless a cluster is supplied, in which
        case only the cluster's stretch of sequence is scanned and only ORFs
        lying wholly inside that cluster are kept.

        The minimum ORF length (60 bases) is not checked in this function;
        it is presumably enforced by scan_orfs.

        Arguments:
            record: the record to search
            cluster: the Cluster to restrict the search to, or None to search
                     the whole record

        Returns:
            a list of newly created CDSFeatures, one per accepted ORF
    """
    # defaults describe a search over the entire record
    base = 0
    search_seq = record.seq
    known_cdses = record.get_cds_features()

    limited = bool(cluster)
    if limited:
        # narrow the sequence, coordinate offset and comparison set to the cluster
        search_seq = record.seq[cluster.location.start:cluster.location.end]
        base = cluster.location.start
        known_cdses = tuple(cluster.cds_children)

    # candidate locations from the forward strand, then the reverse strand
    candidates = (scan_orfs(search_seq, 1, base)
                  + scan_orfs(search_seq.reverse_complement(), -1, base))

    open_ended = (BeforePosition, AfterPosition)
    accepted = []

    for candidate in candidates:
        # within a cluster, drop candidates whose start or end is open-ended
        # (presumably ORFs running off the edge of the scanned slice)
        if limited and (isinstance(candidate.start, open_ended)
                        or isinstance(candidate.end, open_ended)):
            continue

        # a throwaway feature gives access to the overlap test
        probe = Feature(candidate, feature_type="dummy")
        if any(probe.overlaps_with(cds) for cds in known_cdses):
            continue

        # ORFs are numbered from 1 in order of acceptance, so the next
        # number is always one more than the count accepted so far
        orf = create_feature_from_location(record, candidate, len(accepted) + 1)

        # anything not wholly inside the cluster is discarded without
        # using up its number
        if limited and not orf.is_contained_by(cluster):
            continue

        accepted.append(orf)

    return accepted
Exemple #2
0
def find_all_orfs(record: Record, area: Optional[CDSCollection] = None) -> List[CDSFeature]:
    """ Scan for ORFs that do not overlap any CDS feature already present.

        The whole record is searched unless an area is supplied, in which
        case only that area's sequence is scanned and only ORFs lying wholly
        inside the area are kept.

        The minimum ORF length (60 bases) is not checked in this function;
        it is presumably enforced by scan_orfs.

        Arguments:
            record: the record to search
            area: the CDSCollection to restrict the search to, or None to
                  search the whole record

        Returns:
            a list of newly created CDSFeatures, one per accepted ORF
    """
    # defaults describe a search over the entire record
    base = 0
    search_seq = record.seq
    known_cdses: Iterable[CDSFeature] = record.get_cds_features()

    limited = bool(area)
    if limited:
        # narrow the sequence and coordinate offset to the area, and compare
        # only against CDS features within or overlapping the area's location
        search_seq = area.extract(search_seq)
        base = area.location.start
        known_cdses = record.get_cds_features_within_location(area.location,
                                                              with_overlapping=True)

    # candidate locations from the forward strand, then the reverse strand
    candidates = (scan_orfs(search_seq, 1, base)
                  + scan_orfs(search_seq.reverse_complement(), -1, base))

    open_ended = (BeforePosition, AfterPosition)
    accepted = []

    for candidate in candidates:
        # within an area, drop candidates whose start or end is open-ended
        # (presumably ORFs running off the edge of the scanned sequence)
        if limited and (isinstance(candidate.start, open_ended)
                        or isinstance(candidate.end, open_ended)):
            continue

        # a throwaway feature gives access to the overlap test
        probe = Feature(candidate, feature_type="dummy")
        if any(probe.overlaps_with(cds) for cds in known_cdses):
            continue

        orf = create_feature_from_location(record, candidate)

        # keep only ORFs that sit wholly inside the area
        if limited and not orf.is_contained_by(area):
            continue

        accepted.append(orf)

    return accepted