Ejemplo n.º 1
0
def make_new_subsequence(
        seqid: str, start: int, end: int, gene_itree: IntervalTree,
        hint_itree: Optional[IntervalTree], gene_features: Sequence[GFFRecord],
        hint_features: Optional[Sequence[GFFRecord]],
        seq: SeqRecord, padding: int = 10
) -> Tuple[str, SeqRecord, GFF, Optional[GFF]]:
    """Cut a window out of ``seq`` together with its genes and hints.

    The requested ``[start, end)`` window is widened so that every gene
    interval overlapping it is covered, plus ``padding`` bases on each side
    of the outermost genes, and is then clamped to ``[0, len(seq)]``.

    Args:
        seqid: Identifier used as the prefix of the generated name.
        start: Requested window start.
        end: Requested window end.
        gene_itree: Interval tree of genes; each interval's ``data`` is used
            as an index into ``gene_features``.
        hint_itree: Optional interval tree of hints; each interval's ``data``
            is used as an index into ``hint_features``.
        gene_features: Gene records addressed by ``gene_itree`` payloads.
        hint_features: Hint records addressed by ``hint_itree`` payloads.
        seq: The sequence record the window is extracted from.
        padding: Number of extra bases kept on either side of the outermost
            overlapping gene. Defaults to 10.

    Returns:
        A tuple ``(name, subseq, genes, hints)`` where ``name`` is
        ``"{seqid}:{start}-{end}"`` built from the final window, ``subseq``
        is the extracted record carrying that name as id, name and
        description, and ``genes`` / ``hints`` are the selected records after
        being passed through ``shift_gff`` with the new name and the window
        start (presumably re-anchoring their coordinates to the subsequence;
        confirm against ``shift_gff``). ``hints`` is ``None`` when either
        ``hint_itree`` or ``hint_features`` is ``None``.
    """
    # Materialise once: the overlap query result is iterated several times.
    gene_intervals = list(gene_itree[start:end])

    # Only widen the window when at least one gene overlaps it. Calling
    # min()/max() on an empty selection would raise ValueError, so a
    # gene-free window is simply kept as requested.
    if gene_intervals:
        start = min(start, min(iv.begin for iv in gene_intervals) - padding)
        end = max(end, max(iv.end for iv in gene_intervals) + padding)

    # Never reach outside the sequence itself.
    start = max(start, 0)
    end = min(end, len(seq))

    if hint_itree is None:
        hint_intervals = None
    else:
        # Keep only hints lying entirely inside the final window; hints that
        # merely overlap an edge are dropped.
        hint_intervals = [
            iv for iv in hint_itree[start:end]
            if iv.begin >= start and iv.end <= end
        ]

    name = f"{seqid}:{start}-{end}"

    # Strand 1: the window is taken from the forward strand.
    subseq = FeatureLocation(start, end, 1).extract(seq)
    subseq.id = name
    subseq.name = name
    subseq.description = name

    subgenes = GFF([gene_features[iv.data] for iv in gene_intervals])
    subgenes_shifted = shift_gff(subgenes, name, start)

    if hint_intervals is None or hint_features is None:
        subhints_shifted = None
    else:
        subhints = GFF([hint_features[iv.data] for iv in hint_intervals])
        subhints_shifted = shift_gff(subhints, name, start)

    return name, subseq, subgenes_shifted, subhints_shifted