Python Seq._translate_str 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: Bio

클래스/타입: Seq

메소드/함수: _translate_str

hotexamples.com에서의 예제들: 2

Python Seq._translate_str - 2개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python Bio.Seq._translate_str의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제에 평점을 매기면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

MutableSeq(30)

Seq(30)

reverse_complement(30)

translate(30)

UnknownSeq(22)

transcribe(11)

back_transcribe(10)

complement(5)

find(4)

_translate_str(2)

tostring(1)

예제 #1

파일 보기

def call_annotation_variant(annotation_file, ref_aligned, seq_aligned, ref_positions, seq_positions, sequence_id=666):
    """Annotate an aligned sequence and call amino-acid variants per coding region.

    Reference annotations are read from a tab-separated file. For every
    annotation the nucleotides of ``seq_aligned`` that fall inside the
    annotated range are extracted (gaps removed). Annotations of type
    ``CDS`` or ``mature_protein_region`` are additionally translated and
    globally aligned against the reference protein listed in the file; runs
    of differing alignment columns are reported as insertions, deletions and
    substitutions.

    Args:
        annotation_file: Path of the tab-separated annotation file. Fields
            3 to 8 (1-based) are read as annotation type, positions
            (``start,stop`` pairs joined by ``;``), gene, protein, protein id
            and reference amino-acid sequence. A ``.`` in any of the last
            four fields is stored as ``None``. Blank lines are skipped.
        ref_aligned: Not used by this function; kept for interface
            compatibility.
        seq_aligned: Aligned sequence, iterated in lockstep with
            ``ref_positions``; ``-`` characters are dropped from extracts.
        ref_positions: One coordinate per element of ``seq_aligned``; it is
            compared against the start/stop bounds from the annotation file.
        seq_positions: Not used by this function; kept for interface
            compatibility.
        sequence_id: Only used in the warning logged when a coding region's
            length is not a multiple of three.

    Returns:
        A list of tuples ``(gene, protein, protein_id, type, nuc_start,
        nuc_stop, nuc_seq, aa_seq, mutations)``. ``mutations`` is a list of
        ``(gene, protein, protein_id, position, original, alternative, kind)``
        with ``kind`` in ``"INS"``, ``"DEL"``, ``"SUB"``, ordered as all
        insertions, then all deletions, then all substitutions. Annotation
        types other than ``gene``, ``CDS`` and ``mature_protein_region``
        produce no entry, and a coding annotation for which the aligner
        returns no alignment is skipped.
    """
    coding_types = ('mature_protein_region', 'CDS')

    def parse_pos(text):
        """Parse ``"a,b;c,d"`` into ``[(a, b), (c, d)]`` with integer bounds."""
        ranges = []
        for chunk in text.strip().split(";"):
            bounds = chunk.strip().split(",")
            ranges.append((int(bounds[0]), int(bounds[1])))
        return ranges

    def none_if_missing(field):
        """Map the ``.`` placeholder used by the annotation file to ``None``."""
        return None if field == "." else field

    def extract(start, stop):
        """Return the gap-free part of ``seq_aligned`` located in ``[start, stop]``."""
        return "".join(
            base for coord, base in zip(ref_positions, seq_aligned) if start <= coord <= stop
        ).replace("-", "")

    def collect_runs(ref_aln, seq_aln, ref_index, is_hit, kind, owner):
        """Report each maximal run of alignment columns for which ``is_hit`` holds.

        A run becomes ``owner + (position, ref_chars, seq_chars, kind)`` where
        ``position`` is ``ref_index`` at the first column of the run.
        """
        found = []
        run_start = None
        for col, (ref_char, seq_char) in enumerate(zip(ref_aln, seq_aln)):
            if is_hit(ref_char, seq_char):
                if run_start is None:
                    run_start = col
            elif run_start is not None:
                found.append(owner + (ref_index[run_start],
                                      "".join(ref_aln[run_start:col]),
                                      "".join(seq_aln[run_start:col]),
                                      kind))
                run_start = None
        if run_start is not None:
            # The alignment ended while a run was still open.
            found.append(owner + (ref_index[run_start],
                                  "".join(ref_aln[run_start:]),
                                  "".join(seq_aln[run_start:]),
                                  kind))
        return found

    ref_annotations = []
    with open(annotation_file) as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                # A blank (e.g. trailing) line has no fields to index.
                continue
            s = stripped.split("\t")
            ref_annotations.append((
                s[2],
                parse_pos(s[3]),
                none_if_missing(s[4]),
                none_if_missing(s[5]),
                none_if_missing(s[6]),
                none_if_missing(s[7]),
            ))

    # Looked up on first use so inputs without a translatable region never
    # need the codon tables.
    table = None
    list_annotations = []
    proteins_not_multiple_of_3 = []

    for atype, ann_pos, gene, protein, protein_id, ref_aa_seq in ref_annotations:
        # nuc_seq covers everything from the first range's start to the last
        # range's stop, including whatever lies between separate ranges.
        nuc_start = ann_pos[0][0]
        nuc_stop = ann_pos[-1][1]
        nuc_seq = extract(nuc_start, nuc_stop)

        if atype == 'gene':
            list_annotations.append(
                (gene, protein, protein_id, atype, nuc_start, nuc_stop, nuc_seq, None, []))
            continue
        if atype not in coding_types:
            continue

        # dna_ref concatenates only the annotated ranges (gaps deleted); this
        # is the string that gets translated for the protein.
        dna_ref = "".join(extract(start, stop) for start, stop in ann_pos)

        if not dna_ref:
            list_annotations.append(
                (gene, protein, protein_id, atype, nuc_start, nuc_stop, None, None, []))
            continue

        if len(dna_ref) % 3 != 0:
            # Cannot be translated codon by codon; remembered for the warning below.
            proteins_not_multiple_of_3.append(protein)
            list_annotations.append(
                (gene, protein, protein_id, atype, nuc_start, nuc_stop, nuc_seq, None, []))
            continue

        if table is None:
            table = CodonTable.ambiguous_dna_by_id[1]

        # "*" symbols in the translation are dropped before aligning.
        aa_seq = Seq._translate_str(dna_ref, table, cds=False).replace("*", "")

        # NOTE(review): ref_aa_seq is None when the file has "." in that
        # column; it is passed to the aligner unchanged, as before. Confirm
        # that coding rows always carry a reference protein sequence.
        alignment_aa = pairwise2.align.globalms(ref_aa_seq, aa_seq, 3, -1, -3, -1)

        try:
            ref_aligned_aa = alignment_aa[0][0]
            seq_aligned_aa = alignment_aa[0][1]
        except IndexError:
            # No alignment returned: this annotation is left out of the result.
            continue

        # ref_positions_aa[i] = number of non-gap reference residues in
        # columns 0..i, so an insertion is positioned at the residue before it.
        ref_positions_aa = np.cumsum([ch != '-' for ch in ref_aligned_aa], dtype=int)

        owner = (gene, protein, protein_id)
        list_mutations = []
        list_mutations += collect_runs(
            ref_aligned_aa, seq_aligned_aa, ref_positions_aa,
            lambda r, q: r == '-', "INS", owner)
        list_mutations += collect_runs(
            ref_aligned_aa, seq_aligned_aa, ref_positions_aa,
            lambda r, q: q == '-', "DEL", owner)
        list_mutations += collect_runs(
            ref_aligned_aa, seq_aligned_aa, ref_positions_aa,
            lambda r, q: r != '-' and q != '-' and r != q, "SUB", owner)

        list_annotations.append(
            (gene, protein, protein_id, atype, nuc_start, nuc_stop, nuc_seq, aa_seq, list_mutations))

    # log frameshift not detectable in the final output
    if len(proteins_not_multiple_of_3) > 0:
        logger.warning(f"sequence ID {sequence_id} has proteins of length not multiple of 3 {proteins_not_multiple_of_3}")
    return list_annotations

예제 #2

파일 보기

def call_annotation_variant(annotation_file, ref_aligned, seq_aligned, ref_positions, seq_positions, sequence_id=666):
    """Call amino-acid variants for the coding annotations of an aligned sequence.

    Reference annotations are read from a tab-separated file. Only
    annotations of type ``CDS`` or ``mature_protein_region`` yield output:
    the nucleotides of ``seq_aligned`` inside the annotated ranges are
    extracted (gaps removed), translated, and globally aligned against the
    reference protein listed in the file; runs of differing alignment columns
    are reported as insertions, deletions and substitutions.

    Args:
        annotation_file: Path of the tab-separated annotation file. Fields
            3 to 8 (1-based) are read as annotation type, positions
            (``start,stop`` pairs joined by ``;``), gene, protein, protein id
            and reference amino-acid sequence. A ``.`` in any of the last
            four fields is stored as ``None``. Blank lines are skipped.
        ref_aligned: Not used by this function; kept for interface
            compatibility.
        seq_aligned: Aligned sequence, iterated in lockstep with
            ``ref_positions``; ``-`` characters are dropped from extracts.
        ref_positions: One coordinate per element of ``seq_aligned``; it is
            compared against the start/stop bounds from the annotation file.
        seq_positions: Not used by this function; kept for interface
            compatibility.
        sequence_id: Not used by this function; kept for interface
            compatibility.

    Returns:
        A list of tuples ``(gene, protein, protein_id, type, nuc_start,
        nuc_stop, nuc_seq, aa_seq, mutations)``. ``nuc_seq`` and ``aa_seq``
        are ``None`` when no nucleotide falls in the ranges; ``aa_seq`` alone
        is ``None`` when the extracted length is not a multiple of three.
        ``mutations`` is a list of ``(gene, protein, protein_id, position,
        original, alternative, kind)`` with ``kind`` in ``"INS"``, ``"DEL"``,
        ``"SUB"``, ordered as all insertions, then all deletions, then all
        substitutions. A coding annotation for which the aligner returns no
        alignment is skipped.
    """
    coding_types = ('mature_protein_region', 'CDS')

    def parse_pos(text):
        """Parse ``"a,b;c,d"`` into ``[(a, b), (c, d)]`` with integer bounds."""
        ranges = []
        for chunk in text.strip().split(";"):
            bounds = chunk.strip().split(",")
            ranges.append((int(bounds[0]), int(bounds[1])))
        return ranges

    def none_if_missing(field):
        """Map the ``.`` placeholder used by the annotation file to ``None``."""
        return None if field == "." else field

    def extract(start, stop):
        """Return the gap-free part of ``seq_aligned`` located in ``[start, stop]``."""
        return "".join(
            base for coord, base in zip(ref_positions, seq_aligned) if start <= coord <= stop
        ).replace("-", "")

    def collect_runs(ref_aln, seq_aln, ref_index, is_hit, kind, owner):
        """Report each maximal run of alignment columns for which ``is_hit`` holds.

        A run becomes ``owner + (position, ref_chars, seq_chars, kind)`` where
        ``position`` is ``ref_index`` at the first column of the run.
        """
        found = []
        run_start = None
        for col, (ref_char, seq_char) in enumerate(zip(ref_aln, seq_aln)):
            if is_hit(ref_char, seq_char):
                if run_start is None:
                    run_start = col
            elif run_start is not None:
                found.append(owner + (ref_index[run_start],
                                      "".join(ref_aln[run_start:col]),
                                      "".join(seq_aln[run_start:col]),
                                      kind))
                run_start = None
        if run_start is not None:
            # The alignment ended while a run was still open.
            found.append(owner + (ref_index[run_start],
                                  "".join(ref_aln[run_start:]),
                                  "".join(seq_aln[run_start:]),
                                  kind))
        return found

    ref_annotations = []
    with open(annotation_file) as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                # A blank (e.g. trailing) line has no fields to index.
                continue
            s = stripped.split("\t")
            ref_annotations.append((
                s[2],
                parse_pos(s[3]),
                none_if_missing(s[4]),
                none_if_missing(s[5]),
                none_if_missing(s[6]),
                none_if_missing(s[7]),
            ))

    # Looked up on first use so inputs without a translatable region never
    # need the codon tables.
    table = None
    list_annotations = []

    for atype, ann_pos, gene, protein, protein_id, ref_aa_seq in ref_annotations:
        if atype not in coding_types:
            # Other annotation types contribute nothing to the result.
            continue

        # nuc_seq covers everything from the first range's start to the last
        # range's stop, including whatever lies between separate ranges.
        nuc_start = ann_pos[0][0]
        nuc_stop = ann_pos[-1][1]
        nuc_seq = extract(nuc_start, nuc_stop)

        # dna_ref concatenates only the annotated ranges (gaps deleted).
        dna_ref = "".join(extract(start, stop) for start, stop in ann_pos)

        if not dna_ref:
            list_annotations.append(
                (gene, protein, protein_id, atype, nuc_start, nuc_stop, None, None, []))
            continue

        if len(dna_ref) % 3 != 0:
            # Cannot be translated codon by codon; reported without a protein.
            list_annotations.append(
                (gene, protein, protein_id, atype, nuc_start, nuc_stop, nuc_seq, None, []))
            continue

        if table is None:
            table = CodonTable.ambiguous_dna_by_id[1]

        # "*" symbols in the translation are dropped before aligning.
        aa_seq = Seq._translate_str(dna_ref, table, cds=False).replace("*", "")

        # NOTE(review): ref_aa_seq is None when the file has "." in that
        # column; it is passed to the aligner unchanged, as before. Confirm
        # that coding rows always carry a reference protein sequence.
        alignment_aa = pairwise2.align.globalms(ref_aa_seq, aa_seq, 3, -1, -3, -1)

        try:
            ref_aligned_aa = alignment_aa[0][0]
            seq_aligned_aa = alignment_aa[0][1]
        except IndexError:
            # No alignment returned: this annotation is left out of the result.
            continue

        # ref_positions_aa[i] = number of non-gap reference residues in
        # columns 0..i, so an insertion is positioned at the residue before it.
        ref_positions_aa = np.cumsum([ch != '-' for ch in ref_aligned_aa], dtype=int)

        owner = (gene, protein, protein_id)
        list_mutations = []
        list_mutations += collect_runs(
            ref_aligned_aa, seq_aligned_aa, ref_positions_aa,
            lambda r, q: r == '-', "INS", owner)
        list_mutations += collect_runs(
            ref_aligned_aa, seq_aligned_aa, ref_positions_aa,
            lambda r, q: q == '-', "DEL", owner)
        list_mutations += collect_runs(
            ref_aligned_aa, seq_aligned_aa, ref_positions_aa,
            lambda r, q: r != '-' and q != '-' and r != q, "SUB", owner)

        list_annotations.append(
            (gene, protein, protein_id, atype, nuc_start, nuc_stop, nuc_seq, aa_seq, list_mutations))

    return list_annotations