Python translate Examples

Programming Language: Python

Namespace/Package Name: kipoiseq.transforms.functional

Method/Function: translate

Examples at hotexamples.com: 5

Python translate - 5 examples found. These are the top-rated real-world Python examples of kipoiseq.transforms.functional.translate extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: protein.py Project: kipoi/kipoiseq

 def get_protein_seq(self, transcript_id: str):
     """
     Return the amino acid sequence for a transcript.

     The sequence is fetched with ``self.get_seq`` and then passed to
     ``translate`` with ``hg38=True``.

     :param transcript_id: id of the transcript whose sequence is fetched
     :return: amino acid sequence
     """
     dna_seq = self.get_seq(transcript_id)
     return translate(dna_seq, hg38=True)

Example #2

Show file

File: protein.py Project: kipoi/kipoiseq

 def _prepare_seq(cls, *args, **kwargs):
     """
     Build the dna sequence with the parent implementation and translate it.

     All positional and keyword arguments are forwarded unchanged to the
     parent ``_prepare_seq``; its result is passed to ``translate`` with
     ``hg38=True``.

     :param seqs: current dna sequence
     :param intervals: the list of intervals corresponding to the sequence snippets
     :param reverse_complement: should the dna be reverse-complemented?
     :return: amino acid sequence
     """
     dna_seq = super()._prepare_seq(*args, **kwargs)
     return translate(dna_seq, hg38=True)

Example #3

Show file

def test_ensembl_uniprot_seq(tse):
    """
    Compare translated transcript sequences with a reference FASTA file.

    The file named by ``uniprot_seq_ref`` is parsed into a mapping from
    header text (every '>' removed, trailing whitespace stripped) to the
    sequence that follows it. Each mapped id is then fetched through
    ``tse.get_seq``, translated, and required to equal the reference
    sequence.

    :param tse: object providing ``get_seq(transcript_id)``
    """
    id_and_seq = {}
    # The file is only read; 'r+' would additionally demand write permission
    # on the reference file.
    with open(uniprot_seq_ref, 'r') as f:
        key = ""
        starts_record = True
        for line in f:
            if '>' in line:
                key = line.replace('>', '').rstrip()
                starts_record = True
            elif starts_record:
                # First sequence line of a record replaces any earlier entry
                # stored under the same key.
                id_and_seq[key] = line.rstrip()
                starts_record = False
            else:
                # Wrapped sequence lines (and trailing blank lines) extend the
                # current record instead of overwriting it.
                id_and_seq[key] += line.rstrip()

    for transcript_id, ref_seq in tqdm(id_and_seq.items()):
        test_seq = translate(tse.get_seq(transcript_id), True)
        # The translated sequence is the assertion message, so a failure
        # shows what was actually produced.
        assert test_seq == ref_seq, test_seq

Example #4

Show file

def test_vcf_single_variant_synonymous_mutations(tse, svp):
    """
    Check single-variant extraction for one transcript and overall counts.

    Every sequence produced by ``svp.extract`` for the transcript must equal
    the translated reference sequence, and exactly 337 sequences are
    expected. Across ``svp.extract_all()``, the items of all yielded
    iterables must add up to 825.

    :param tse: object providing ``get_seq(transcript_id)``
    :param svp: object providing ``extract(transcript_id)`` and ``extract_all()``
    """
    transcript_id = 'ENST00000356175'
    dna_seq = tse.get_seq(transcript_id)
    ref_seq = translate(dna_seq, True)

    variant_seqs = list(svp.extract(transcript_id))
    for variant_seq in variant_seqs:
        assert variant_seq == ref_seq, variant_seq
    n_variant_seqs = len(variant_seqs)
    assert n_variant_seqs == 337, 'Number of sequences != number of variants'

    # Materialise the outer iterable first, then count the items of each
    # inner iterable.
    per_transcript = list(svp.extract_all())
    total = sum(len(list(seqs)) for seqs in per_transcript)

    assert total == 825

Example #5

Show file

def test_hg38(tse):
    """
    Translate every transcript of ``tse`` and compare it with a reference.

    The reference is loaded with ``read_pep_fa(protein_file)`` and indexed by
    a ``transcript_id`` column derived from its ``transcript`` column.
    Skipped transcripts and sequence mismatches are written to the file
    ``err_transcripts`` in the current working directory.

    NOTE(review): ``div3_error`` and ``err_transcripts`` are initialised but
    not used in the visible part of this function, and no assertion on
    ``seq_mismatch_err`` is visible -- confirm whether the function continues
    beyond this excerpt.

    :param tse: object providing ``__len__``, ``transcripts`` and
        ``get_seq(transcript_id)``
    """
    # 'w+' truncates/creates the log file; it is only written to below.
    with open('err_transcripts', 'w+') as f:
        dfp = read_pep_fa(protein_file)
        # Keep only the part of ``transcript`` before the first "."
        # (presumably dropping a version suffix -- TODO confirm).
        dfp['transcript_id'] = dfp.transcript.str.split(".", n=1,
                                                        expand=True)[0]
        #assert not dfp['transcript_id'].duplicated().any()
        dfp = dfp.set_index("transcript_id")
        #dfp = dfp[~dfp.chromosome.isnull()]
        assert len(tse) > 100
        # Every transcript of ``tse`` must be present in the reference index.
        assert tse.transcripts.isin(dfp.index).all()
        div3_error = 0
        seq_mismatch_err = 0
        err_transcripts = []
        for transcript_id in tqdm(tse.transcripts):
            # make sure all ids can be found in the proteome
            dna_seq = tse.get_seq(transcript_id)
            # A sequence equal to "NNN" is logged and skipped.
            if dna_seq == "NNN":
                f.write(transcript_id +
                        ' has an ambiguous start and end.Skip!')
                continue
            # dna_seq = dna_seq[:(len(dna_seq) // 3) * 3]
            # if len(dna_seq) % 3 != 0:
            #   div3_error += 1
            #  print("len(dna_seq) % 3 != 0: {}".format(transcript_id))
            # err_transcripts.append({"transcript_id": transcript_id, "div3_err": True})
            # continue
            # Sequences whose length is not a multiple of 3 are logged (id
            # only, no separator) and skipped before translation.
            if len(dna_seq) % 3 != 0:
                f.write(transcript_id)
                continue
            prot_seq = translate(dna_seq, hg38=True)
            if dfp.loc[transcript_id].seq != prot_seq:
                seq_mismatch_err += 1
                f.write("seq.mismatch: {}".format(transcript_id))
                n_mismatch = 0
                # Position-wise comparison over the length of the translated
                # sequence; each differing position is logged.
                # NOTE(review): indexing the reference with ``i`` assumes it
                # is at least as long as ``prot_seq`` -- confirm.
                for i in range(len(prot_seq)):
                    a = dfp.loc[transcript_id].seq[i]
                    b = prot_seq[i]
                    if a != b:
                        n_mismatch += 1
                        f.write("{} {} {}/{}".format(a, b, i, len(prot_seq)))