def test_get_transcript_and_mutate_vcf():
    """Check annotation, index lookup and protein mutation for one variant.

    A single chr10:43617416 T>C record is wrapped in a one-row DataFrame
    and annotated with transcripts. The test then verifies that:

    * both expected transcript IDs appear in the annotation result,
    * the cDNA index and the CDS index computed for the position each
      point at the reference base in the corresponding sequence,
    * mutating the CDS with ``padding = 10`` produces the expected
      21-character region sequence.
    """
    variant = {
        'chr': '10',
        'pos': 43617416,
        'ref': 'T',
        'alt': 'C',
    }
    transcript_id = "ENST00000355710"

    # Annotate a one-row VCF frame and collect the transcript IDs it hits.
    vcf = pd.DataFrame.from_records([variant])
    annotated = ensembl.annotate_vcf_transcripts(vcf)
    annotated_ids = set(annotated['stable_id_transcript'])
    for expected_id in (transcript_id, "ENST00000340058"):
        assert expected_id in annotated_ids

    # Index into the full cDNA (untranslated regions included).
    cdna_idx = ensembl.get_transcript_index_from_pos(
        variant['pos'],
        transcript_id,
        skip_untranslated_region=False)
    assert cdna_idx is not None
    assert cdna_idx < 5569
    cdna_transcript = ref_data.get_cdna(transcript_id)
    assert cdna_transcript[cdna_idx] == variant['ref']

    # Index into the coding sequence only (untranslated regions skipped).
    cds_idx = ensembl.get_transcript_index_from_pos(
        variant['pos'],
        transcript_id,
        skip_untranslated_region=True)
    assert cds_idx is not None
    cds_transcript = ref_data.get_cds(transcript_id)
    assert cds_transcript[cds_idx] == variant['ref']

    # Apply the substitution and inspect the padded region around it.
    region = mutate_protein_from_transcript(
        cds_transcript,
        cds_idx,
        variant['ref'],
        variant['alt'],
        padding=10)
    assert region is not None
    assert len(region.seq) == 21, (region.seq, len(region.seq))
    assert region.seq == 'RSQGRIPVKWTAIESLFDHIY'
def test_get_transcript_index_from_pos():
    """Check the cDNA index computed for chr3:41275636 in ENST00000405570.

    The index is requested with ``skip_untranslated_region = False`` and is
    expected to be 1686; the cDNA base at that index must equal the
    variant's reference base.
    """
    variant = {
        'chr': '3',
        'pos': 41275636,
        'ref': 'G',
        'alt': 'A',
    }
    transcript_id = 'ENST00000405570'

    idx = ensembl.get_transcript_index_from_pos(
        variant['pos'],
        transcript_id,
        skip_untranslated_region=False)
    # On failure, report the index that was actually returned.
    assert idx == 1686, idx

    cdna = ref_data.get_cdna(transcript_id)
    assert cdna[idx] == variant['ref']
def test_peptide_from_transcript_PARS2():
    """Check the CDS index of chr1:55224569 in transcript ENST00000371279.

    The index is requested with ``skip_untranslated_region = True`` and is
    expected to be 265.
    """
    transcript_id = 'ENST00000371279'
    variant = {
        'chr': '1',
        'pos': 55224569,
        'ref': 'T',
        'alt': 'G',
    }

    cds_idx = ensembl.get_transcript_index_from_pos(
        variant['pos'],
        transcript_id,
        skip_untranslated_region=True)
    assert cds_idx is not None
    assert cds_idx == 265