def _locate_codons_in_orf(sequence, orf, snv):
    """It locates the snv in the orf coordinate system.

    The ORF DNA (orf.qualifiers['dna']) is written to a temporary fasta file
    and used as the 'subject' parameter of the runner created with
    tool='water'. The runner is called with the sequence and the relations
    built from its 'water' result are loaded into a CoordSystem that
    translates positions between the sequence (named sequence.name) and the
    ORF (named 'subject').

    Arguments:
        sequence: object with a .name attribute; it is passed to the runner.
        orf: object whose qualifiers['dna'] holds the ORF DNA.
        snv: object whose location.start.position is the snv position in the
            sequence.

    Returns a tuple (position, codon_start, snv_in_orf):
        - When the snv position can be transformed to the ORF:
          position is 'orf', snv_in_orf is the transformed position and
          codon_start is snv_in_orf rounded down to a multiple of 3.
        - When the transformation raises RuntimeError: codon_start and
          snv_in_orf are None and position is 'utr5' (snv before the ORF
          start in the sequence), 'in utr3' (snv after the ORF end in the
          sequence) or None otherwise.
    """
    query_name = sequence.name
    subject_name = 'subject'
    orf_seq = orf.qualifiers['dna']

    subject_fhand = NamedTemporaryFile(suffix='.fasta')
    try:
        subject_fhand.write('>%s\n%s\n' % (subject_name, orf_seq))
        # The runner is given the file name, so the content must be on disk.
        subject_fhand.flush()

        parameters = {'subject': subject_fhand.name}
        aligner = create_runner(tool='water', parameters=parameters)
        result_fhand = aligner(sequence)['water']
        relations = build_relations_from_aligment(result_fhand,
                                                  query_name=query_name,
                                                  subject_name=subject_name)
        coord = CoordSystem(relations=[relations])

        snv_pos = snv.location.start.position
        try:
            snv_in_orf = coord.transform(from_mol=query_name,
                                         to_mol=subject_name,
                                         position=snv_pos)
        except RuntimeError:
            # The snv position has no equivalent in the ORF.
            snv_in_orf = None

        # First and last ORF positions expressed in sequence coordinates.
        # NOTE(review): these are only used when the snv is outside the ORF,
        # but they are still computed unconditionally, as before, so any
        # error they raise keeps surfacing in the same situations.
        orf_start = 0
        orf_end = len(orf_seq) - 1
        orf_start_limit_in_seq = coord.transform(from_mol=subject_name,
                                                 to_mol=query_name,
                                                 position=orf_start)
        orf_end_limit_in_seq = coord.transform(from_mol=subject_name,
                                               to_mol=query_name,
                                               position=orf_end)

        if snv_in_orf is None:
            # It can be utr5, utr3 or None.
            if snv_pos < orf_start_limit_in_seq:
                position = 'utr5'
            elif snv_pos > orf_end_limit_in_seq:
                # NOTE(review): 'in utr3' is not symmetric with 'utr5'; it
                # is kept unchanged because callers may compare against it.
                position = 'in utr3'
            else:
                position = None
            codon_start = None
        else:
            # Offset of the snv inside its codon (0, 1 or 2).
            start_codon_pos = snv_in_orf % 3
            codon_start = snv_in_orf - start_codon_pos
            position = 'orf'
        return (position, codon_start, snv_in_orf)
    finally:
        # Closing the temporary file deletes it; do it explicitly instead of
        # relying on garbage collection, even when something above fails.
        subject_fhand.close()
def test_basic_coord(self):
    'It tests the basic coordinate transformations'
    # Coordinate layout used by the first three systems:
    #
    # geno             111
    #        0123456789012
    # geno2  1234567890123
    # geno3  1111111
    #        6543210987654
    #
    # cdna     0123  4567
    #
    # prot       00  011
    #            01  201

    # Every relation is rebuilt on demand so that each CoordSystem receives
    # its own fresh dictionaries.
    def geno_geno2():
        return {'geno': [(0, 12)], 'geno2': [(1, 13)]}

    def geno_cdna():
        return {'geno': [(2, 5), (8, 11)], 'cdna': [(0, 3), (4, 7)]}

    def cdna_prot():
        return {'cdna': [(2, 6)],
                'prot': [(CodonPosition(0, 0), CodonPosition(1, 1))]}

    def verify(system, expectations):
        # Each expectation is (from_mol, to_mol, position, expected result).
        for origin, target, index, wanted in expectations:
            found = system.transform(from_mol=origin, to_mol=target,
                                     position=index)
            assert found == wanted

    # a single relation
    verify(CoordSystem(relations=[geno_cdna()]),
           [('cdna', 'geno', 6, 10),
            ('cdna', 'geno', 0, 2)])

    # more than two molecules
    verify(CoordSystem(relations=[geno_geno2(), geno_cdna()]),
           [('geno', 'cdna', 3, 1),
            ('cdna', 'geno', 6, 10),
            ('geno', 'geno2', 1, 2)])

    # with proteins
    verify(CoordSystem(relations=[geno_geno2(), geno_cdna(), cdna_prot()]),
           [('geno', 'cdna', 3, 1),
            ('cdna', 'geno', 6, 10),
            ('geno', 'geno2', 11, 12),
            ('geno', 'prot', 5, 1),
            ('prot', 'geno', 2, 8)])

    # reversed
    #
    # geno             111
    #        0123456789012
    # geno3  1111111
    #        6543210987654
    reversed_relation = {'geno': [(0, 12)], 'geno3': [(16, 4)]}
    verify(CoordSystem(relations=[reversed_relation]),
           [('geno', 'geno3', 2, 14)])

    # reversed cdna with proteins
    #
    # geno             111
    #        0123456789012
    # cdna    876  543210
    # prot     54  3210
    reversed_cdna = {'geno': [(1, 3), (6, 11)], 'cdna': [(8, 6), (5, 0)]}
    reversed_prot = {'cdna': [(2, 7)],
                     'prot': [(CodonPosition(0, 0), CodonPosition(1, 2))]}
    verify(CoordSystem(relations=[reversed_cdna, reversed_prot]),
           [('geno', 'prot', 2, 5)])