def test_build_water_relations():
    '''Check the relations built from a water alignment of two sequences.

    The subject sequence is written to a temporary fasta file, the query
    is aligned against it with the 'water' runner and the resulting
    alignment (a markx10 format file) is turned into relations by
    build_relations_from_aligment. The result is compared against the
    segments expected for each of the two molecules.
    '''
    # The sequences are kept as fixed-width chunks only for readability,
    # they are joined into one string each before being used.
    subject_chunks = (
        'ATGGCTTCATCCATTCTCTCATCCGCCGNTGTGGCCTTTGNCAACAGGGCTTCCCCTGCTCA',
        'AGCTAGCATGGGGGCACCATTCACTGGCCTAAAATCCGCCGCTGCTTTCCCNGTTTTATGTA',
        'CTGTTTTNACTCGCANGACCAACGACATCACCACTTTGGTTAGCAATGGGGGAAGAGTTCAG',
        'GGCNTGAAGGTGTGCCCACCACTTGGATTGAAGAAGTTCGAGACTCTTTCTTACCTTCCTGA',
        'TATGAGTAACGAGCAATTGGGAAAGGAAGTTGACTACCTTCTCAGGAAGGGATGGATTCCCT',
        'GCATTGAATTCGACATTCACAGTGGATTCGTTTACCGTGAGACCCACAGGTCACCAGGATAC',
        'TTCGATGGACGCTACTGGACCATGTGGAAGCTGCCCATGTTTGGCTGCACCGAT',
    )
    query_chunks = (
        'ATGGCTTCATCCATTCTCTCATCCGCCGNTGTGGCCTTTGNCAACAGGGCTTCCCTGCTCAA',
        'GCTAGCATGGGGGCACCATTCACTGGCCTAAAATCCGCCGCTGCTTTCCCNGTNACTCGCAN',
        'GACCAACGACATCACCACTTTGGTTAGCAATGGGGGAAGAGTTCAGGGCNTGAAGGTGTGCC',
        'CACCACTTGGATTGAAGAAGTTCGAGACTCTTTCTTACCTTCCTGATATGAGTAACGAGCAA',
        'TTGGGAAAGGAAGTTGACTACCTTCTCAGGAAGGGATGGATTCCCTGCATTGAATTCGACAT',
        'TCACAGTGGATTCGTTTACCGTGAGACCCACAGGTCACCAGGATACTTCGATGGACGCTAC',
        'TGGACCATGTGGAAGCTGCCCATGTTTGGCTGCACCGAT',
    )

    subject_seq = SeqWithQuality(seq=Seq(''.join(subject_chunks)),
                                 name='subject')
    query_seq = SeqWithQuality(seq=Seq(''.join(query_chunks)),
                               name='query')

    # The aligner takes the subject as a file path and the query as a
    # sequence object.
    subject_fhand = temp_fasta_file(subject_seq)
    aligner = create_runner(tool='water',
                            parameters={'subject': subject_fhand.name})
    result_fhand = aligner(query_seq)['water']

    relations = build_relations_from_aligment(result_fhand,
                                              query_name=query_seq.name,
                                              subject_name=subject_seq.name)

    # One list of (start, end) segments per molecule, keyed by its name.
    expected = {
        'query': [(0, 50), (51, 112), (113, 409)],
        'subject': [(0, 50), (52, 113), (129, 425)],
    }
    assert relations == expected
def _locate_codons_in_orf(sequence, orf, snv):
    '''It places the snv with respect to the orf of the sequence.

    The dna of the orf (orf.qualifiers['dna']) is aligned against the
    sequence with the 'water' runner and the alignment is used to build a
    coordinate system between both molecules. The start position of the
    snv is then translated from the sequence to the orf.

    It returns a tuple (position, codon_start, snv_in_orf):
        - position: 'orf' when the snv could be translated into the orf,
          'utr5' when it lies before the first orf base on the sequence,
          'in utr3' when it lies after the last one, None otherwise.
        - codon_start: the snv position in the orf rounded down to a
          multiple of 3, or None when the snv is not in the orf.
        - snv_in_orf: the snv position in the orf, or None.
    '''
    orf_name = 'subject'
    seq_name = sequence.name
    orf_dna = orf.qualifiers['dna']

    # The aligner wants the subject as a file, so the orf dna goes into a
    # temporary fasta file. The flush makes the content readable by path.
    # NOTE(review): a str is written to a file opened with the default
    # mode, this would need bytes under python 3 -- confirm the target
    # python version before porting.
    orf_fhand = NamedTemporaryFile(suffix='.fasta')
    orf_fhand.write('>%s\n%s\n' % (orf_name, orf_dna))
    orf_fhand.flush()

    run_water = create_runner(tool='water',
                              parameters={'subject': orf_fhand.name})
    alignment_fhand = run_water(sequence)['water']
    relations = build_relations_from_aligment(alignment_fhand,
                                              query_name=seq_name,
                                              subject_name=orf_name)
    coord = CoordSystem(relations=[relations])

    # A RuntimeError from the transformation is taken as "the snv is not
    # in the orf" (presumably the position is not covered by the
    # alignment -- verify against CoordSystem).
    snv_pos = snv.location.start.position
    try:
        snv_in_orf = coord.transform(from_mol=seq_name, to_mol=orf_name,
                                     position=snv_pos)
    except RuntimeError:
        snv_in_orf = None

    # First and last orf bases expressed in sequence coordinates. They are
    # calculated whatever the snv location is, so an error here is raised
    # to the caller in every case.
    orf_first_in_seq = coord.transform(from_mol=orf_name, to_mol=seq_name,
                                       position=0)
    orf_last_in_seq = coord.transform(from_mol=orf_name, to_mol=seq_name,
                                      position=len(orf_dna) - 1)

    if snv_in_orf is not None:
        # The codon starts at the previous multiple of 3.
        codon_start = snv_in_orf - snv_in_orf % 3
        return ('orf', codon_start, snv_in_orf)

    # The snv is outside the orf: it is classified by comparing it with
    # the orf limits on the sequence.
    if snv_pos < orf_first_in_seq:
        position = 'utr5'
    elif snv_pos > orf_last_in_seq:
        # NOTE(review): 'in utr3' is not parallel to 'utr5'; it is kept as
        # is because the callers could be comparing against this string.
        position = 'in utr3'
    else:
        position = None
    return (position, None, None)