コード例 #1
0
    def test_build_water_relations():
        '''It tests the function that builds the relations between two
        sequences using a markx10 format file'''
        # The subject is the longer sequence; the query differs from it by a
        # few missing stretches.
        subject_dna = ''.join((
            'ATGGCTTCATCCATTCTCTCATCCGCCGNTGTGGCCTTTGNCAACAGGGCTTCCCCTGCTCA',
            'AGCTAGCATGGGGGCACCATTCACTGGCCTAAAATCCGCCGCTGCTTTCCCNGTTTTATGTA',
            'CTGTTTTNACTCGCANGACCAACGACATCACCACTTTGGTTAGCAATGGGGGAAGAGTTCAG',
            'GGCNTGAAGGTGTGCCCACCACTTGGATTGAAGAAGTTCGAGACTCTTTCTTACCTTCCTGA',
            'TATGAGTAACGAGCAATTGGGAAAGGAAGTTGACTACCTTCTCAGGAAGGGATGGATTCCCT',
            'GCATTGAATTCGACATTCACAGTGGATTCGTTTACCGTGAGACCCACAGGTCACCAGGATAC',
            'TTCGATGGACGCTACTGGACCATGTGGAAGCTGCCCATGTTTGGCTGCACCGAT',
        ))
        query_dna = ''.join((
            'ATGGCTTCATCCATTCTCTCATCCGCCGNTGTGGCCTTTGNCAACAGGGCTTCCCTGCTCAA',
            'GCTAGCATGGGGGCACCATTCACTGGCCTAAAATCCGCCGCTGCTTTCCCNGTNACTCGCAN',
            'GACCAACGACATCACCACTTTGGTTAGCAATGGGGGAAGAGTTCAGGGCNTGAAGGTGTGCC',
            'CACCACTTGGATTGAAGAAGTTCGAGACTCTTTCTTACCTTCCTGATATGAGTAACGAGCAA',
            'TTGGGAAAGGAAGTTGACTACCTTCTCAGGAAGGGATGGATTCCCTGCATTGAATTCGACAT',
            'TCACAGTGGATTCGTTTACCGTGAGACCCACAGGTCACCAGGATACTTCGATGGACGCTAC',
            'TGGACCATGTGGAAGCTGCCCATGTTTGGCTGCACCGAT',
        ))

        subject = SeqWithQuality(seq=Seq(subject_dna), name='subject')
        query = SeqWithQuality(seq=Seq(query_dna), name='query')

        # The runner takes the subject as a fasta file and the query as a
        # sequence object.
        subject_fasta = temp_fasta_file(subject)
        run_water = create_runner(tool='water',
                                  parameters={'subject': subject_fasta.name})
        alignment_fhand = run_water(query)['water']

        relations = build_relations_from_aligment(alignment_fhand,
                                                  query_name=query.name,
                                                  subject_name=subject.name)

        expected = {'query': [(0, 50), (51, 112), (113, 409)],
                    'subject': [(0, 50), (52, 113), (129, 425)]}
        assert relations == expected
コード例 #2
0
def _locate_codons_in_orf(sequence, orf, snv):
    '''It locates the snv in the orf coordinate system.

    The orf dna (orf.qualifiers['dna']) is written to a temporary fasta file
    and aligned against the given sequence with the 'water' runner. The
    relations built from that alignment are used to translate positions
    between the sequence (query) and the orf (subject).

    It returns a tuple (position, codon_start, snv_in_orf):
        - position: 'orf' when the snv maps inside the orf, 'utr5' when the
          snv lies before the orf start in the sequence, 'in utr3' when it
          lies after the orf end, and None otherwise.
        - codon_start: snv_in_orf rounded down to a multiple of 3, or None
          when the snv does not map inside the orf.
        - snv_in_orf: the snv position in orf coordinates, or None when the
          snv does not map inside the orf.

    When the snv does not map inside the orf the orf limits are transformed
    to the sequence coordinates; an error raised by those transformations is
    not caught here.
    '''
    query_name = sequence.name
    subject_name = 'subject'
    orf_seq = orf.qualifiers['dna']

    subject_fhand = NamedTemporaryFile(suffix='.fasta')
    try:
        subject_fhand.write('>%s\n%s\n' % (subject_name, orf_seq))
        # the aligner reads the file by name, so the content must be on disk
        subject_fhand.flush()
        parameters = {'subject': subject_fhand.name}
        aligner = create_runner(tool='water', parameters=parameters)
        result_fhand = aligner(sequence)['water']
        relations = build_relations_from_aligment(result_fhand,
                                                  query_name=query_name,
                                                  subject_name=subject_name)
    finally:
        # the subject file is not needed once the relations are built;
        # closing it releases the handle and removes the temporary file
        subject_fhand.close()

    coord = CoordSystem(relations=[relations])
    snv_pos = snv.location.start.position

    try:
        snv_in_orf = coord.transform(from_mol=query_name, to_mol=subject_name,
                                     position=snv_pos)
    except RuntimeError:
        # the snv position can not be translated into the orf
        snv_in_orf = None

    if snv_in_orf is not None:
        codon_start = snv_in_orf - snv_in_orf % 3
        return ('orf', codon_start, snv_in_orf)

    # The snv is outside the orf: it can be utr5, utr3 or None. The orf limits
    # in the sequence coordinates are only needed here, so they are not
    # calculated for the snvs that map inside the orf.
    orf_start_limit_in_seq = coord.transform(from_mol=subject_name,
                                             to_mol=query_name,
                                             position=0)
    orf_end_limit_in_seq = coord.transform(from_mol=subject_name,
                                           to_mol=query_name,
                                           position=len(orf_seq) - 1)

    if snv_pos < orf_start_limit_in_seq:
        position = 'utr5'
    elif snv_pos > orf_end_limit_in_seq:
        # NOTE(review): 'in utr3' is not consistent with 'utr5', but callers
        # may compare against this exact string, so it is kept as it was.
        position = 'in utr3'
    else:
        position = None

    return (position, None, None)