Exemple #1
0
def stitch(record1, record2):
    """Score every candidate overlap between two reads and build its consensus.

    record1 is used as given; record2 is reverse-complemented and its
    quality list reversed, so that the end of record1 is compared against
    the start of the reverse-complemented record2.

    Both records must provide ``.seq.tostring()``,
    ``.letter_annotations['phred_quality']`` and ``len()``.

    Returns a dict mapping each overlap length (1 up to and including the
    length of the shorter read) to a ``(consensus, score)`` tuple.  The
    score is the summed log10 probability of both reads' bases given the
    consensus, plus ``2 * log10(4)`` per position (the null-hypothesis
    term, p = 1/4 per base).
    """
    # Reinterpret each one-element string array as an array of single
    # characters.  NOTE(review): this assumes tostring() returns a byte
    # string, so that one character is one byte -- confirm.
    seq1 = array([record1.seq.tostring()])
    seq2 = array([reverse_complement(record2.seq.tostring())])
    seq1.dtype = '|S1'
    seq2.dtype = '|S1'
    quals1 = array(record1.letter_annotations['phred_quality'])
    quals2 = array(record2.letter_annotations['phred_quality'][::-1])

    # With p = 10 ** (-Q / 10):
    #   log10p_consensus = log10(1 - p)   (the read agrees with the consensus)
    #   log10p_error     = log10(p / 3)   (the read shows one specific other base)
    log10p_consensus_1 = log1p(-power(10, -quals1 / 10.)) / log(10)
    log10p_consensus_2 = log1p(-power(10, -quals2 / 10.)) / log(10)
    log10p_error_1 = -log10(3) - (quals1 / 10.)
    log10p_error_2 = -log10(3) - (quals2 / 10.)

    min_overlap = 1
    # An overlap longer than the shorter read would make the two slices
    # below differ in length, which choose() cannot combine; so the bound
    # is the shorter read, and it is included in the range.
    max_overlap = min(len(record1), len(record2))
    overlaps = {}
    for overlap in range(min_overlap, max_overlap + 1):
        s1 = seq1[-overlap:]
        s2 = seq2[:overlap]
        q1 = quals1[-overlap:]
        q2 = quals2[:overlap]
        lpc1 = log10p_consensus_1[-overlap:]
        lpc2 = log10p_consensus_2[:overlap]
        lpe1 = log10p_error_1[-overlap:]
        lpe2 = log10p_error_2[:overlap]

        # Per position, take the base with the higher quality (ties go to
        # record1's base).
        consensus = choose(q1 < q2, [s1, s2])
        score = sum(choose(consensus == s1, [lpe1, lpc1])) + sum(choose(consensus == s2, [lpe2, lpc2])) + len(consensus) * log10(4) * 2    # last term is null hypothesis, p=1/4
        # Collapse the per-character array back into a single string element.
        consensus.dtype = '|S%i' % len(consensus)
        overlaps[overlap] = (consensus[0], score)

    return overlaps
Exemple #2
0
def build_aln(alnsummary, vulgar_commands, queryseq, targetseq):
    """Build full alignment from exonerate using 'parsable' preset and vulgar output.

    Args:
        alnsummary: mapping; the keys read here are 'query_aln_begin',
            'target_aln_begin', 'query_strand' and 'target_strand'.
        vulgar_commands: iterable of (label, query_length, target_length)
            entries.  Only the labels 'M' (match) and 'G' (gap) are handled.
        queryseq: full query sequence (sliced and concatenated as a string).
        targetseq: full target sequence (sliced and concatenated as a string).

    Returns:
        A (queryaln, targetaln) tuple of strings: the two sequences with
        leading spaces so the alignment starts in the same column, '-' in
        gap positions, and the unaligned flanks attached on both sides.

    Raises:
        ValueError: if a command label is neither 'M' nor 'G'.
        AssertionError: if an 'M' command has unequal lengths, or a 'G'
            command does not have exactly one zero length.
    """
    # process strands. the position vars below will always progress
    # from 0->len(seq), so the seqs must be revcomped accordingly
    queryposition = alnsummary['query_aln_begin']
    targetposition = alnsummary['target_aln_begin']
    if alnsummary['query_strand'] == '-':
        queryseq = seqtools.reverse_complement(queryseq)
        queryposition = len(queryseq) - queryposition
    if alnsummary['target_strand'] == '-':
        targetseq = seqtools.reverse_complement(targetseq)
        targetposition = len(targetseq) - targetposition
    pad = abs(queryposition - targetposition)

    # pad whichever sequence has the shorter pre-alignment flank
    queryaln = ''
    targetaln = ''
    if queryposition > targetposition:
        targetaln = ' ' * pad
    else:
        queryaln = ' ' * pad

    # add pre-aln sequence
    queryaln += queryseq[0:queryposition]
    targetaln += targetseq[0:targetposition]

    # walk through alignment (from vulgar output)
    for cmd in vulgar_commands:
        label, querylen, targetlen = cmd[0], cmd[1], cmd[2]
        if label == 'M':
            assert querylen == targetlen, "match lengths differ"
            queryaln += queryseq[queryposition:queryposition + querylen]
            targetaln += targetseq[targetposition:targetposition + targetlen]
        elif label == 'G':
            # exactly one side of a gap must have zero length (xor)
            assert (querylen == 0) != (targetlen == 0), \
                "gap must have exactly one zero length"
            if querylen == 0:
                # gap in the query: the target advances alone
                queryaln += '-' * targetlen
                targetaln += targetseq[targetposition:targetposition +
                                       targetlen]
            else:
                # gap in the target: the query advances alone
                queryaln += queryseq[queryposition:queryposition + querylen]
                targetaln += '-' * querylen
        else:
            raise ValueError(
                "I do not understand the vulgar command %s" % label)
        queryposition += querylen
        targetposition += targetlen

    # add any post-aln sequence
    queryaln += queryseq[queryposition:]
    targetaln += targetseq[targetposition:]

    return (queryaln, targetaln)
Exemple #3
0
 def seqdict2revcompseqdict(seqdict):
     """Return a new dict with the same keys and reverse-complemented values.

     Each value of `seqdict` is passed through
     seqtools.reverse_complement; `seqdict` itself is not modified.
     """
     revcompdict = {}
     # .items() works on both Python 2 and 3; .iteritems() is Python 2 only.
     for name, seq in seqdict.items():
         revcompdict[name] = seqtools.reverse_complement(seq)
     return revcompdict
Exemple #4
0
 def seqdict2revcompseqdict(seqdict):
     """Map every key of `seqdict` to the reverse complement of its value.

     Values are transformed with seqtools.reverse_complement and stored in
     a freshly built dict; the input dict is left untouched.
     """
     revcompdict = {}
     # dict.iteritems() no longer exists on Python 3, so iterate .items().
     for key, sequence in seqdict.items():
         revcompdict[key] = seqtools.reverse_complement(sequence)
     return revcompdict
Exemple #5
0
def build_aln(alnsummary,vulgar_commands,queryseq,targetseq):
    """Build full alignment from exonerate using 'parsable' preset and vulgar output.

    Args:
        alnsummary: mapping; the keys read here are 'query_aln_begin',
            'target_aln_begin', 'query_strand' and 'target_strand'.
        vulgar_commands: iterable of (label, query_length, target_length)
            entries.  Only the labels 'M' (match) and 'G' (gap) are handled.
        queryseq: full query sequence (sliced and concatenated as a string).
        targetseq: full target sequence (sliced and concatenated as a string).

    Returns:
        A (queryaln, targetaln) tuple of strings: the two sequences with
        leading spaces so the alignment starts in the same column, '-' in
        gap positions, and the unaligned flanks attached on both sides.

    Raises:
        ValueError: if a command label is neither 'M' nor 'G'.
        AssertionError: if an 'M' command has unequal lengths, or a 'G'
            command does not have exactly one zero length.
    """
    # process strands. the position vars below will always progress
    # from 0->len(seq), so the seqs must be revcomped accordingly
    queryposition  = alnsummary['query_aln_begin']
    targetposition = alnsummary['target_aln_begin']
    if alnsummary['query_strand'] == '-':
        queryseq = seqtools.reverse_complement(queryseq)
        queryposition = len(queryseq) - queryposition
    if alnsummary['target_strand'] == '-':
        targetseq = seqtools.reverse_complement(targetseq)
        targetposition = len(targetseq) - targetposition
    pad = abs(queryposition - targetposition)

    # build alignment, padding whichever side has the shorter leading flank
    queryaln  = ''
    targetaln = ''
    if queryposition > targetposition:
        targetaln = ' ' * pad
    else:
        queryaln  = ' ' * pad

    # add pre-aln sequence
    queryaln  += queryseq[0:queryposition]
    targetaln += targetseq[0:targetposition]

    # walk through alignment (from vulgar output)
    for cmd in vulgar_commands:
        kind, qlen, tlen = cmd[0], cmd[1], cmd[2]
        if kind == 'M':
            assert qlen == tlen, "match lengths differ"
            queryaln  += queryseq[queryposition:queryposition+qlen]
            targetaln += targetseq[targetposition:targetposition+tlen]
        elif kind == 'G':
            # xor: exactly one of the two lengths must be zero
            assert (qlen == 0) != (tlen == 0), "gap must have exactly one zero length"
            if qlen == 0:
                # query is gapped while the target advances
                queryaln  += '-' * tlen
                targetaln += targetseq[targetposition:targetposition+tlen]
            else:
                # target is gapped while the query advances
                queryaln  += queryseq[queryposition:queryposition+qlen]
                targetaln += '-' * qlen
        else:
            raise ValueError("I do not understand the vulgar command %s" % kind)
        queryposition  += qlen
        targetposition += tlen

    # add any post-aln sequence
    queryaln  += queryseq[queryposition:]
    targetaln += targetseq[targetposition:]

    return (queryaln,targetaln)