Ejemplo n.º 1
0
def run_group_alignment(sequence_group):
    """Build a FASTA alignment of a sequence group against its longest member.

    Gap characters ('-') are removed from every input sequence before it is
    wrapped in a ``SeqRecord``. A group holding a single sequence is not
    aligned: that record's FASTA text is used as both the reference and the
    alignment. Otherwise the first record of maximal length is chosen as the
    reference and handed, together with all records, to ``align_to_refseq``.

    Args:
        sequence_group: Mapping from sequence id to sequence string.

    Returns:
        dict: ``'ref'`` is the FASTA text of the reference, ``'alignment'``
        is the FASTA text of the alignment, and ``'seqs'`` is the list of
        ``SeqRecord`` objects built from the input.

    Raises:
        ValueError: If ``sequence_group`` is empty.
    """
    seqrecords = [
        Bio.SeqRecord.SeqRecord(
            Bio.Seq.Seq(raw_sequence.replace('-', ''),
                        Bio.Alphabet.IUPAC.IUPACAmbiguousDNA),
            id=seq_id,
            name=seq_id,
            description='',
        )
        for seq_id, raw_sequence in sequence_group.items()
    ]

    # Nothing to align against when the group has exactly one member.
    if len(seqrecords) == 1:
        only_fasta = seqrecords[0].format('fasta')
        return {'ref': only_fasta, 'alignment': only_fasta, 'seqs': seqrecords}

    # max() keeps the first record among those sharing the greatest length.
    refseq = max(seqrecords, key=lambda record: len(record.seq))

    msa, _discarded = align_to_refseq(
        refseq,
        seqrecords,
        score_matrix=BLOSUM62.load(),
        codon=True,
        expected_identity=0.6,
        keep_insertions=False
    )

    # Serialise the alignment to FASTA text through an in-memory buffer.
    with io.StringIO() as fasta_buffer:
        Bio.SeqIO.write(msa, fasta_buffer, "fasta")
        alignment_text = fasta_buffer.getvalue()

    return {
        'ref': refseq.format('fasta'),
        'alignment': alignment_text,
        'seqs': seqrecords,
    }
Ejemplo n.º 2
0
def run_group_alignment(seqrecords, refseqs):
    """Score every query record against each reference, grouped by type.

    For each reference in ``refseqs`` the reference followed by all of
    ``seqrecords`` is passed to ``align_to_refseq``. Every returned record
    other than the first receives a score: the mean of its ``_pbpscore`` and
    ``_nbpidentical`` annotations, each divided by the corresponding
    annotation of the first returned record.

    Args:
        seqrecords: List of records exposing ``id``. The list is not
            modified. If ids repeat, a score is stored at the position of
            the last record carrying that id.
        refseqs: Mapping from a type label to an iterable of reference
            records exposing ``id``.

    Returns:
        dict: ``result['names'][label]`` lists the reference ids seen for
        ``label`` in iteration order. ``result[label][ref_id]`` is a list
        with one slot per entry of ``seqrecords`` holding that record's
        score, or ``None`` when the record was not in the returned alignment.
    """
    sm = BLOSUM62.load()

    seq_scores = {'names': {}}

    # Position of each query record in the caller's list, keyed by id.
    sequence_names = {
        record.id: position for position, record in enumerate(seqrecords)
    }
    slot_count = len(seqrecords)

    # Private working list with a leading slot for the current reference.
    # The caller's list is never touched, so repeated calls with the same
    # list see the same input.
    alignment_input = [None] + list(seqrecords)

    for ref_type, references in refseqs.items():
        seq_scores[ref_type] = {}
        seq_scores['names'][ref_type] = []

        for reference in references:
            alignment_input[0] = reference
            seq_scores['names'][ref_type].append(reference.id)

            msa, discarded = align_to_refseq(
                reference,
                alignment_input,
                score_matrix=sm,
                codon=True,
                expected_identity=0.4,
                keep_insertions=True,
                quiet=True
            )

            if discarded:
                print(discarded)

            # The first returned record supplies the normalising values;
            # presumably it is the reference itself -- TODO confirm against
            # align_to_refseq.
            max_score = msa[0].annotations['_pbpscore']
            max_len = msa[0].annotations['_nbpidentical']

            scores = seq_scores[ref_type][reference.id] = [None] * slot_count
            msa.pop(0)
            for successful in msa:
                annotations = successful.annotations
                scores[sequence_names[successful.id]] = (
                    annotations['_pbpscore'] / max_score
                    + annotations['_nbpidentical'] / max_len
                ) * 0.5

    return seq_scores
Ejemplo n.º 3
0
def run_group_alignment(seqrecords, refseqs):
    """Score every query record against each reference, grouped by type.

    For each reference in ``refseqs`` the reference followed by all of
    ``seqrecords`` is passed to ``align_to_refseq``. Every returned record
    other than the first receives a score: the mean of its ``_pbpscore`` and
    ``_nbpidentical`` annotations, each divided by the corresponding
    annotation of the first returned record.

    Args:
        seqrecords: List of records exposing ``id``. The list is not
            modified. If ids repeat, a score is stored at the position of
            the last record carrying that id.
        refseqs: Mapping from a type label to an iterable of reference
            records exposing ``id``.

    Returns:
        dict: ``result['names'][label]`` lists the reference ids seen for
        ``label`` in iteration order. ``result[label][ref_id]`` is a list
        with one slot per entry of ``seqrecords`` holding that record's
        score, or ``None`` when the record was not in the returned alignment.
    """
    sm = BLOSUM62.load()

    seq_scores = {'names': {}}

    # Position of each query record in the caller's list, keyed by id.
    sequence_names = {
        record.id: position for position, record in enumerate(seqrecords)
    }
    slot_count = len(seqrecords)

    # Private working list with a leading slot for the current reference.
    # The caller's list is never touched, so repeated calls with the same
    # list see the same input.
    alignment_input = [None] + list(seqrecords)

    for ref_type, references in refseqs.items():
        seq_scores[ref_type] = {}
        seq_scores['names'][ref_type] = []

        for reference in references:
            alignment_input[0] = reference
            seq_scores['names'][ref_type].append(reference.id)

            msa, discarded = align_to_refseq(reference,
                                             alignment_input,
                                             score_matrix=sm,
                                             codon=True,
                                             expected_identity=0.4,
                                             keep_insertions=True,
                                             quiet=True)

            if discarded:
                print(discarded)

            # The first returned record supplies the normalising values;
            # presumably it is the reference itself -- TODO confirm against
            # align_to_refseq.
            max_score = msa[0].annotations['_pbpscore']
            max_len = msa[0].annotations['_nbpidentical']

            scores = seq_scores[ref_type][reference.id] = [None] * slot_count
            msa.pop(0)
            for successful in msa:
                annotations = successful.annotations
                scores[sequence_names[successful.id]] = (
                    annotations['_pbpscore'] / max_score +
                    annotations['_nbpidentical'] / max_len) * 0.5

    return seq_scores