Ejemplo n.º 1
0
def mult_align(sum_dict, align_dict):
    """Return a Biopython multiple alignment instance (MultipleSeqAlignment).

    Args:
        sum_dict: Mapping from sequence key to a summary record exposing
            ``pdb2`` and ``chain2``; their concatenation becomes the id of
            the corresponding alignment row.
        align_dict: Container supporting ``len()`` and ``abs(i)`` for
            ``i`` in ``1..len(align_dict)``. Each ``abs(i)`` result exposes
            ``pos_align_dict``, mapping a sequence key to an object whose
            ``aa`` attribute is the residue at that position (assumed to be
            a string - TODO confirm against the parser that builds it).

    Returns:
        A ``MultipleSeqAlignment`` over a gapped extended-protein alphabet
        with one ``SeqRecord`` per sequence key, appended in sorted key
        order.

    Raises:
        KeyError: If a later position contains a sequence key that the
            first position does not (assuming ``pos_align_dict`` behaves
            like a plain dict).
    """
    # Gather residues per sequence in lists and join once at the end;
    # growing a string with ``+=`` inside the loop is quadratic in the
    # alignment length.
    residues = {key: [] for key in align_dict.abs(1).pos_align_dict}

    for position in range(1, len(align_dict) + 1):
        # loop on positions
        pos_align = align_dict.abs(position).pos_align_dict
        for key in pos_align:
            # loop within a position
            residues[key].append(pos_align[key].aa)

    alpha = Alphabet.Gapped(Alphabet.IUPAC.extended_protein)
    fssp_align = MultipleSeqAlignment([], alphabet=alpha)
    for key in sorted(residues):
        fssp_align.append(
            SeqRecord(Seq("".join(residues[key]), alpha),
                      sum_dict[key].pdb2 + sum_dict[key].chain2))
    return fssp_align
Ejemplo n.º 2
0
 def get_alphabet(self):
     """Return the alphabet for this sequence, gapped when it is aligned.

     The base alphabet is looked up in ``self.alphabets`` by ``self.type``
     and falls back to ``Alphabet.generic_alphabet`` when the type is not
     registered. It is wrapped in ``Alphabet.Gapped`` only when
     ``self.mol_seq`` is truthy and its ``is_aligned`` flag is truthy.
     """
     base_alphabet = self.alphabets.get(self.type, Alphabet.generic_alphabet)
     is_aligned = self.mol_seq and self.mol_seq.is_aligned
     return Alphabet.Gapped(base_alphabet) if is_aligned else base_alphabet
Ejemplo n.º 3
0
    print(consensus)
    print("")
    # Position-specific score matrix along the consensus, skipping gap chars.
    print(
        summary.pos_specific_score_matrix(chars_to_ignore=['-'],
                                          axis_seq=consensus))
    print("")
    # The alphabet is generic and declares no gap character, so the expected
    # frequencies and the characters to ignore must be passed explicitly.
    print(
        summary.information_content(e_freq_table=expected,
                                    chars_to_ignore=['-']))
    print("")
    print("Trying a protein sequence with gaps and stops")

    # Generic protein alphabet wrapped to declare "-" as the gap character
    # and "*" as the stop symbol.
    alpha = Alphabet.HasStopCodon(
        Alphabet.Gapped(Alphabet.generic_protein, "-"), "*")
    a = Alignment(alpha)
    a.add_sequence("ID001", "MHQAIFIYQIGYP*LKSGYIQSIRSPEYDNW-")
    a.add_sequence("ID002", "MH--IFIYQIGYAYLKSGYIQSIRSPEY-NW*")
    a.add_sequence("ID003", "MHQAIFIYQIGYPYLKSGYIQSIRSPEYDNW*")
    print(a)
    # Rule of "=" as wide as the alignment length.
    print("=" * a.get_alignment_length())

    s = SummaryInfo(a)
    # Print both consensus variants, each using "X" for ambiguous columns.
    c = s.dumb_consensus(ambiguous="X")
    print(c)
    c = s.gap_consensus(ambiguous="X")
    print(c)
    print("")
    # Score matrix along the gap consensus, ignoring both gaps and stops.
    print(s.pos_specific_score_matrix(chars_to_ignore=['-', '*'], axis_seq=c))