예제 #1
0
def add_consensus(alignment, threshold=0.9, ambiguous='-', name='consensus'):
    """Append a gap-aware consensus record to ``alignment`` and return it.

    The consensus comes from ``SummaryInfo.gap_consensus`` called with
    ``threshold`` and ``ambiguous``. It is wrapped in a SeqRecord whose id
    and name are both ``name`` and added to the alignment object that was
    passed in; that same object is returned.
    """
    summary = SummaryInfo(alignment)
    consensus_record = SeqRecord(
        summary.gap_consensus(threshold, ambiguous),
        id=name,
        name=name,
    )
    alignment.extend([consensus_record])
    return alignment
예제 #2
0
def get_hist_ss_in_aln_as_string(alignment,type='Unknown',debug=0):
    """Return the level-0 annotation line for an alignment (borrowed from aln2html).

    Features are obtained from get_hist_ss_in_aln_for_html() and are read as
    a mapping {(begin, end): {'symbol': ..., 'level': ...}} where begin/end
    are used as inclusive list indices into the annotation line.
    Each feature is drawn on the first free line starting at its requested
    level (default 0); three lines (0, 1, 2) exist and a feature that fits
    on none of them is left out.

    alignment - passed to SummaryInfo and get_hist_ss_in_aln_for_html().
    type - forwarded to get_hist_ss_in_aln_for_html().
    debug - forwarded to get_hist_ss_in_aln_for_html().

    Returns line 0 as a list of characters, one per consensus column
    (the caller has to join it to get a str). When there are no features
    the line is all spaces. Lines 1 and 2 are filled but currently ignored.
    """
    sinfo=SummaryInfo(alignment)
    # The consensus is only needed for its length (number of columns).
    cons=sinfo.gap_consensus(threshold=0.9, ambiguous='X')
    features=get_hist_ss_in_aln_for_html(alignment,type=type,debug=debug)
    annot_line=[list(' '*len(cons)) for _ in range(3)]
    if(not features):
        return annot_line[0]
    # Place features in order of their start position.
    for k in sorted(features,key=lambda x: x[0]):
        symbol=features[k]['symbol']
        begin,end=k[0],k[1]+1
        # Try the requested level, then each higher one, never past line 2.
        for lev in range(features[k].get('level',0),len(annot_line)):
            if(''.join(annot_line[lev][begin:end]).isspace()):
                annot_line[lev][begin:end]=symbol*(end-begin)
                break

    return annot_line[0] #other are ignored currently
예제 #3
0
def add_consensus(alignment,threshold=0.9, ambiguous='-',name='consensus'):
    """Add a consensus record to the alignment and return the alignment.

    The consensus sequence is SummaryInfo(alignment).gap_consensus(threshold,
    ambiguous); the new record uses ``name`` for both its id and its name.
    The alignment passed in is extended and then returned.
    """
    cons=SummaryInfo(alignment).gap_consensus(threshold, ambiguous)
    record=SeqRecord(cons,id=name,name=name)
    alignment.extend([record])
    return alignment
예제 #4
0
def get_hist_ss_in_aln_as_string(alignment, type='Unknown', debug=0):
    """Return the level-0 annotation line for an alignment (borrowed from aln2html).

    Features are obtained from get_hist_ss_in_aln_for_html() and are read as
    a mapping {(begin, end): {'symbol': ..., 'level': ...}} where begin/end
    are used as inclusive list indices into the annotation line.
    Each feature is drawn on the first free line starting at its requested
    level (default 0); three lines (0, 1, 2) exist and a feature that fits
    on none of them is left out.

    alignment - passed to SummaryInfo and get_hist_ss_in_aln_for_html().
    type - forwarded to get_hist_ss_in_aln_for_html().
    debug - forwarded to get_hist_ss_in_aln_for_html().

    Returns line 0 as a list of characters, one per consensus column
    (the caller has to join it to get a str). When there are no features
    the line is all spaces. Lines 1 and 2 are filled but currently ignored.
    """
    sinfo = SummaryInfo(alignment)
    # The consensus is only needed for its length (number of columns).
    cons = sinfo.gap_consensus(threshold=0.9, ambiguous='X')
    features = get_hist_ss_in_aln_for_html(alignment, type=type, debug=debug)
    annot_line = [list(' ' * len(cons)) for _ in range(3)]
    if not features:
        return annot_line[0]
    # Place features in order of their start position.
    for k in sorted(features, key=lambda x: x[0]):
        symbol = features[k]['symbol']
        begin, end = k[0], k[1] + 1
        # Try the requested level, then each higher one, never past line 2.
        for lev in range(features[k].get('level', 0), len(annot_line)):
            if ''.join(annot_line[lev][begin:end]).isspace():
                annot_line[lev][begin:end] = symbol * (end - begin)
                break

    return annot_line[0]  #other are ignored currently
    def test_proteins(self):
        alpha = HasStopCodon(Gapped(generic_protein, "-"), "*")
        a = MultipleSeqAlignment([
            SeqRecord(Seq("MHQAIFIYQIGYP*LKSGYIQSIRSPEYDNW-", alpha),
                      id="ID001"),
            SeqRecord(Seq("MH--IFIYQIGYAYLKSGYIQSIRSPEY-NW*", alpha),
                      id="ID002"),
            SeqRecord(Seq("MHQAIFIYQIGYPYLKSGYIQSIRSPEYDNW*", alpha),
                      id="ID003")
        ])
        self.assertEqual(32, a.get_alignment_length())

        s = SummaryInfo(a)

        c = s.dumb_consensus(ambiguous="X")
        self.assertEqual(str(c), "MHQAIFIYQIGYXXLKSGYIQSIRSPEYDNW*")

        c = s.gap_consensus(ambiguous="X")
        self.assertEqual(str(c), "MHXXIFIYQIGYXXLKSGYIQSIRSPEYXNWX")

        m = s.pos_specific_score_matrix(chars_to_ignore=['-', '*'], axis_seq=c)
        self.assertEqual(
            str(m),
            """    A   D   E   F   G   H   I   K   L   M   N   P   Q   R   S   W   Y
M  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
H  0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
X  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 0.0 0.0 0.0 0.0
X  2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
I  0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
F  0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
I  0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Y  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0
Q  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0
I  0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
G  0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Y  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0
X  1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0
X  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0
L  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
K  0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
S  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0
G  0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Y  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0
I  0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Q  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0
S  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0
I  0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
R  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0
S  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0
P  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0
E  0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Y  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0
X  0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
N  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0
W  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0
X  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
""")

        ic = s.information_content(chars_to_ignore=['-', '*'])
        self.assertAlmostEqual(ic, 133.061475107, places=6)
예제 #6
0
    def test_nucleotides(self):
        filename = "GFF/multi.fna"
        format = "fasta"
        alignment = AlignIO.read(filename, format, alphabet=unambiguous_dna)
        summary = SummaryInfo(alignment)

        c = summary.dumb_consensus(ambiguous="N")
        self.assertEqual(str(c), "NNNNNNNN")

        c = summary.gap_consensus(ambiguous="N")
        self.assertEqual(str(c), "NNNNNNNN")

        expected = {"A": 0.25, "G": 0.25, "T": 0.25, "C": 0.25}

        m = summary.pos_specific_score_matrix(chars_to_ignore=["-"],
                                              axis_seq=c)
        self.assertEqual(
            str(m), """    A   C   G   T
N  2.0 0.0 1.0 0.0
N  1.0 1.0 1.0 0.0
N  1.0 0.0 2.0 0.0
N  0.0 1.0 1.0 1.0
N  1.0 2.0 0.0 0.0
N  0.0 2.0 1.0 0.0
N  1.0 2.0 0.0 0.0
N  0.0 2.0 1.0 0.0
""")

        # Have a generic alphabet, without a declared gap char, so must tell
        # provide the frequencies and chars to ignore explicitly.
        ic = summary.information_content(e_freq_table=expected,
                                         chars_to_ignore=["-"])
        self.assertAlmostEqual(ic, 7.32029999423075, places=6)
예제 #7
0
    def test_proteins(self):
        a = MultipleSeqAlignment([
            SeqRecord(Seq("MHQAIFIYQIGYP*LKSGYIQSIRSPEYDNW-"), id="ID001"),
            SeqRecord(Seq("MH--IFIYQIGYAYLKSGYIQSIRSPEY-NW*"), id="ID002"),
            SeqRecord(Seq("MHQAIFIYQIGYPYLKSGYIQSIRSPEYDNW*"), id="ID003")
        ])
        self.assertEqual(32, a.get_alignment_length())

        s = SummaryInfo(a)

        c = s.dumb_consensus(ambiguous="X")
        self.assertEqual(str(c), "MHQAIFIYQIGYXXLKSGYIQSIRSPEYDNW*")

        c = s.gap_consensus(ambiguous="X")
        self.assertEqual(str(c), "MHXXIFIYQIGYXXLKSGYIQSIRSPEYXNWX")

        m = s.pos_specific_score_matrix(chars_to_ignore=["-", "*"], axis_seq=c)
        self.assertEqual(
            str(m),
            """    A   D   E   F   G   H   I   K   L   M   N   P   Q   R   S   W   Y
M  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
H  0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
X  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 0.0 0.0 0.0 0.0
X  2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
I  0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
F  0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
I  0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Y  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0
Q  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0
I  0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
G  0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Y  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0
X  1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0
X  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0
L  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
K  0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
S  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0
G  0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Y  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0
I  0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Q  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0
S  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0
I  0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
R  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0
S  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0
P  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0
E  0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Y  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0
X  0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
N  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0
W  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0
X  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
""")

        letters = IUPACData.protein_letters
        base_freq = 1.0 / len(letters)
        e_freq_table = {letter: base_freq for letter in letters}
        ic = s.information_content(e_freq_table=e_freq_table,
                                   chars_to_ignore=["-", "*"])
        self.assertAlmostEqual(ic, 133.061475107, places=6)
예제 #8
0
    def test_proteins(self):
        alpha = HasStopCodon(Gapped(generic_protein, "-"), "*")
        a = MultipleSeqAlignment([
                SeqRecord(Seq("MHQAIFIYQIGYP*LKSGYIQSIRSPEYDNW-", alpha), id="ID001"),
                SeqRecord(Seq("MH--IFIYQIGYAYLKSGYIQSIRSPEY-NW*", alpha), id="ID002"),
                SeqRecord(Seq("MHQAIFIYQIGYPYLKSGYIQSIRSPEYDNW*", alpha), id="ID003")])
        self.assertEqual(32, a.get_alignment_length())

        s = SummaryInfo(a)

        c = s.dumb_consensus(ambiguous="X")
        self.assertEqual(str(c), "MHQAIFIYQIGYXXLKSGYIQSIRSPEYDNW*")

        c = s.gap_consensus(ambiguous="X")
        self.assertEqual(str(c), "MHXXIFIYQIGYXXLKSGYIQSIRSPEYXNWX")

        m = s.pos_specific_score_matrix(chars_to_ignore=['-', '*'], axis_seq=c)
        self.assertEqual(str(m), """    A   D   E   F   G   H   I   K   L   M   N   P   Q   R   S   W   Y
M  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
H  0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
X  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 0.0 0.0 0.0 0.0
X  2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
I  0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
F  0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
I  0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Y  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0
Q  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0
I  0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
G  0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Y  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0
X  1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0
X  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0
L  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
K  0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
S  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0
G  0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Y  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0
I  0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Q  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0
S  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0
I  0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
R  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0
S  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0
P  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0
E  0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
Y  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0
X  0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
N  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 0.0
W  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0
X  0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
""")

        ic = s.information_content(chars_to_ignore=['-', '*'])
        self.assertAlmostEqual(ic, 133.061475107, places=6)
예제 #9
0
def trim_aln_gaps(alignment,threshold=0.8):
    """Return a copy of the alignment without its gap-consensus columns.

    A column is dropped when SummaryInfo.gap_consensus(threshold=threshold,
    ambiguous='X') reports '-' for it; every other column is kept, in order.
    """
    summary=SummaryInfo(alignment)
    consensus=summary.gap_consensus(threshold=threshold, ambiguous='X')
    # Start from a zero-width slice so the rows are kept.
    trimmed=alignment[:,0:0]
    for col,residue in enumerate(consensus):
        if(residue!='-'):
            trimmed+=alignment[:,col:col+1]

    return trimmed
예제 #10
0
def trim_aln_gaps(alignment, threshold=0.8):
    """Return a copy of the alignment without its gap-consensus columns.

    A column is dropped when SummaryInfo.gap_consensus(threshold=threshold,
    ambiguous='X') reports '-' for it; every other column is kept, in order.
    """
    consensus = SummaryInfo(alignment).gap_consensus(threshold=threshold,
                                                     ambiguous='X')
    kept_columns = [
        index for index, residue in enumerate(consensus) if residue != '-'
    ]

    # Start from a zero-width slice and glue the kept columns back on.
    trimmed = alignment[:, 0:0]
    for index in kept_columns:
        trimmed += alignment[:, index:index + 1]
    return trimmed
예제 #11
0
def get_hist_ss_in_aln(alignment, type='Unknown', debug=0):
    """Return the sequence elements of a histone alignment.

    The alignment is reduced to a gap consensus (threshold 0.5, 'X' for
    ambiguous columns), gap characters in it are replaced by 'X', and the
    result is handed to get_hist_ss() together with ``type`` and ``debug``.

    All numbers assume the first element of the sequence has number 0,
    unlike PDB numbering.

    Returns the two values produced by get_hist_ss() as a tuple.
    """
    if debug:
        print(alignment)

    # Extract the consensus and mask its gaps with 'X'.
    summary = SummaryInfo(alignment)
    gapped = summary.gap_consensus(threshold=0.5, ambiguous='X')
    cons = Seq(str(gapped).replace('-', 'X'))

    if debug:
        print("Consensus")
        print(cons)

    variant, elements = get_hist_ss(cons, type, debug)
    return variant, elements
예제 #12
0
def get_hist_ss_in_aln(alignment,type='Unknown',debug=0):
    """Return the sequence elements of a histone alignment.

    The alignment is reduced to a gap consensus (threshold 0.5, 'X' for
    ambiguous columns), gap characters in it are replaced by 'X', and the
    result is handed to get_hist_ss() together with ``type`` and ``debug``.

    All numbers assume the first element of the sequence has number 0,
    unlike PDB numbering.

    Returns the two values produced by get_hist_ss() as a tuple.
    """

    #Let's extract consensus
    # print(...) with a single argument behaves the same on Python 2 and 3.
    if(debug):
        print(alignment)
    a=SummaryInfo(alignment)
    cons=a.gap_consensus(threshold=0.5, ambiguous='X')
    cons=Seq(str(cons).replace('-','X'))
    if(debug):
        print("Consensus")
        print(cons)
    hv,ss=get_hist_ss(cons,type,debug)
    return hv,ss
예제 #13
0
 def getConservedDomain(self):
     """Return the longest stretch of the gap consensus free of 'X' and '-'.

     The consensus of the stored alignment is split at every 'X' or '-'
     and the longest piece is returned (the first one on ties). A stretch
     that runs to the very end of the consensus is considered as well.
     Returns '' when no such stretch exists.
     """
     align = SummaryInfo(self.__alignment)
     consenso = str(align.gap_consensus())
     # Both 'X' and '-' end a conserved stretch, so fold them into a single
     # separator before splitting; split() also yields the trailing stretch.
     runs = consenso.replace('X', '-').split('-')
     # max() keeps the first of equally long stretches.
     return max(runs, key=len)
예제 #14
0
def output_consensus(y, threshold_value, consensus_output_dir):
    """Write the gap consensus of a FASTA alignment file as a FASTA record.

    y - path of the alignment file, read in "fasta" format. The part of its
        base name before '_align' is used to name the output.
    threshold_value - passed as ``threshold`` to SummaryInfo.gap_consensus.
    consensus_output_dir - directory that receives '<name>_cons.fasta'.

    The consensus uses 'N' for ambiguous columns and require_multiple=1.
    ``consensus_alpha`` is taken from the name ``alphabet``, which is not
    defined in this function and must exist in the enclosing module.
    The record id is '<name>_consensus'.
    """
    file_name = os.path.basename(y)
    fasta_name = file_name.split('_align')[0]
    # Close the input file as soon as the alignment has been parsed.
    with open(y) as alignment_handle:
        alignment = AlignIO.read(alignment_handle, "fasta")
    summary_align = SummaryInfo(alignment)
    consensus = summary_align.gap_consensus(threshold = threshold_value,
                                            ambiguous = 'N',
                                            consensus_alpha = alphabet,
                                            require_multiple = 1)
    consensus_seq = SeqRecord.SeqRecord(consensus,id=fasta_name+"_consensus")
    output_file_name = str(consensus_output_dir+'/'+fasta_name+"_cons.fasta")
    # The with-block closes the output file even if writing fails.
    with open(output_file_name, "w") as output_handle:
        # print(...) with a single argument works on Python 2 and 3.
        print("Writing consensus sequence for " + fasta_name)
        SeqIO.write(consensus_seq, output_handle, "fasta")
예제 #15
0
    def test_nucleotides(self):
        filename = "GFF/multi.fna"
        format = "fasta"
        alignment = AlignIO.read(filename, format, alphabet=unambiguous_dna)
        summary = SummaryInfo(alignment)

        c = summary.dumb_consensus(ambiguous="N")
        self.assertEqual(str(c), 'NNNNNNNN')
        self.assertNotEqual(c.alphabet, unambiguous_dna)
        self.assertTrue(isinstance(c.alphabet, DNAAlphabet))

        c = summary.gap_consensus(ambiguous="N")
        self.assertEqual(str(c), 'NNNNNNNN')
        self.assertNotEqual(c.alphabet, unambiguous_dna)
        self.assertTrue(isinstance(c.alphabet, DNAAlphabet))

        expected = FreqTable({"A": 0.25, "G": 0.25, "T": 0.25, "C": 0.25},
                             FREQ, unambiguous_dna)

        m = summary.pos_specific_score_matrix(chars_to_ignore=['-'],
                                              axis_seq=c)
        self.assertEqual(str(m), """    A   C   G   T
N  2.0 0.0 1.0 0.0
N  1.0 1.0 1.0 0.0
N  1.0 0.0 2.0 0.0
N  0.0 1.0 1.0 1.0
N  1.0 2.0 0.0 0.0
N  0.0 2.0 1.0 0.0
N  1.0 2.0 0.0 0.0
N  0.0 2.0 1.0 0.0
""")

        # Have a generic alphabet, without a declared gap char, so must tell
        # provide the frequencies and chars to ignore explicitly.
        ic = summary.information_content(e_freq_table=expected,
                                         chars_to_ignore=['-'])
        self.assertAlmostEqual(ic, 7.32029999423075, places=6)
예제 #16
0
def aln2html(msa,filename,features=None,title=None,description=True,field1w=20,field2w=35):
    """
    This function outputs HTML from msa and annotates features.
    msa - Biopython MSA,
    filename - html file to output the result.
    features - a dictionary of features, organized as follows:
    {(begin,end):{'level':0(default),'symbol':'H','description':'desc'}}
    if features overlap and not levels, they will be split to different levels.
    Only three levels (0,1,2) are available.
    title - text placed above the table; nothing is printed when it is None.
    description - when true, a second column with each record's description
    (and the level label of annotation rows) is added.
    field1w, field2w - number of characters kept from the id and from the
    description; the cells are padded to that width plus 2.

    One annotation row is written for every level that holds at least one
    symbol (level 2 first, level 0 last, directly above the sequences).
    Residues equal to the 0.9-threshold gap consensus (and not '-') get the
    'conserved' class. Ids containing a run of two or more digits are linked
    to an NCBI protein search for that number.
    """
    style="""
pre,td{margin: 0px;padding: 0px;border: 0px;}
.pos{color:blue;}
.neg{color:red;}
.pol{color:green;}
.hphob{color:grey;}
.def{color:black;}
.conserved{background:lightblue;}
.nonconserved{background:white;}
"""
    sinfo=SummaryInfo(msa)
    cons=sinfo.gap_consensus(threshold=0.9, ambiguous='X')

    # Shared cell templates for the first (name) and second (description) column.
    name_cell='<TR><TD><PRE>{0:<{field1w}}</PRE></TD>'
    desc_cell='<TD><PRE>{0:<{field2w}}</PRE></TD>'

    #Let's work on features
    f_description=''
    rows=[]
    if(features):
        annot_line,f_description=_aln2html_annotation_lines(features,len(cons))
        for lev in (2,1,0):
            # Skip levels that hold nothing but blanks.
            if(''.join(annot_line[lev]).isspace()):
                continue
            row=name_cell.format('annotation',field1w=field1w+2)
            if(description):
                row+=desc_cell.format('level {0}'.format(lev),field2w=field2w+2)
            row+=''.join('<TD><PRE>{0}</PRE></TD>'.format(c) for c in annot_line[lev])
            rows.append(row+'</TR>')
        f_description+='<BR><BR>'

    for s in msa:
        cells=[]
        gi_match=re.search(r'(\d\d+)',s.id)
        if(gi_match):
            cells.append('<TR><TD><PRE><a href="http://www.ncbi.nlm.nih.gov/protein/?term={0}">{1:<{field1w}}</a></PRE></TD>'.format(gi_match.group(1),s.id[:field1w],field1w=field1w+2))
        else:
            cells.append(name_cell.format(s.id[:field1w],field1w=field1w+2))
        if(description):
            cells.append(desc_cell.format(s.description[:field2w],field2w=field2w+2))
        for i,c in enumerate(s.seq):
            cells.append('<TD><PRE class="{0} {1}">{2}</PRE></TD>'.format(restypedict.get(c,'def'),'conserved' if c==cons[i] and c!='-' else 'nonconserved',c))
        cells.append('</TR>')
        rows.append(''.join(cells))

    page="""
<!DOCTYPE html>
<HTML>
<HEAD>
<META http-equiv="Content-Type" content="text/html; charset=utf-8"/>
<TITLE>MultipleSequenceAlignment</TITLE>
<style>
{style}
</style>
</HEAD>
<BODY style="background-color:white; color:black; a:link:blue; a:active:red; a:visited:purple">
{title}<BR><BR>
{features}
<TABLE style="border:0px; border-spacing:0px; background-color:white; color:black; a:link:blue; a:active:red; a:visited:purple;">

{msatext}

</TABLE>
</BODY>
</HTML>
""".format(
        title='' if title is None else title,
        msatext=''.join(rows),
        style=style,
        features=f_description,
    )

    # The with-block closes the file even if writing fails.
    with open(filename,'w') as out:
        out.write(page)


def _aln2html_annotation_lines(features,width,levels=3):
    """Lay features out on ``levels`` character lines of ``width`` columns.

    Returns (lines, legend): ``lines`` is a list of character lists and
    ``legend`` is the 'symbol-description;' text of the described features.
    """
    lines=[list(' '*width) for _ in range(levels)]
    legend=''
    # Features are placed in order of their start position.
    for k in sorted(features,key=lambda x: x[0]):
        feature=features[k]
        if(feature.get('description',0)):
            legend+='{0}-{1};'.format(feature['symbol'],feature['description'])
        begin,end=k[0],k[1]+1
        # Try the requested level first, then each higher one; a feature
        # that fits on none of the lines is left out.
        for lev in range(feature.get('level',0),levels):
            if(''.join(lines[lev][begin:end]).isspace()):
                lines[lev][begin:end]=feature['symbol']*(end-begin)
                break
    return lines,legend
예제 #17
0
 def getPerc(self):
     """Return the fraction of consensus columns that are neither 'X' nor '-'.

     The value is a float between 0.0 and 1.0 (a fraction, not a
     percentage). Raises ZeroDivisionError when the consensus is empty.
     """
     # Compute the consensus once instead of once per term.
     consensus = str(SummaryInfo(self.__alignment).gap_consensus())
     unresolved = consensus.count("X") + consensus.count("-")
     # Force float division so Python 2 does not truncate the ratio.
     return 1.0 - float(unresolved) / len(consensus)
예제 #18
0
 def getConsenso(self):
     """Return the gap consensus of the stored alignment.

     The result is whatever SummaryInfo.gap_consensus() returns when called
     with its default arguments.
     """
     return SummaryInfo(self.__alignment).gap_consensus()