def test_pseudo_count(self):
    """Check information content computed with a pseudo count of 1.

    Builds an eight-sequence DNA alignment, then verifies both the
    per-column values stored in ``summary.ic_vector`` and the total
    returned by ``information_content`` when called with explicit
    expected frequencies, a log base of e and ``pseudo_count=1``.
    """
    # Worked example taken from
    # http://biologie.univ-mrs.fr/upload/p202/01.4.PSSM_theory.pdf
    alpha = unambiguous_dna
    rows = [
        ("ID001", "AACCACGTTTAA"),
        ("ID002", "CACCACGTGGGT"),
        ("ID003", "CACCACGTTCGC"),
        ("ID004", "GCGCACGTGGGG"),
        ("ID005", "TCGCACGTTGTG"),
        ("ID006", "TGGCACGTGTTT"),
        ("ID007", "TGACACGTGGGA"),
        ("ID008", "TTACACGTGCGC"),
    ]
    dna_align = MultipleSeqAlignment(
        [SeqRecord(Seq(bases, alpha), id=name) for name, bases in rows]
    )
    summary = SummaryInfo(dna_align)

    # Expected (background) letter frequencies passed as e_freq_table.
    background = FreqTable(
        {"A": 0.325, "G": 0.175, "T": 0.325, "C": 0.175},
        FREQ,
        unambiguous_dna,
    )
    total_ic = summary.information_content(
        e_freq_table=background,
        log_base=math.exp(1),
        pseudo_count=1,
    )

    # One expected value per alignment column (12 columns).
    per_column_ic = [
        0.110, 0.090, 0.360, 1.290, 0.800, 1.290,
        1.290, 0.80, 0.610, 0.390, 0.470, 0.040,
    ]
    self.assertAlmostEqualList(summary.ic_vector, per_column_ic, places=2)
    self.assertAlmostEqual(total_ic, 7.546, places=3)
def test_nucleotides(self):
    """Check consensus, PSSM and information content on a DNA alignment.

    Reads the FASTA file ``GFF/multi.fna`` with the unambiguous DNA
    alphabet, then verifies:

    * ``dumb_consensus`` and ``gap_consensus`` (with ``ambiguous="N"``)
      both give ``NNNNNNNN`` with a ``DNAAlphabet`` that is not
      ``unambiguous_dna``;
    * the string form of the position specific score matrix;
    * the information content for uniform expected frequencies.
    """
    filename = "GFF/multi.fna"
    # Not called ``format``: that name would shadow the builtin.
    file_format = "fasta"
    alignment = AlignIO.read(filename, file_format, alphabet=unambiguous_dna)
    summary = SummaryInfo(alignment)

    c = summary.dumb_consensus(ambiguous="N")
    self.assertEqual(str(c), 'NNNNNNNN')
    self.assertNotEqual(c.alphabet, unambiguous_dna)
    # assertIsInstance reports the offending object on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(c.alphabet, DNAAlphabet)

    c = summary.gap_consensus(ambiguous="N")
    self.assertEqual(str(c), 'NNNNNNNN')
    self.assertNotEqual(c.alphabet, unambiguous_dna)
    self.assertIsInstance(c.alphabet, DNAAlphabet)

    # Uniform expected frequencies over the four nucleotides.
    expected = FreqTable(
        {"A": 0.25, "G": 0.25, "T": 0.25, "C": 0.25},
        FREQ,
        unambiguous_dna,
    )

    # The gap-consensus sequence labels the rows of the matrix.
    m = summary.pos_specific_score_matrix(chars_to_ignore=['-'], axis_seq=c)
    # NOTE(review): the whitespace and line breaks inside this expected
    # literal look like they were collapsed; confirm it against the actual
    # str() output of the matrix before relying on this assertion.
    self.assertEqual(
        str(m),
        """ A C G T N 2.0 0.0 1.0 0.0 N 1.0 1.0 1.0 0.0 N 1.0 0.0 2.0 0.0 N 0.0 1.0 1.0 1.0 N 1.0 2.0 0.0 0.0 N 0.0 2.0 1.0 0.0 N 1.0 2.0 0.0 0.0 N 0.0 2.0 1.0 0.0 """)

    # The alphabet is generic, without a declared gap character, so we
    # must provide the expected frequencies and the characters to ignore
    # explicitly.
    ic = summary.information_content(e_freq_table=expected,
                                     chars_to_ignore=['-'])
    self.assertAlmostEqual(ic, 7.32029999423075, places=6)