# test_read_write_clustal: read a Clustal alignment and check its summary statistics.
#
# Steps performed by the body below, in order:
#   1. Read "Clustalw/opuntia.aln" (resolved against os.getcwd()) with
#      AlignIO.read(..., "clustal") using a gapped unambiguous-DNA alphabet.
#   2. Assert the alignment holds 7 records, compare every record's
#      description and sequence with literals, and assert that
#      get_alignment_length() is 156.
#   3. Build AlignInfo.SummaryInfo(alignment) and check:
#        - dumb_consensus() is a Seq instance equal to the expected consensus;
#        - replacement_dictionary(["N"]) has 16 entries with the listed counts;
#        - str() of pos_specific_score_matrix() for three axis choices:
#          the consensus, the default (chars_to_ignore only), and the
#          sequence of the second record;
#        - information_content() for the 5..50 range, for the whole
#          alignment, and with a uniform (0.25 each) expected-frequency table;
#        - get_column() and ic_vector[] for columns 1 and 7;
#        - the text AlignInfo.print_info_content() writes to a StringIO.
#
# NOTE(review): despite "write" in the name, no alignment-writing call
# appears in this body; only reading and summary output are exercised.
# NOTE(review): the triple-quoted expected strings look whitespace-collapsed
# in this copy (single spaces where row breaks/column padding would be
# expected) -- confirm against the real str(matrix) layout before editing.
def test_read_write_clustal(self): """Test the base alignment stuff.""" path = os.path.join(os.getcwd(), "Clustalw", "opuntia.aln") alignment = AlignIO.read(path, "clustal", alphabet=Alphabet.Gapped( IUPAC.unambiguous_dna)) self.assertEqual(len(alignment), 7) seq_record = alignment[0] self.assertEqual(seq_record.description, "gi|6273285|gb|AF191659.1|AF191") self.assertEqual( seq_record.seq, Seq("TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATA----------ATATATTTCAAATTTCCTTATATACCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCCATTGATTTAGTGTACCAGA" )) seq_record = alignment[1] self.assertEqual(seq_record.description, "gi|6273284|gb|AF191658.1|AF191") self.assertEqual( seq_record.seq, "TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATA--------ATATATTTCAAATTTCCTTATATACCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA" ) seq_record = alignment[2] self.assertEqual(seq_record.description, "gi|6273287|gb|AF191661.1|AF191") self.assertEqual( seq_record.seq, "TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATA----------ATATATTTCAAATTTCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA" ) seq_record = alignment[3] self.assertEqual(seq_record.description, "gi|6273286|gb|AF191660.1|AF191") self.assertEqual( seq_record.seq, "TATACATAAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATA----------ATATATTTATAATTTCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA" ) seq_record = alignment[4] self.assertEqual(seq_record.description, "gi|6273290|gb|AF191664.1|AF191") self.assertEqual( seq_record.seq, "TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATA------ATATATTTCAAATTCCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA" ) seq_record = alignment[5] self.assertEqual(seq_record.description, "gi|6273289|gb|AF191663.1|AF191") self.assertEqual( seq_record.seq, 
"TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATA------ATATATTTCAAATTCCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTATACCAGA" ) seq_record = alignment[6] self.assertEqual(seq_record.description, "gi|6273291|gb|AF191665.1|AF191") self.assertEqual( seq_record.seq, "TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATATATATAATATATTTCAAATTCCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA" ) self.assertEqual(alignment.get_alignment_length(), 156) align_info = AlignInfo.SummaryInfo(alignment) consensus = align_info.dumb_consensus() self.assertIsInstance(consensus, Seq) self.assertEqual( consensus, "TATACATTAAAGXAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATATATATAATATATTTCAAATTXCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA" ) dictionary = align_info.replacement_dictionary(["N"]) self.assertEqual(len(dictionary), 16) self.assertAlmostEqual(dictionary[("A", "A")], 1395.0, places=1) self.assertAlmostEqual(dictionary[("A", "C")], 3.0, places=1) self.assertAlmostEqual(dictionary[("A", "G")], 13.0, places=1) self.assertAlmostEqual(dictionary[("A", "T")], 6.0, places=1) self.assertAlmostEqual(dictionary[("C", "A")], 3.0, places=1) self.assertAlmostEqual(dictionary[("C", "C")], 271.0, places=1) self.assertAlmostEqual(dictionary[("C", "G")], 0, places=1) self.assertAlmostEqual(dictionary[("C", "T")], 16.0, places=1) self.assertAlmostEqual(dictionary[("G", "A")], 5.0, places=1) self.assertAlmostEqual(dictionary[("G", "C")], 0, places=1) self.assertAlmostEqual(dictionary[("G", "G")], 480.0, places=1) self.assertAlmostEqual(dictionary[("G", "T")], 0, places=1) self.assertAlmostEqual(dictionary[("T", "A")], 6.0, places=1) self.assertAlmostEqual(dictionary[("T", "C")], 12.0, places=1) self.assertAlmostEqual(dictionary[("T", "G")], 0, places=1) self.assertAlmostEqual(dictionary[("T", "T")], 874.0, places=1) matrix = align_info.pos_specific_score_matrix(consensus, ["N"]) self.assertEqual( 
str(matrix), """\ A C G T T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 C 0.0 7.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 1.0 0.0 0.0 6.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 X 4.0 0.0 3.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 G 0.0 0.0 7.0 0.0 C 0.0 7.0 0.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 C 0.0 7.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 4.0 A 4.0 0.0 0.0 0.0 T 0.0 0.0 0.0 3.0 A 3.0 0.0 0.0 0.0 T 0.0 0.0 0.0 1.0 A 1.0 0.0 0.0 0.0 T 0.0 0.0 0.0 1.0 A 1.0 0.0 0.0 0.0 T 0.0 0.0 0.0 1.0 A 1.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 C 1.0 6.0 0.0 0.0 A 6.0 0.0 0.0 1.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 X 0.0 3.0 0.0 4.0 C 0.0 7.0 0.0 0.0 C 0.0 7.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 2.0 0.0 5.0 C 0.0 7.0 0.0 0.0 C 0.0 7.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 C 0.0 7.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 
0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 C 0.0 7.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 C 0.0 7.0 0.0 0.0 T 0.0 1.0 0.0 6.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 T 0.0 0.0 0.0 7.0 G 1.0 0.0 6.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 C 0.0 7.0 0.0 0.0 C 0.0 7.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 """) matrix = align_info.pos_specific_score_matrix(chars_to_ignore=["N"]) self.assertEqual( str(matrix), """\ A C G T T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 C 0.0 7.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 1.0 0.0 0.0 6.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 X 4.0 0.0 3.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 G 0.0 0.0 7.0 0.0 C 0.0 7.0 0.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 C 0.0 7.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 4.0 A 4.0 0.0 0.0 0.0 T 
0.0 0.0 0.0 3.0 A 3.0 0.0 0.0 0.0 T 0.0 0.0 0.0 1.0 A 1.0 0.0 0.0 0.0 T 0.0 0.0 0.0 1.0 A 1.0 0.0 0.0 0.0 T 0.0 0.0 0.0 1.0 A 1.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 C 1.0 6.0 0.0 0.0 A 6.0 0.0 0.0 1.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 X 0.0 3.0 0.0 4.0 C 0.0 7.0 0.0 0.0 C 0.0 7.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 2.0 0.0 5.0 C 0.0 7.0 0.0 0.0 C 0.0 7.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 C 0.0 7.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 C 0.0 7.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 C 0.0 7.0 0.0 0.0 T 0.0 1.0 0.0 6.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 T 0.0 0.0 0.0 7.0 G 1.0 0.0 6.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 C 0.0 7.0 0.0 0.0 C 0.0 7.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 """) second_seq = alignment[1].seq matrix = align_info.pos_specific_score_matrix(second_seq, ["N"]) self.assertEqual( str(matrix), """\ A C G T T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 C 0.0 7.0 0.0 0.0 A 
7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 1.0 0.0 0.0 6.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 4.0 0.0 3.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 G 0.0 0.0 7.0 0.0 C 0.0 7.0 0.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 G 0.0 0.0 7.0 0.0 C 0.0 7.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 4.0 A 4.0 0.0 0.0 0.0 - 0.0 0.0 0.0 3.0 - 3.0 0.0 0.0 0.0 - 0.0 0.0 0.0 1.0 - 1.0 0.0 0.0 0.0 - 0.0 0.0 0.0 1.0 - 1.0 0.0 0.0 0.0 - 0.0 0.0 0.0 1.0 - 1.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 C 1.0 6.0 0.0 0.0 A 6.0 0.0 0.0 1.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 T 0.0 3.0 0.0 4.0 C 0.0 7.0 0.0 0.0 C 0.0 7.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 C 0.0 2.0 0.0 5.0 C 0.0 7.0 0.0 0.0 C 0.0 7.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 C 0.0 7.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 
0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 C 0.0 7.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 C 0.0 7.0 0.0 0.0 T 0.0 1.0 0.0 6.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 T 0.0 0.0 0.0 7.0 G 1.0 0.0 6.0 0.0 T 0.0 0.0 0.0 7.0 A 7.0 0.0 0.0 0.0 C 0.0 7.0 0.0 0.0 C 0.0 7.0 0.0 0.0 A 7.0 0.0 0.0 0.0 G 0.0 0.0 7.0 0.0 A 7.0 0.0 0.0 0.0 """) value = align_info.information_content(5, 50, chars_to_ignore=["N"]) self.assertAlmostEqual(value, 88.42, places=2) value = align_info.information_content(chars_to_ignore=["N"]) self.assertAlmostEqual(value, 287.55, places=2) e_freq = {"G": 0.25, "C": 0.25, "A": 0.25, "T": 0.25} e_freq_table = FreqTable.FreqTable(e_freq, FreqTable.FREQ, IUPAC.unambiguous_dna) value = align_info.information_content(e_freq_table=e_freq_table, chars_to_ignore=["N"]) self.assertAlmostEqual(value, 287.55, places=2) self.assertEqual(align_info.get_column(1), "AAAAAAA") self.assertAlmostEqual(align_info.ic_vector[1], 2.00, places=2) self.assertEqual(align_info.get_column(7), "TTTATTT") self.assertAlmostEqual(align_info.ic_vector[7], 1.41, places=2) handle = StringIO() AlignInfo.print_info_content(align_info, fout=handle) self.assertEqual( handle.getvalue(), """\ 0 T 2.000 1 A 2.000 2 T 2.000 3 A 2.000 4 C 2.000 5 A 2.000 6 T 2.000 7 T 1.408 8 A 2.000 9 A 2.000 10 A 2.000 11 G 2.000 12 A 1.015 13 A 2.000 14 G 2.000 15 G 2.000 16 G 2.000 17 G 2.000 18 G 2.000 19 A 2.000 20 T 2.000 21 G 2.000 22 C 2.000 23 G 2.000 24 G 2.000 25 A 2.000 26 T 2.000 27 A 2.000 28 A 2.000 29 A 2.000 30 T 2.000 31 G 2.000 32 G 2.000 33 A 2.000 34 A 2.000 35 A 2.000 36 G 2.000 37 G 2.000 38 C 2.000 39 G 
2.000 40 A 2.000 41 A 2.000 42 A 2.000 43 G 2.000 44 A 2.000 45 A 2.000 46 A 2.000 47 G 2.000 48 A 2.000 49 A 2.000 50 T 2.000 51 A 2.000 52 T 2.000 53 A 2.000 54 T 2.000 55 A 2.000 56 - 0.682 57 - 0.682 58 - 0.333 59 - 0.333 60 - -0.115 61 - -0.115 62 - -0.115 63 - -0.115 64 - -0.115 65 - -0.115 66 A 2.000 67 T 2.000 68 A 2.000 69 T 2.000 70 A 2.000 71 T 2.000 72 T 2.000 73 T 2.000 74 C 1.408 75 A 1.408 76 A 2.000 77 A 2.000 78 T 2.000 79 T 2.000 80 T 1.015 81 C 2.000 82 C 2.000 83 T 2.000 84 T 2.000 85 A 2.000 86 T 2.000 87 A 2.000 88 T 2.000 89 A 2.000 90 C 1.137 91 C 2.000 92 C 2.000 93 A 2.000 94 A 2.000 95 A 2.000 96 T 2.000 97 A 2.000 98 T 2.000 99 A 2.000 100 A 2.000 101 A 2.000 102 A 2.000 103 A 2.000 104 T 2.000 105 A 2.000 106 T 2.000 107 C 2.000 108 T 2.000 109 A 2.000 110 A 2.000 111 T 2.000 112 A 2.000 113 A 2.000 114 A 2.000 115 T 2.000 116 T 2.000 117 A 2.000 118 G 2.000 119 A 2.000 120 T 2.000 121 G 2.000 122 A 2.000 123 A 2.000 124 T 2.000 125 A 2.000 126 T 2.000 127 C 2.000 128 A 2.000 129 A 2.000 130 A 2.000 131 G 2.000 132 A 2.000 133 A 2.000 134 T 2.000 135 C 2.000 136 C 1.408 137 A 2.000 138 T 2.000 139 T 2.000 140 G 2.000 141 A 2.000 142 T 2.000 143 T 2.000 144 T 2.000 145 A 2.000 146 G 2.000 147 T 2.000 148 G 1.408 149 T 2.000 150 A 2.000 151 C 2.000 152 C 2.000 153 A 2.000 154 G 2.000 155 A 2.000 """)
'A' : 0.25, 'T' : 0.25} e_freq_table = FreqTable.FreqTable(e_freq, FreqTable.FREQ, IUPAC.unambiguous_dna) print 'relative information:', align_info.information_content( e_freq_table = e_freq_table, chars_to_ignore = ['N']) print 'Column 1:', align_info.get_column(1) print 'IC for column 1:', align_info.ic_vector[1] print 'Column 7:', align_info.get_column(7) print 'IC for column 7:', align_info.ic_vector[7] print 'test print_info_content' AlignInfo.print_info_content(align_info) print "testing reading and writing fasta format..." to_parse = os.path.join(os.curdir, 'Quality', 'example.fasta') alignment = AlignIO.read(open(to_parse), "fasta", alphabet = Alphabet.Gapped(IUPAC.ambiguous_dna)) # test the base alignment stuff print 'all_seqs...' for seq_record in alignment: print 'description:', seq_record.description print 'seq:', repr(seq_record.seq) print 'length:', alignment.get_alignment_length() align_info = AlignInfo.SummaryInfo(alignment)
# Report the information content of the current summary measured against
# a uniform expected base composition (each of G, C, A, T at 0.25).
print('relative information content')
e_freq = {
    'G': 0.25,
    'C': 0.25,
    'A': 0.25,
    'T': 0.25,
}
e_freq_table = FreqTable.FreqTable(
    e_freq, FreqTable.FREQ, IUPAC.unambiguous_dna)
relative_info = align_info.information_content(
    chars_to_ignore=['N'], e_freq_table=e_freq_table)
print('relative information: %0.2f' % relative_info)

# Show two individual columns together with their per-column entries in
# ic_vector, which is read only after information_content() has been called.
print('Column 1: %s' % (align_info.get_column(1),))
print('IC for column 1: %0.2f' % (align_info.ic_vector[1],))
print('Column 7: %s' % (align_info.get_column(7),))
print('IC for column 7: %0.2f' % (align_info.ic_vector[7],))

print('test print_info_content')
AlignInfo.print_info_content(align_info)

# Switch to the FASTA example: read it as a gapped, ambiguous-DNA alignment.
print("testing reading and writing fasta format...")
to_parse = os.path.join(os.curdir, 'Quality', 'example.fasta')
gapped_dna = Alphabet.Gapped(IUPAC.ambiguous_dna)
alignment = AlignIO.read(to_parse, "fasta", alphabet=gapped_dna)

# Dump every record of the alignment, then the overall alignment length.
print('all_seqs...')
for seq_record in alignment:
    print('description: %s' % seq_record.description)
    print('seq: %r' % seq_record.seq)
print('length: %i' % alignment.get_alignment_length())