Ejemplo n.º 1
0
 def test_save_load(self):
     """Round-trip a sequence through save()/load() and compare both copies."""
     path = 'tests/files/test_save_load.csv'
     original = BioSeq.create_bio_seq("ACTGTCATAT")
     original.save(path)
     restored = BioSeq.load(path)
     # The reloaded object must agree on sequence, genetic code and class name.
     self.assertEqual(original.get_seq(), restored.get_seq())
     self.assertEqual(original.get_genetic_code(),
                      restored.get_genetic_code())
     self.assertEqual(original.__class__.__name__,
                      restored.__class__.__name__)
Ejemplo n.º 2
0
 def test_eq(self):
     """Check ``==`` and ``!=`` between BioSeq objects.

     The operators are written out explicitly (instead of going through
     assertEqual/assertNotEqual) so that each one is invoked directly.
     """
     s1 = BioSeq("ATATAT", "DNA")
     s2 = BioSeq("ATATAT", "DNA")
     self.assertTrue(s1 == s2)
     self.assertFalse(s1 != s2)
     # Different residues, same sequence type.
     s3 = BioSeq("ATATATX", "DNA")
     self.assertFalse(s1 == s3)
     # Same residues, different sequence type.
     # NOTE(review): assumes equality takes the sequence type into account --
     # confirm against BioSeq.__eq__.
     s4 = BioSeq("ATATAT", "PROTEIN")
     self.assertFalse(s1 == s4)
Ejemplo n.º 3
0
 def test_frequency(self):
     """frequency() must return the expected per-symbol values for a DNA string."""
     expected = {"A": 0.2, "C": 0.5, "T": 0.1, "G": 0.2}
     seq = BioSeq("AACCCCCTGG", "DNA")
     self.assertDictEqual(expected, seq.frequency())
Ejemplo n.º 4
0
 def test_global_align(self):
     """Global alignment of the PHSWG / HGWAG example with a gap of -8."""
     # example from page 39 from slides
     gap = -8
     first = BioSeq("PHSWG", "PROTEIN")
     second = BioSeq("HGWAG", "PROTEIN")
     blosum = read_substitution_matrix_file("test/blosum62.mat")
     score, trace = first.global_align_multiple_solutions(second, blosum, gap)
     # Only the last row of the first returned value is checked.
     self.assertListEqual([-40, -24, -10, 3, 11, 9], score[-1])
     alignments = list(
         first.recover_global_align_multiple_solutions(second, trace))
     self.assertListEqual([('PHSW_G', '_HGWAG')], alignments)
Ejemplo n.º 5
0
    def test_validate(self):
        """validate() passes for fresh DNA/RNA and fails once RNA holds DNA."""
        dna_seq = BioSeq.create_bio_seq("ACTG")
        rna_seq = BioSeq.create_bio_seq("ACUG", "rna")
        for candidate in (dna_seq, rna_seq):
            self.assertTrue(candidate.validate())

        # seq.csv contains a DNA sequence
        rna_seq.read_sequence('tests/files/seq.csv')
        self.assertFalse(rna_seq.validate())
Ejemplo n.º 6
0
    def test_recover_local_align_multiple_solutions(self):
        """Recover the local alignments of three sequence pairs and check them."""

        def recover(first, second, matrix, gap):
            # Align the pair, then rebuild the alignments from score + trace.
            score, trace, _ = local_align_multiple_solutions(
                first, second, matrix, gap)
            return recover_local_align_multiple_solutions(
                score, trace, first, second)

        # Classes example
        self.assertEqual(
            recover(self.slides_seq2, self.slides_seq1, self.sm, -8),
            [['HSW', 'HGW'], ['HSWG', 'HGWA']])

        self.assertEqual(
            recover(self.seq1, self.seq2, self.sm_dna, -1),
            [['AT', 'AT'], ['CA', 'CA']])

        proteins = BioSeq.read_fasta_file('tests/files/protein_sequences.fas')
        found = recover(proteins['sp|C1F111'], proteins['sp|B7JC18'],
                        self.sm, -3)

        # 4 local optimal alignments between sp|C1F111 & sp|B7JC18
        self.assertEqual(len(found), 4)

        print('>> Passed test_recover_local_align_multiple_solutions()')
Ejemplo n.º 7
0
 def test_save_load(self):
     """Save a sequence, load it into another object and compare both.

     The temporary file is removed in a ``finally`` clause so that a
     failing save, load or assertion does not leave it behind.
     """
     filename = "test_save_load.tmp"
     s = BioSeq("ATATACAGATGAT", "DNA")
     try:
         s.save(filename)
         # Start from a different type and an empty sequence so that the
         # assertions below only pass if load() replaced both attributes.
         s2 = BioSeq("", "RNA")
         s2.load(filename)
         self.assertEqual(s.seq_type, s2.seq_type)
         self.assertEqual(s.sequence, s2.sequence)
     finally:
         # Guard the removal: if save() never created the file, a
         # FileNotFoundError here would mask the real failure.
         if os.path.exists(filename):
             os.remove(filename)
Ejemplo n.º 8
0
 def test_gc_content(self):
     """gc_content() for a mixed, an A-only and a G/C-only sequence.

     The expected values are floats, so they are compared with
     assertAlmostEqual rather than exact equality; this keeps the test
     independent of how the ratio is computed internally.
     """
     s = BioSeq("AACCCCCTGG", "DNA")
     self.assertAlmostEqual(0.70, s.gc_content())
     # The sequence attribute is reassigned directly to reuse the object.
     s.sequence = "AA"
     self.assertAlmostEqual(0.0, s.gc_content())
     s.sequence = "GC"
     self.assertAlmostEqual(1.0, s.gc_content())
Ejemplo n.º 9
0
    def test_read_write(self):
        """Write a sequence, replace it from seq.csv, then read it back."""
        written_path = 'tests/files/test_write.csv'
        seq = BioSeq.create_bio_seq("ACTGTCATAT")
        initial_length = len(seq)

        seq.write_sequence(written_path)
        seq.read_sequence('tests/files/seq.csv')
        # The sequence read from seq.csv is expected to be longer (2591).
        self.assertGreater(len(seq), initial_length)
        self.assertEqual(2591, len(seq))

        # Reading the file written above must restore the original length.
        seq.read_sequence(written_path)
        self.assertEqual(initial_length, len(seq))
Ejemplo n.º 10
0
 def test_getslice(self):
     """Slice a sequence via subscript syntax and via ``__getslice__``."""
     seq = BioSeq("ATATAT", "DNA")
     # Subscript syntax, without and with an explicit step.
     self.assertEqual("A", seq[0:1])
     self.assertEqual("T", seq[-1:])
     self.assertEqual("AT", seq[0:2])
     self.assertEqual("A", seq[0:1:1])
     self.assertEqual("T", seq[-1::1])
     self.assertEqual("AAA", seq[0::2])
     # explicit invocation of function
     size = len(seq)
     explicit_cases = (
         ("A", (0, 1)),
         ("T", (-1, size)),
         ("AT", (0, 2)),
         ("A", (0, 1, 1)),
         ("T", (-1, size, 1)),
         ("AAA", (0, size, 2)),
     )
     for expected, args in explicit_cases:
         self.assertEqual(expected, seq.__getslice__(*args))
Ejemplo n.º 11
0
 def test_dot_plot(self):
     """Check the sum of the dot_plot() result for two sequence pairs."""
     base = BioSeq("ATAT", "DNA")
     identical = BioSeq("ATAT", "DNA")
     full_matrix = base.dot_plot(identical)
     self.assertEqual(full_matrix.sum(), 8)
     # Second pair shares only the symbol T with the base sequence.
     mostly_foreign = BioSeq("XXXT", "DNA")
     sparse_matrix = base.dot_plot(mostly_foreign)
     self.assertEqual(sparse_matrix.sum(), 2)
Ejemplo n.º 12
0
 def test_global_align2(self):
     """Local alignment of PHSWG / HGWAG, plus the pairwise comparisons.

     Despite the method name, the alignment exercised here is the local
     one (local_align_multiple_solutions and its recover counterpart).
     """
     # example from page 46 from slides
     gap = -8
     first = BioSeq("PHSWG", "PROTEIN")
     second = BioSeq("HGWAG", "PROTEIN")
     blosum = read_substitution_matrix_file("test/blosum62.mat")
     score, trace = first.local_align_multiple_solutions(second, blosum, gap)
     self.assertListEqual([0, 0, 6, 11, 19, 17], score[-1])
     alignments = list(
         first.recover_local_align_multiple_solutions(second, trace, score))
     self.assertListEqual([('HSW', 'HGW'), ('HSWG', 'HGWA')], alignments)
     # Each pairwise comparison is expected to have length 2 for two inputs.
     global_cmp = BioSeq.compare_pairwise_global_align(
         [first, second], blosum, gap)
     self.assertEqual(len(global_cmp), 2)
     local_cmp = BioSeq.compare_pairwise_local_align(
         [first, second], blosum, gap)
     self.assertEqual(len(local_cmp), 2)
Ejemplo n.º 13
0
    def test_compare_pairwise_num_local_align(self):
        """Spot-check entries of compare_pairwise_num_local_align's result.

        Uses the sequences from the protein FASTA fixture, self.sm and a
        gap of -3.
        """
        seqs = list(
            BioSeq.read_fasta_file(
                'tests/files/protein_sequences.fas').values())
        cla = compare_pairwise_num_local_align(seqs, self.sm, -3)

        # Some random values
        # Between sp|C1F111 & sp|B7JC18 - matches previous test
        self.assertEqual(cla[4][3], 4)
        self.assertEqual(cla[0][0], 1)
        self.assertEqual(cla[9][6], 144)
        self.assertEqual(cla[8][10], 1152)

        # The message names this test method exactly so that the output
        # can be matched back to it.
        print('>> Passed test_compare_pairwise_num_local_align()')
Ejemplo n.º 14
0
    def test_recover_global_align_multiple_solutions(self):
        """Recover the global alignments of three sequence pairs.

        Membership checks use assertIn so that a failure reports both the
        missing alignment and the list that was searched.
        """
        _, ga_trace = global_align_multiple_solutions(self.slides_seq1,
                                                      self.slides_seq2,
                                                      self.sm, -3)
        rga = recover_global_align_multiple_solutions(ga_trace,
                                                      self.slides_seq1,
                                                      self.slides_seq2)
        # Each recovered alignment is indexed as (first, second) below.
        seq1_alignments = [align[0] for align in rga]
        seq2_alignments = [align[1] for align in rga]

        # Classes Example
        self.assertIn('-HGWAG', seq1_alignments)
        self.assertIn('PHSW-G', seq2_alignments)

        # C2 example
        _, ga_trace = global_align_multiple_solutions(self.seq1, self.seq2,
                                                      self.sm_dna, -1)
        rga = recover_global_align_multiple_solutions(ga_trace, self.seq1,
                                                      self.seq2)
        seq1_alignments = [align[0] for align in rga]
        seq2_alignments = [align[1] for align in rga]

        self.assertEqual(len(seq1_alignments), 3)
        self.assertIn('G-ATTACA', seq1_alignments)
        self.assertIn('GCA-TGCT', seq2_alignments)
        self.assertIn('GCAT-GCT', seq2_alignments)
        self.assertIn('GCATG-CT', seq2_alignments)

        seqs = BioSeq.read_fasta_file('tests/files/protein_sequences.fas')
        _, ga_trace = global_align_multiple_solutions(seqs['sp|C1F111'],
                                                      seqs['sp|B7JC18'],
                                                      self.sm, -3)
        rga = recover_global_align_multiple_solutions(ga_trace,
                                                      seqs['sp|C1F111'],
                                                      seqs['sp|B7JC18'])

        # 5760 global optimal alignments between sp|C1F111 & sp|B7JC18
        self.assertEqual(len(rga), 5760)

        print('>> Passed test_recover_global_align_multiple_solutions()')
Ejemplo n.º 15
0
 def test_basics(self):
     """A created sequence reports the string and length it was built from."""
     raw = "ACTGTCATGAT"
     seq = BioSeq.create_bio_seq(raw)
     self.assertEqual(seq.get_seq(), raw)
     self.assertEqual(len(seq), len(raw))
Ejemplo n.º 16
0
 def test_rev_complement(self):
     """reverse_complement() for one DNA and one RNA sequence."""
     dna = BioSeq.create_bio_seq("ACGGTA")
     self.assertEqual(dna.reverse_complement(), "TACCGT")
     rna = BioSeq.create_bio_seq("ACGUUA", "rna")
     self.assertEqual(rna.reverse_complement(), "UAACGU")
Ejemplo n.º 17
0
 def test_gc_percent(self):
     """Check gc_percent() and gc_percent_sub_seq(2) on the sequence ACGG."""
     seq = BioSeq.create_bio_seq("ACGG")
     whole = seq.gc_percent()
     self.assertEqual(whole, 0.75)
     per_sub_seq = seq.gc_percent_sub_seq(2)
     self.assertEqual(per_sub_seq, [0.5, 1])
Ejemplo n.º 18
0
 def test_score_seq(self):
     """score_seq() of ATAT against ATAG with a generated matrix."""
     matrix = substitution_matrix("ATCG", 2, -3)
     left = BioSeq("ATAT", "DNA")
     right = BioSeq("ATAG", "DNA")
     score = left.score_seq(right, matrix, 3)
     self.assertEqual(3, score)
Ejemplo n.º 19
0
 def test_str(self):
     """str() of a BioSeq is expected to be its sequence text."""
     seq = BioSeq("ATATAT", "DNA")
     rendered = str(seq)
     self.assertEqual("ATATAT", rendered)
Ejemplo n.º 20
0
 def test_pretty_print(self):
     """Run the shared _test_pretty_print check on a DNA sequence."""
     dna = BioSeq("ATATACAGATGAT", "DNA")
     check = self._test_pretty_print
     check(dna)
Ejemplo n.º 21
0
 def test_constructor(self):
     """Mixed-case input is expected upper-cased; an unknown type must raise."""
     seq = BioSeq("ATATat", "DNa")
     self.assertIsInstance(seq, BioSeq)
     self.assertEqual("ATATAT", seq.sequence)
     self.assertEqual("DNA", seq.seq_type)
     with self.assertRaises(Exception):
         BioSeq("ATAT", "smth")
Ejemplo n.º 22
0
 def test_assert_seq_type(self):
     """DNA, RNA and PROTEIN must construct; an unknown type must raise."""
     for seq_type in ("DNA", "RNA", "PROTEIN"):
         # Construction alone is the check: it must not raise.
         BioSeq("ATATAT", seq_type)
     with self.assertRaises(Exception):
         BioSeq("ATATA", "asd")
Ejemplo n.º 23
0
 def test_getitem(self):
     """Integer indexing with a positive and a negative index."""
     seq = BioSeq("ATATAT", "DNA")
     for index, expected in ((0, "A"), (-1, "T")):
         self.assertEqual(expected, seq[index])
Ejemplo n.º 24
0
 def test_repr(self):
     """A list holding a BioSeq is rendered through the element's repr()."""
     wrapped = [BioSeq("ATATAT", "DNA")]
     rendered = str(wrapped)
     self.assertEqual("[ATATAT]", rendered)
Ejemplo n.º 25
0
 def test_freq(self):
     """freq_symbols() must return the expected per-symbol counts."""
     expected_counts = {"T": 4, "A": 3, "C": 2, "G": 1}
     seq = BioSeq.create_bio_seq("ACTGTCATAT")
     self.assertEqual(seq.freq_symbols(), expected_counts)
Ejemplo n.º 26
0
 def test_score_affine_gap(self):
     """score_affine_gap() on a protein pair whose second member has gaps."""
     blosum = read_substitution_matrix_file("test/blosum62.mat")
     reference = BioSeq("LGPSSGCASRIWTKSA", "PROTEIN")
     gapped = BioSeq("TGPS_G__S_IWSKSG", "PROTEIN")
     score = reference.score_affine_gap(gapped, blosum, -8, -2)
     self.assertEqual(33, score)
Ejemplo n.º 27
0
 def test_len(self):
     """len() of a six-symbol sequence must be 6."""
     seq = BioSeq("ATATAT", "DNA")
     size = len(seq)
     self.assertEqual(6, size)
Ejemplo n.º 28
0
 def test_add_gap(self):
     """add_gap(2) must turn ATGC into AT_GC."""
     seq = BioSeq("ATGC", "DNA")
     before = str(seq)
     self.assertEqual("ATGC", before)
     seq.add_gap(2)
     after = str(seq)
     self.assertEqual("AT_GC", after)
Ejemplo n.º 29
0
        print('Seq1: ' + str(align[0]) + '\nSeq2: ' + str(align[1]) + '\n')
    wait_input()

    print("\n-------------------------\n")

    print("Now shall we make it a little bit more complex?\n")
    wait_input()

    print(
        "So, lets load and use the proteins present in the 'tests/files/protein_sequences.fas' file!\n\
        We need to use the read fasta functionality from the bioseq library to load them.\n"
    )
    wait_input()

    print("The protein sequences are:\n")
    seqs = BioSeq.read_fasta_file('tests/files/protein_sequences.fas')
    for key, value in seqs.items():
        print("> " + str(key) + ": " + str(value) + "\n")
    wait_input()

    print("::: GLOBAL ALIGNMENT with multiple solutions :::\n")
    print(
        "\nLets make the global alignment with two of our protein sequences: sp|B0C882: & sp|A1TQI0, the 'sm_dna' and a gap of -3, shall we?\n"
    )
    p_seq1 = seqs["sp|B0C882"]
    p_seq2 = seqs["sp|A1TQI0"]
    wait_input()

    ga_score, ga_trace = global_align_multiple_solutions(
        p_seq1, p_seq2, sm_blosum, -3)
    print("Score matrix obtained:\n")
Ejemplo n.º 30
0
 def test_hamming_distance(self):
     """hamming_distance() in both directions, plus the unequal-length error."""
     base = BioSeq("ATATACAGATGAT", "DNA")
     same = BioSeq("ATATACAGATGAT", "DNA")
     for left, right in ((base, same), (same, base)):
         self.assertEqual(0, left.hamming_distance(right))
     # Differs from the base sequence in the final symbol only.
     one_off = BioSeq("ATATACAGATGAX", "DNA")
     for left, right in ((base, one_off), (one_off, base)):
         self.assertEqual(1, left.hamming_distance(right))
     shorter = BioSeq("AAA", "DNA")
     # Bind the method first so only the call itself runs under assertRaises.
     distance_from_base = base.hamming_distance
     with self.assertRaises(Exception):
         distance_from_base(shorter)