Esempio n. 1
0
    def test_add_array_gap(self):
        """Check ``add`` with a raw array on an alignment built with gaps.

        A three-row DNA alignment is created with ``include_gaps=True`` and a
        second three-row block is added under the RNA alphabet. The test then
        checks that ``alphabets`` lists both alphabets, each paired with 4,
        and that the dense data equals the two blocks laid side by side.
        """
        from multicov.binary import BinaryAlignment
        from multicov.alphabet import dna_alphabet, rna_alphabet

        alignment = BinaryAlignment(
            [
                [0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                [0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1],
                [0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
            ],
            dna_alphabet,
            include_gaps=True)
        alignment.add(
            [
                [0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                [0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1],
                [0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
            ],
            rna_alphabet)

        self.assertSequenceEqual(
            alignment.alphabets,
            [(dna_alphabet, 4), (rna_alphabet, 4)])

        # Each expected row is written as two lines: the first is the block
        # given to the constructor, the second the block given to add().
        expected = np.asmatrix([
            [
                0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
                0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
            ],
            [
                0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
                0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
            ],
            [
                0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
                0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
            ],
        ])
        # assert_array_equal reports the mismatching elements on failure,
        # whereas assertTrue(np.array_equal(...)) only says "False is not
        # true".
        np.testing.assert_array_equal(alignment.data.todense(), expected)
Esempio n. 2
0
 def test_raise_on_add_wrong_width(self):
     """Check that ``add`` rejects a block that is one column short.

     A 16-column matrix is first added under the RNA alphabet and must not
     raise ``ValueError``; the same matrix with its last column dropped
     must raise ``ValueError``.
     """
     from multicov.binary import BinaryAlignment
     from multicov.alphabet import protein_alphabet, rna_alphabet

     # Three rows of 40 columns, written as two lines of 20 per row.
     protein_rows = [
         [
             0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
             0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         ],
         [
             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
             1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         ],
         [
             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
             0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         ],
     ]
     alignment = BinaryAlignment(protein_rows, protein_alphabet)

     rna_block = np.asmatrix([
         [0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
         [0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1],
         [0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1],
     ])

     # The full-width block has to be accepted.
     try:
         alignment.add(rna_block, rna_alphabet)
     except ValueError:
         self.fail(
             "BinaryAlignment.add() raised ValueError on properly-sized data."
         )

     # Dropping the last column must be rejected.
     one_column_short = rna_block[:, :-1]
     with self.assertRaises(ValueError):
         alignment.add(one_column_short, rna_alphabet)
Esempio n. 3
0
 def test_set_default_reference_on_empty(self):
     """Check the reference after adding a block to an empty alignment.

     After a two-row, 16-column block is added under the RNA alphabet, the
     alignment's ``reference`` must compare equal to
     ``ReferenceMapping(list(range(4)))``.
     """
     from multicov.binary import BinaryAlignment
     from multicov.alignment import ReferenceMapping
     from multicov.alphabet import rna_alphabet

     rna_rows = [
         [0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
         [0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1],
     ]
     alignment = BinaryAlignment()
     alignment.add(rna_rows, rna_alphabet)

     actual_reference = alignment.reference
     expected_reference = ReferenceMapping(list(range(4)))
     self.assertEqual(actual_reference, expected_reference)
Esempio n. 4
0
 def test_add_to_empty(self):
     """Check that adding an alignment to an empty one gives an equal one."""
     from multicov.binary import BinaryAlignment
     from multicov.alphabet import dna_alphabet

     dna_rows = [
         [0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
         [0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1],
     ]
     source = BinaryAlignment(dna_rows, dna_alphabet)

     # Start from nothing and add the whole source alignment.
     target = BinaryAlignment()
     target.add(source)

     self.assertEqual(source, target)
Esempio n. 5
0
 def test_equal_self_multi_alpha(self):
     """Check that a two-alphabet alignment compares equal to itself.

     Both ``==`` and ``!=`` are exercised explicitly.
     """
     from multicov.binary import BinaryAlignment
     from multicov.alphabet import rna_alphabet, dna_alphabet

     dna_rows = [
         [0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
         [0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1],
     ]
     rna_rows = [
         [0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
         [0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0],
     ]
     alignment = BinaryAlignment(dna_rows, dna_alphabet)
     alignment.add(rna_rows, rna_alphabet)

     # Use the operators directly so both comparison paths are hit.
     self.assertTrue(alignment == alignment)
     self.assertFalse(alignment != alignment)
Esempio n. 6
0
 def test_add_alignment_gap_to_gap(self):
     """Check adding one gapped alignment to a copy of another.

     A protein alignment (42 columns) and a DNA alignment (20 columns) are
     both built with ``include_gaps=True``. A copy of the protein alignment
     has the DNA alignment added to it. The result must differ from both
     inputs, list ``(protein_alphabet, 2)`` and ``(dna_alphabet, 4)`` as
     its alphabets, and hold the protein columns followed by the DNA
     columns.
     """
     from multicov.binary import BinaryAlignment
     from multicov.alphabet import protein_alphabet, dna_alphabet

     # Each protein row is written as two lines of 21 columns.
     protein_align = BinaryAlignment(
         [
             [
                 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
             ],
             [
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
                 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
             ],
             [
                 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
             ],
         ],
         protein_alphabet,
         include_gaps=True)
     dna_align = BinaryAlignment(
         [
             [0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1],
             [0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1],
             [0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],
         ],
         dna_alphabet,
         include_gaps=True)

     combined = BinaryAlignment(protein_align)
     combined.add(dna_align)

     self.assertNotEqual(combined, protein_align)
     self.assertNotEqual(combined, dna_align)
     self.assertSequenceEqual(
         combined.alphabets,
         [(protein_alphabet, 2), (dna_alphabet, 4)])

     # Each expected row is written as three lines: the two 21-column
     # protein lines followed by the 20 DNA columns.
     expected = np.asmatrix([
         [
             0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
             0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
             0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
         ],
         [
             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
             0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
             0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
         ],
         [
             1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
             0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
             0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
         ],
     ])
     # assert_array_equal reports the mismatching elements on failure,
     # whereas assertTrue(np.array_equal(...)) only says "False is not
     # true".
     np.testing.assert_array_equal(combined.data.todense(), expected)
Esempio n. 7
0
    def test_unequal_different_annotations(self):
        """Check that extra annotations make a copied alignment unequal.

        A two-alphabet alignment is copied, the copy gets ``'seqw'`` and
        ``'fitness'`` annotations, and both ``!=`` and ``==`` are checked.
        """
        from multicov.binary import BinaryAlignment
        from multicov.alphabet import dna_alphabet, rna_alphabet

        dna_rows = [
            [0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
            [0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1],
        ]
        rna_rows = [
            [0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
            [0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0],
        ]
        plain = BinaryAlignment(dna_rows, dna_alphabet)
        plain.add(rna_rows, rna_alphabet)

        # Copy the alignment, then annotate only the copy.
        annotated = BinaryAlignment(plain)
        annotated.annotations['seqw'] = [0.5, 1.2]
        annotated.annotations['fitness'] = [0, -1]

        self.assertTrue(plain != annotated)
        self.assertFalse(plain == annotated)
Esempio n. 8
0
 def test_raise_on_add_wrong_length(self):
     """Check that ``add`` rejects a block with a different row count.

     The alignment holds three rows; adding a two-row block must raise
     ``ValueError``.
     """
     from multicov.binary import BinaryAlignment
     from multicov.alphabet import protein_alphabet, rna_alphabet

     # Three rows of 40 columns, written as two lines of 20 per row.
     protein_rows = [
         [
             0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
             0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         ],
         [
             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
             1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         ],
         [
             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
             0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         ],
     ]
     alignment = BinaryAlignment(protein_rows, protein_alphabet)

     with self.assertRaises(ValueError):
         alignment.add(
             [
                 [0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                 [0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1],
             ],
             rna_alphabet)
Esempio n. 9
0
 def test_return_self(self):
     """Check that ``add`` returns the very alignment it was called on."""
     from multicov.binary import BinaryAlignment
     from multicov.alphabet import dna_alphabet, rna_alphabet

     dna_rows = [
         [0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
         [0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1],
         [1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
     ]
     rna_rows = [
         [0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
         [0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1],
         [1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
     ]
     alignment = BinaryAlignment(dna_rows, dna_alphabet)

     returned = alignment.add(rna_rows, rna_alphabet)

     # Identity, not just equality: add() must hand back the same object.
     self.assertIs(alignment, returned)
Esempio n. 10
0
 def test_set_default_reference_on_nonempty(self):
     """Check the reference after adding to a non-empty alignment.

     After a DNA block is added to a protein alignment,
     ``reference.seqs`` must have two entries and the second one must
     equal ``list(range(4))``.
     """
     from multicov.binary import BinaryAlignment
     from multicov.alphabet import protein_alphabet, dna_alphabet

     # Three rows of 40 columns, written as two lines of 20 per row.
     protein_rows = [
         [
             0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
             0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         ],
         [
             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
             1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         ],
         [
             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
             0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         ],
     ]
     dna_rows = [
         [0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
         [0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1],
         [1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
     ]
     alignment = BinaryAlignment(protein_rows, protein_alphabet)
     alignment.add(dna_rows, dna_alphabet)

     sequence_count = len(alignment.reference.seqs)
     self.assertEqual(sequence_count, 2)

     second_sequence = alignment.reference.seqs[1]
     self.assertSequenceEqual(second_sequence, list(range(4)))
Esempio n. 11
0
 def test_multi(self):
     """Check the ``alphabets`` list after adding a second alphabet.

     A protein alignment gets a DNA block added. ``alphabets`` must then
     hold exactly ``(protein_alphabet, 2)`` followed by
     ``(dna_alphabet, 4)``.
     """
     from multicov.binary import BinaryAlignment
     from multicov.alphabet import protein_alphabet, dna_alphabet

     # Three rows of 40 columns, written as two lines of 20 per row.
     protein_rows = [
         [
             0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
             0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         ],
         [
             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
             1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         ],
         [
             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
             0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         ],
     ]
     dna_rows = [
         [0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
         [0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1],
         [1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
     ]
     alignment = BinaryAlignment(protein_rows, protein_alphabet)
     alignment.add(dna_rows, dna_alphabet)

     self.assertEqual(len(alignment.alphabets), 2)

     protein_entry = (protein_alphabet, 2)
     self.assertEqual(alignment.alphabets[0], protein_entry)

     dna_entry = (dna_alphabet, 4)
     self.assertEqual(alignment.alphabets[1], dna_entry)
Esempio n. 12
0
    def test_unequal_different_alphabet_widths(self):
        """Check that equal data split differently compares unequal.

        Both alignments use the DNA alphabet followed by the RNA alphabet
        and hold the same 32 columns overall. The first splits them 16 + 16
        and the second 12 + 20. The test first confirms the dense data is
        identical, then checks that the alignments still compare unequal.
        """
        from multicov.binary import BinaryAlignment
        from multicov.alphabet import rna_alphabet, dna_alphabet

        # 16 DNA columns followed by 16 RNA columns.
        even_split = BinaryAlignment(
            [
                [0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                [0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1],
            ],
            dna_alphabet)
        even_split.add(
            [
                [0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                [0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0],
            ],
            rna_alphabet)

        # 12 DNA columns followed by 20 RNA columns.
        uneven_split = BinaryAlignment(
            [
                [0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0],
                [0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0],
            ],
            dna_alphabet)
        uneven_split.add(
            [
                [0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0],
            ],
            rna_alphabet)

        # Precondition: the raw data really is the same. assert_array_equal
        # shows the differing elements if this ever stops holding, whereas
        # assertTrue(np.array_equal(...)) only says "False is not true".
        np.testing.assert_array_equal(
            even_split.data.todense(),
            uneven_split.data.todense())

        self.assertTrue(even_split != uneven_split)
        self.assertFalse(even_split == uneven_split)