Beispiel #1
0
    def test_to_phylip_no_positions(self):
        """to_phylip errors out when the sequences contain no positions."""
        zero_length = [DNASequence('', id=label) for label in ("d1", "d2")]
        aln = Alignment(zero_length)

        # The call is routed through assert_warns; the expected outcome is
        # that SequenceCollectionError escapes from it.
        with self.assertRaises(SequenceCollectionError):
            npt.assert_warns(UserWarning, aln.to_phylip)
Beispiel #2
0
    def setUp(self):
        """Create the sequences, (id, sequence) tuples and collections
        shared by the tests.
        """
        # (id, sequence) pairs mirroring the sequence objects built below
        self.seqs1_t = [('d1', 'GATTACA'), ('d2', 'TTG')]
        self.seqs2_t = [('r1', 'GAUUACA'),
                        ('r2', 'UUG'),
                        ('r3', 'U-----UGCC--')]
        self.seqs3_t = self.seqs1_t + self.seqs2_t

        # DNA sequences, uppercase and lowercase spellings
        self.d1 = DNASequence('GATTACA', id="d1")
        self.d2 = DNASequence('TTG', id="d2")
        self.d1_lower = DNASequence('gattaca', id="d1")
        self.d2_lower = DNASequence('ttg', id="d2")

        # RNA sequences; r3 contains gap characters
        self.r1 = RNASequence('GAUUACA', id="r1")
        self.r2 = RNASequence('UUG', id="r2")
        self.r3 = RNASequence('U-----UGCC--', id="r3")

        # DNA sequence containing an 'X'; wrapped in invalid_s1 below
        self.i1 = DNASequence('GATXACA', id="i1")

        # plain lists of sequence objects
        self.seqs1 = [self.d1, self.d2]
        self.seqs1_lower = [self.d1_lower, self.d2_lower]
        self.seqs2 = [self.r1, self.r2, self.r3]
        self.seqs3 = self.seqs1 + self.seqs2

        # collections built from those lists
        self.empty = SequenceCollection([])
        self.s1 = SequenceCollection(self.seqs1)
        self.s1_lower = SequenceCollection(self.seqs1_lower)
        self.s2 = SequenceCollection(self.seqs2)
        self.s3 = SequenceCollection(self.seqs3)
        self.invalid_s1 = SequenceCollection([self.i1])
Beispiel #3
0
    def setUp(self):
        """Create the aligned sequences and Alignment objects shared by the
        tests.
        """
        # (id, sequence) pairs mirroring the sequence objects built below
        self.seqs1_t = [('d1', '..ACC-GTTGG..'),
                        ('d2', 'TTACCGGT-GGCC'),
                        ('d3', '.-ACC-GTTGC--')]
        self.seqs2_t = [('r1', 'UUAU-'), ('r2', 'ACGUU')]

        # three DNA sequences of equal length
        self.d1 = DNASequence('..ACC-GTTGG..', id="d1")
        self.d2 = DNASequence('TTACCGGT-GGCC', id="d2")
        self.d3 = DNASequence('.-ACC-GTTGC--', id="d3")
        self.seqs1 = [self.d1, self.d2, self.d3]

        # two RNA sequences of equal length
        self.r1 = RNASequence('UUAU-', id="r1")
        self.r2 = RNASequence('ACGUU', id="r2")
        self.seqs2 = [self.r1, self.r2]

        # plain alignments
        self.a1 = Alignment(self.seqs1)
        self.a2 = Alignment(self.seqs2)

        # the RNA alignment again, with score and start/end positions set
        self.a3 = Alignment(
            self.seqs2, score=42.0, start_end_positions=[(0, 3), (5, 9)])
        self.a4 = Alignment(
            self.seqs2, score=-42.0, start_end_positions=[(1, 4), (6, 10)])

        # no sequences
        self.empty = Alignment([])

        # sequences, but no positions
        zero_length_a = RNA('', id='a')
        zero_length_b = RNA('', id='b')
        self.no_positions = Alignment([zero_length_a, zero_length_b])
Beispiel #4
0
 def test_reverse_complement(self):
     """reverse_complement yields the expected sequences and raises for
     the b3 fixture.
     """
     rc_b1 = self.b1.reverse_complement()
     self.assertEqual(rc_b1, DNASequence("TGTAATC"))

     rc_b2 = self.b2.reverse_complement()
     self.assertEqual(rc_b2, DNASequence("GGTACCGGT"))

     # b3 is expected to be rejected
     with self.assertRaises(BiologicalSequenceError):
         self.b3.reverse_complement()

     rc_b4 = self.b4.reverse_complement()
     self.assertEqual(rc_b4, DNASequence("NVHDBMRSWYK"))
Beispiel #5
0
 def setUp(self):
     """Prepare the strings, sequence objects and alignment used by the
     Fasta tests.
     """
     self.strings = ['AAAA', 'CCCC', 'gggg', 'uuuu']

     # FASTA text: numeric labels, named labels, and named labels with
     # every sequence split across two lines
     self.fasta_no_label = '>0\nAAAA\n>1\nCCCC\n>2\ngggg\n>3\nuuuu'
     self.fasta_with_label = (
         '>1st\nAAAA\n>2nd\nCCCC\n>3rd\nGGGG\n>4th\nUUUU')
     self.fasta_with_label_lw2 = (
         '>1st\nAA\nAA\n>2nd\nCC\nCC\n>3rd\nGG\nGG\n>4th\nUU\nUU')

     self.alignment_dict = {'1st': 'AAAA', '2nd': 'CCCC',
                            '3rd': 'GGGG', '4th': 'UUUU'}

     # the same two records as DNASequence and as BiologicalSequence
     records = [('ACTCGAGATC', 'seq1'), ('GGCCT', 'seq2')]
     self.sequence_objects_a = [
         DNASequence(residues, label) for residues, label in records]
     self.sequence_objects_b = [
         BiologicalSequence(residues, label) for residues, label in records]

     first = DNASequence("ACC--G-GGTA..", id="seq1")
     second = DNASequence("TCC--G-GGCA..", id="seqs2")
     self.alignment = Alignment([first, second])
Beispiel #6
0
    def test_to_phylip_unequal_sequence_lengths(self):
        """to_phylip errors out when one sequence is shorter than the rest."""
        # 'd2' has three characters, the other two have four
        ragged = [DNASequence('A-CT', id="d1"),
                  DNASequence('TTA', id="d2"),
                  DNASequence('.-AC', id="d3")]
        aln = Alignment(ragged)

        with self.assertRaises(SequenceCollectionError):
            npt.assert_warns(UserWarning, aln.to_phylip)
Beispiel #7
0
 def test_nondegenerates_mixed_degens(self):
     """nondegenerates of 'RGY' yields the four expected sequences."""
     expected = [DNASequence(s) for s in ('AGC', 'AGT', 'GGC', 'GGT')]

     # Sorted by string form before comparing against the ordered list.
     expansions = DNASequence('RGY').nondegenerates()
     observed = sorted(expansions, key=str)

     self.assertEqual(observed, expected)
Beispiel #8
0
 def test_nondegenerates_gap_mixed_case(self):
     """nondegenerates of '-M.m' yields the four expected sequences, with
     gap characters and letter case as listed.
     """
     expected = [DNASequence(s) for s in ('-A.a', '-A.c', '-C.a', '-C.c')]

     # Sorted by string form before comparing against the ordered list.
     expansions = DNASequence('-M.m').nondegenerates()
     observed = sorted(expansions, key=str)

     self.assertEqual(observed, expected)
Beispiel #9
0
    def test_majority_consensus_constructor(self):
        """majority_consensus called with constructor=str emits a
        UserWarning and its result compares equal to 'TT-'.
        """
        aln = Alignment([DNASequence('TTT', id="d1"),
                         DNASequence('TT-', id="d2"),
                         DNASequence('TC-', id="d3")])

        consensus = npt.assert_warns(UserWarning, aln.majority_consensus,
                                     constructor=str)
        self.assertEqual(consensus, 'TT-')
Beispiel #10
0
    def test_validate_lengths(self):
        """_validate_lengths is truthy for the fixtures and a single
        sequence, and falsy when two sequences differ in length.
        """
        for aln in (self.a1, self.a2, self.empty):
            self.assertTrue(aln._validate_lengths())

        single = Alignment([DNASequence('TTT', id="d1")])
        self.assertTrue(single._validate_lengths())

        # three characters versus two
        ragged = Alignment([DNASequence('TTT', id="d1"),
                            DNASequence('TT', id="d2")])
        self.assertFalse(ragged._validate_lengths())
Beispiel #11
0
    def test_subalignment(self):
        """subalignment selects sequences and/or positions, optionally
        inverting either selection
        """
        # sequences selected by id
        obs = self.a1.subalignment(seqs_to_keep=['d1', 'd3'])
        self.assertEqual(obs, Alignment([self.d1, self.d3]))

        # sequences selected by index
        obs = self.a1.subalignment(seqs_to_keep=[0, 2])
        self.assertEqual(obs, Alignment([self.d1, self.d3]))

        # sequences selected by id, selection inverted
        obs = self.a1.subalignment(seqs_to_keep=['d1', 'd3'],
                                   invert_seqs_to_keep=True)
        self.assertEqual(obs, Alignment([self.d2]))

        # sequences selected by index, selection inverted
        obs = self.a1.subalignment(seqs_to_keep=[0, 2],
                                   invert_seqs_to_keep=True)
        self.assertEqual(obs, Alignment([self.d2]))

        # positions selected
        obs = self.a1.subalignment(positions_to_keep=[0, 2, 3])
        exp = Alignment([DNASequence('.AC', id="d1"),
                         DNASequence('TAC', id="d2"),
                         DNASequence('.AC', id="d3")])
        self.assertEqual(obs, exp)

        # positions selected, selection inverted
        obs = self.a1.subalignment(positions_to_keep=[0, 2, 3],
                                   invert_positions_to_keep=True)
        exp = Alignment([DNASequence('.C-GTTGG..', id="d1"),
                         DNASequence('TCGGT-GGCC', id="d2"),
                         DNASequence('-C-GTTGC--', id="d3")])
        self.assertEqual(obs, exp)

        # sequences and positions selected together
        obs = self.a1.subalignment(seqs_to_keep=[0, 2],
                                   positions_to_keep=[0, 2, 3])
        exp = Alignment([DNASequence('.AC', id="d1"),
                         DNASequence('.AC', id="d3")])
        self.assertEqual(obs, exp)

        # sequences and positions selected together, both inverted
        obs = self.a1.subalignment(seqs_to_keep=[0, 2],
                                   positions_to_keep=[0, 2, 3],
                                   invert_seqs_to_keep=True,
                                   invert_positions_to_keep=True)
        exp = Alignment([DNASequence('TCGGT-GGCC', id="d2")])
        self.assertEqual(obs, exp)
Beispiel #12
0
    def test_to_phylip(self):
        """to_phylip with map_labels=False emits a DeprecationWarning and
        returns the PHYLIP text together with an id map.
        """
        aln = Alignment([DNASequence('..ACC-GTTGG..', id="d1"),
                         DNASequence('TTACCGGT-GGCC', id="d2"),
                         DNASequence('.-ACC-GTTGC--', id="d3")])

        phylip_text, label_map = npt.assert_warns(
            DeprecationWarning, aln.to_phylip, map_labels=False)

        # without mapping, every id maps to itself
        self.assertEqual(label_map, {'d1': 'd1', 'd3': 'd3', 'd2': 'd2'})

        expected_lines = [
            "3 13",
            "d1 ..ACC-GTTGG..",
            "d2 TTACCGGT-GGCC",
            "d3 .-ACC-GTTGC--",
        ]
        self.assertEqual(phylip_text, "\n".join(expected_lines))
    def test_to_phylip_map_labels(self):
        """to_phylip relabels sequences when label mapping is requested
        """
        aln = Alignment([DNASequence('..ACC-GTTGG..', id="d1"),
                         DNASequence('TTACCGGT-GGCC', id="d2"),
                         DNASequence('.-ACC-GTTGC--', id="d3")])

        phylip_text, label_map = aln.to_phylip(map_labels=True,
                                               label_prefix="s")

        # new label -> original id
        self.assertEqual(label_map, {'s1': 'd1', 's3': 'd3', 's2': 'd2'})

        expected_lines = [
            "3 13",
            "s1 ..ACC-GTTGG..",
            "s2 TTACCGGT-GGCC",
            "s3 .-ACC-GTTGC--",
        ]
        self.assertEqual(phylip_text, "\n".join(expected_lines))
Beispiel #14
0
    def test_init_validate(self):
        """constructing with validate=True accepts the fixture sequences
        and rejects bad input
        """
        # the fixture sequences are accepted without error
        Alignment(self.seqs1, validate=True)

        # a fourth sequence containing an 'X'
        bad_character = DNASequence('.-ACC-GTXGC--', id="i1")
        with_bad_character = [self.d1, self.d2, self.d3, bad_character]
        with self.assertRaises(SequenceCollectionError):
            Alignment(with_bad_character, validate=True)

        # a fourth sequence one character shorter than the others
        too_short = DNASequence('.-ACC-GTGC--', id="i2")
        with_short_seq = [self.d1, self.d2, self.d3, too_short]
        with self.assertRaises(SequenceCollectionError):
            Alignment(with_short_seq, validate=True)
Beispiel #15
0
    def test_is_valid(self):
        """is_valid is truthy for the fixtures and falsy for bad alignments
        """
        for aln in (self.a1, self.a2, self.empty):
            self.assertTrue(aln.is_valid())

        # invalid because of a length mismatch (13 vs. 12 characters)
        length_mismatch = Alignment([
            DNASequence('..ACC-GTTGG..', id="d1"),
            DNASequence('TTACCGGT-GGC', id="d2")])
        self.assertFalse(length_mismatch.is_valid())

        # invalid because of an invalid character ('X')
        bad_character = Alignment([
            DNASequence('..ACC-GTXGG..', id="d1"),
            DNASequence('TTACCGGT-GGCC', id="d2")])
        self.assertFalse(bad_character.is_valid())
Beispiel #16
0
    def test_to_phylip_map_labels(self):
        """to_phylip with label mapping emits a UserWarning and relabels
        the sequences with the given prefix.
        """
        aln = Alignment([DNASequence('..ACC-GTTGG..', id="d1"),
                         DNASequence('TTACCGGT-GGCC', id="d2"),
                         DNASequence('.-ACC-GTTGC--', id="d3")])

        phylip_text, label_map = npt.assert_warns(
            UserWarning, aln.to_phylip, map_labels=True, label_prefix="s")

        # new label -> original id
        self.assertEqual(label_map, {'s1': 'd1', 's3': 'd3', 's2': 'd2'})

        expected_lines = [
            "3 13",
            "s1 ..ACC-GTTGG..",
            "s2 TTACCGGT-GGCC",
            "s3 .-ACC-GTTGC--",
        ]
        self.assertEqual(phylip_text, "\n".join(expected_lines))
Beispiel #17
0
 def generator():
     """Yield four sequences with assorted id, description and quality
     values, one at a time.
     """
     # empty id and description, with quality scores
     unnamed = BiologicalSequence(
         'ACGT', id='', description='', quality=range(4))
     yield unnamed

     # id containing spaces and tabs
     whitespace_id = RNASequence(
         'GAU', id='  foo \t\t bar ', description='')
     yield whitespace_id

     # description containing newlines
     multiline_description = DNASequence(
         'TAG', id='', description='foo\n\n bar\n')
     yield multiline_description

     # id, description and a single quality score
     fully_annotated = BiologicalSequence(
         'A', id='foo', description='bar baz', quality=[42])
     yield fully_annotated
Beispiel #18
0
    def test_init_validate(self):
        """Constructing with validate=True accepts the fixture sequences
        and rejects a sequence containing an 'X'.
        """
        Alignment(self.seqs1, validate=True)

        # a fourth sequence containing an 'X'
        bad_character = DNASequence('.-ACC-GTXGC--', id="i1")
        with_bad_character = [self.d1, self.d2, self.d3, bad_character]
        with self.assertRaises(SequenceCollectionError):
            Alignment(with_bad_character, validate=True)
Beispiel #19
0
    def test_majority_consensus(self):
        """Check majority_consensus on a three-sequence alignment, on two
        disagreeing sequences, and on the empty alignment.
        """
        d1 = DNASequence('TTT', id="d1")
        d2 = DNASequence('TT-', id="d2")
        d3 = DNASequence('TC-', id="d3")
        a1 = Alignment([d1, d2, d3])
        self.assertTrue(a1.majority_consensus().equals(DNASequence('TT-')))

        # Two single-character sequences that disagree: either one is an
        # acceptable result. assertIn reports the offending value on failure,
        # which assertTrue(x in [...]) would not.
        d1 = DNASequence('T', id="d1")
        d2 = DNASequence('A', id="d2")
        a1 = Alignment([d1, d2])
        self.assertIn(a1.majority_consensus(),
                      [DNASequence('T'), DNASequence('A')])

        # The empty alignment's consensus compares equal to the empty string.
        self.assertEqual(self.empty.majority_consensus(), '')
Beispiel #20
0
 def setUp(self):
     """Setup for stockholm tests.

     Builds two DNA sequences plus the GF, GS, GR and GC annotation
     mappings, and stores the resulting StockholmAlignment as ``self.st``.
     """
     self.seqs = [DNASequence("ACC-G-GGTA", id="seq1"),
                  DNASequence("TCC-G-GGCA", id="seq2")]
     # Every key appears exactly once: a repeated key in the pair list
     # would only overwrite the earlier value in place, so listing "RN"
     # a second time adds nothing.
     self.GF = OrderedDict([
         ("AC", "RF00360"),
         ("BM", ["cmbuild  -F CM SEED",
                 "cmsearch  -Z 274931 -E 1000000"]),
         ("SQ", "9"),
         ("RT", ["TITLE1", "TITLE2"]),
         ("RN", ["[1]", "[2]"]),
         ("RA", ["Auth1;", "Auth2;"]),
         ("RL", ["J Mol Biol", "Cell"]),
         ("RM", ["11469857", "12007400"]),
     ])
     # annotations keyed by sequence id
     self.GS = {"AC": OrderedDict([("seq1", "111"), ("seq2", "222")])}
     self.GR = {"SS": OrderedDict([("seq1", "1110101111"),
                                   ("seq2", "0110101110")])}
     self.GC = {"SS_cons": "(((....)))"}
     self.st = StockholmAlignment(self.seqs, gc=self.GC, gf=self.GF,
                                  gs=self.GS, gr=self.GR)
Beispiel #21
0
 def setUp(self):
     """Create the DNASequence fixtures shared by the tests."""
     self.empty = DNASequence('')

     # no id or description
     self.b1 = DNASequence('GATTACA')

     self.b2 = DNASequence(
         'ACCGGTACC', id="test-seq-2", description="A test sequence")

     # contains a 'U'
     self.b3 = DNASequence(
         'ACCGGUACC', id="bad-seq-1", description="Not a DNA sequence")

     self.b4 = DNASequence(
         'MRWSYKVHDBN',
         id="degen",
         description="All of the degenerate bases")

     # contains '.' and '-' characters
     self.b5 = DNASequence('.G--ATTAC-A...')
Beispiel #22
0
 def test_is_reverse_complement(self):
     """is_reverse_complement is falsy for b1 against itself and truthy
     for the two expected partner sequences.
     """
     against_itself = self.b1.is_reverse_complement(self.b1)
     self.assertFalse(against_itself)

     b1_partner = DNASequence('TGTAATC')
     self.assertTrue(self.b1.is_reverse_complement(b1_partner))

     b4_partner = DNASequence('NVHDBMRSWYK')
     self.assertTrue(self.b4.is_reverse_complement(b4_partner))
Beispiel #23
0
 def missing_qual_gen():
     """Yield three sequences, the middle one built without quality
     scores.
     """
     rna_with_quality = RNASequence('A', quality=[42])
     without_quality = BiologicalSequence('AG')
     dna_with_quality = DNASequence('GG', quality=[41, 40])

     for record in (rna_with_quality, without_quality, dna_with_quality):
         yield record
Beispiel #24
0
 def blank_seq_gen():
     """Yield three sequences, the middle one built from an empty
     string.
     """
     leading = DNASequence('A')
     blank = BiologicalSequence('')
     trailing = RNASequence('GG')

     for record in (leading, blank, trailing):
         yield record
Beispiel #25
0
 def test_init_not_equal_lengths(self):
     """Alignment construction raises AlignmentError when an extra,
     12-character sequence is added to the fixture sequences.
     """
     odd_one_out = DNASequence('.-ACC-GTGC--', id="i2")
     mismatched = [self.d1, self.d2, self.d3, odd_one_out]

     with self.assertRaises(AlignmentError):
         Alignment(mismatched)
Beispiel #26
0
    def test_nondegenerates_all_degens(self):
        """nondegenerates expands sequences made up only of degenerate
        characters.
        """
        # The same degenerate character twice.
        expected = [DNASequence(s) for s in ('CC', 'CG', 'GC', 'GG')]
        # Results are sorted by string form because their order is not
        # guaranteed.
        observed = sorted(DNASequence('SS').nondegenerates(), key=str)
        self.assertEqual(observed, expected)

        # Two different degenerate characters.
        expected = [DNASequence(s) for s in ('AC', 'AG', 'GC', 'GG')]
        observed = sorted(DNASequence('RS').nondegenerates(), key=str)
        self.assertEqual(observed, expected)

        # Three characters: only the number of results is checked.
        expansions = list(DNASequence('NNN').nondegenerates())
        self.assertEqual(len(expansions), 4**3)
Beispiel #27
0
 def test_nondegenerates_invalid(self):
     """Expanding 'AZA' raises BiologicalSequenceError."""
     # Construction stays inside the context so an error raised at either
     # step is caught; list() forces the result to be consumed.
     with self.assertRaises(BiologicalSequenceError):
         invalid = DNASequence('AZA')
         list(invalid.nondegenerates())
Beispiel #28
0
 def test_complement(self):
     """complement yields the expected sequences and raises for the b3
     fixture.
     """
     comp_b1 = self.b1.complement()
     self.assertEqual(comp_b1, DNASequence("CTAATGT"))

     comp_b2 = self.b2.complement()
     self.assertEqual(comp_b2, DNASequence("TGGCCATGG"))

     # b3 is expected to be rejected
     with self.assertRaises(BiologicalSequenceError):
         self.b3.complement()

     comp_b4 = self.b4.complement()
     self.assertEqual(comp_b4, DNASequence("KYWSRMBDHVN"))

     # '.' and '-' characters are expected to come through unchanged
     comp_b5 = self.b5.complement()
     self.assertEqual(comp_b5, DNASequence(".C--TAATG-T..."))