Ejemplo n.º 1
0
    def test_subset_positions_Alignment(self):
        """Columns 0, 1 and 5 of self.aln should equal a hand-built alignment"""
        expected = Alignment(
            [
                RnaSequence("UCG", name="rna1"),
                RnaSequence("YCG", name="rna2"),
                RnaSequence("CAR", name="rna3"),
            ],
            moltype=RNA,
        )

        observed = self.aln.take_positions([0, 1, 5])

        self.assertEqual(observed, expected)
        # the subset must not compare equal to the alignment it was taken from
        self.assertNotEqual(observed, self.aln)
        # The string representations should agree as well. Original note:
        # this fails right now, because sequence order is not maintained.
        # See separate test.
        self.assertEqual(str(observed), str(expected))
Ejemplo n.º 2
0
    def test_seq_ungapping(self):
        """degap should remove the gaps from RnaSequence and ArraySequence"""
        gapped_text = "U-C-A-G-"
        plain_seq = RnaSequence(gapped_text, name="rna1")
        array_seq = ArraySequence(
            gapped_text,
            name="rna1",
            alphabet=RNA.alphabets.degen_gapped,
        )

        # the plain sequence compares equal to its text before and after degap
        self.assertEqual(plain_seq, gapped_text)
        self.assertEqual(plain_seq.degap(), "UCAG")

        # check it produces the right string from the beginning
        self.assertEqual(str(array_seq), gapped_text)
        # expected encoding: every "-" position holds 4, the bases hold 0..3
        self.assertEqual(array_seq._data, [0, 4, 1, 4, 2, 4, 3, 4])
        # ArraySequence should maybe have the same degap method as normal seq
        self.assertEqual(str(array_seq.degap()), "UCAG")
Ejemplo n.º 3
0
    def test_full(self):
        """RdbParser: full data, valid and invalid"""

        def assert_records_match(observed, expected):
            """Check parsed records against expectations.

            Each pair is compared as objects, as strings and by ``info``,
            after confirming both lists have the same length.
            """
            self.assertEqual(len(observed), len(expected))
            for got, want in zip(observed, expected):
                self.assertEqual(got, want)
                self.assertEqual(str(got), str(want))
                self.assertEqual(got.info, want.info)

        r1 = RnaSequence(
            "-??GG-UGAA--CGCU---ACGU-N???---",
            info=Info({
                "Species": "unidentified Thermus OPB AF027020",
                "Refs": {
                    "rRNA": ["AF027020"]
                },
                "OriginalSeq": "-o[oGG-U{G}AA--C^GC]U---ACGU-Nooo---",
            }),
        )
        r2 = RnaSequence(
            "---CGAUCG--UAUACG-N???-",
            info=Info({
                "Species": "Thermus silvanus X84211",
                "Refs": {
                    "rRNA": ["X84211"]
                },
                "OriginalSeq": "---CGAU[C(G){--UA}U]ACG-Nooo-",
            }),
        )
        expected = [r1, r2]

        # when only good records, should work independent of strict; both
        # records are verified in both modes (the non-strict pass previously
        # checked only the first record)
        for strict in (True, False):
            obs = list(
                RdbParser(RDB_LINES_ONLY_GOOD.split("\n"), strict=strict))
            assert_records_match(obs, expected)

        # when strict, should raise error on invalid record; the error is
        # expected while the parser is being consumed by list()
        strict_parser = RdbParser(RDB_LINES_GOOD_BAD.split("\n"), strict=True)
        with self.assertRaises(RecordError):
            list(strict_parser)

        # when not strict, the malformed record is skipped
        obs = list(RdbParser(RDB_LINES_GOOD_BAD.split("\n"), strict=False))
        assert_records_match(obs, expected)
Ejemplo n.º 4
0
    def test_subset_seqs_Alignment(self):
        """take_seqs should return an alignment of just the named sequences"""
        first = RnaSequence("UCG", name="rna1")
        second = RnaSequence("YCG", name="rna2")
        third = RnaSequence("CAR", name="rna3")

        full_aln = Alignment([first, second, third], moltype=RNA)
        expected = Alignment([second, third], moltype=RNA)

        observed = full_aln.take_seqs(["rna2", "rna3"])
        self.assertEqual(observed, expected)
        self.assertEqual(str(observed), str(expected))

        # Selected sequences should be in specified order?
        # Asking self.aln for the same names in opposite orders is expected
        # to give different string output.
        reversed_text = str(self.aln.take_seqs(["rna3", "rna2"]))
        forward_text = str(self.aln.take_seqs(["rna2", "rna3"]))
        self.assertNotEqual(reversed_text, forward_text)
Ejemplo n.º 5
0
 def test_feature_copy_annotations_to(self):
     """copying a feature onto a same-length sequence should reproduce it"""
     source = DnaSequence("TTTTTTTTTTAAAA", name="Orig")
     exon = source.add_annotation(Feature, "exon", "fred", [(0, 14)])
     target = RnaSequence("UUUUUUUUUUAAAA", name="Test")

     result = exon.copy_annotations_to(target)

     # same number of annotations on the source and on the returned object
     self.assertEqual(len(source.annotations), len(result.annotations))
     for original, copied in zip(source.annotations, result.annotations):
         self.assertEqual(original.get_coordinates(),
                          copied.get_coordinates())
         self.assertIsInstance(original, copied.__class__)
         # the copy must be attached to the target sequence itself
         self.assertIs(copied.parent, target)

     # a target two positions shorter than the source is expected to fail
     with self.assertRaises(AssertionError):
         exon.copy_annotations_to(target[:-2])
Ejemplo n.º 6
0
    def setUp(self):
        """Create the named and unnamed sequence/alignment fixtures"""
        degen_gapped = RNA.alphabets.degen_gapped
        # (name, text) for the three sequences every fixture is built from
        labelled = (("rna1", "UCAGGG"), ("rna2", "YCU-RG"), ("rna3", "CAA-NR"))
        texts = [text for _, text in labelled]

        # named sequences
        self.rna1, self.rna2, self.rna3 = (
            RnaSequence(text, name=label) for label, text in labelled
        )
        self.model1, self.model2, self.model3 = (
            ArraySequence(text, name=label, alphabet=degen_gapped)
            for label, text in labelled
        )

        self.aln = Alignment([self.rna1, self.rna2, self.rna3], moltype=RNA)
        self.da = ArrayAlignment(
            [self.model1, self.model2, self.model3],
            moltype=RNA,
            alphabet=degen_gapped,
        )

        # seqs no name
        self.nn_rna1, self.nn_rna2, self.nn_rna3 = (
            RnaSequence(text) for text in texts
        )
        self.nn_model1, self.nn_model2, self.nn_model3 = (
            ArraySequence(text, alphabet=degen_gapped) for text in texts
        )

        self.nn_aln = Alignment(
            [self.nn_rna1, self.nn_rna2, self.nn_rna3],
            moltype=RNA,
        )
        self.nn_da = ArrayAlignment(
            [self.nn_model1, self.nn_model2, self.nn_model3],
            moltype=RNA,
            alphabet=degen_gapped,
        )