Ejemplo n.º 1
0
    def test_single(self):
        """RdbParser should yield exactly one Sequence, with info, per record"""

        def check_only_record(lines, expected_seq):
            # the input must parse to a single record that compares equal to
            # the expected sequence and carries the matching info entries
            records = list(RdbParser(lines))
            self.assertEqual(len(records), 1)
            record = records[0]
            self.assertEqual(record, Sequence(expected_seq))
            self.assertEqual(
                record.info,
                Info({"Species": "H.Sapiens", "OriginalSeq": expected_seq}),
            )

        check_only_record(self.oneseq, "AGUCAUCUAGAUHCAUHC")
        check_only_record(self.multiline, "AGUCAUUAGAUHCAUHC")
Ejemplo n.º 2
0
    def test_sequence_to_moltype(self):
        """correctly convert to specified moltype"""
        # Feature annotations: the slice over the T run must change after
        # conversion, the slice over the A run must stay the same
        s = Sequence("TTTTTTTTTTAAAA", name="test1")
        annot1 = s.add_annotation(Feature, "exon", "fred", [(0, 10)])
        annot2 = s.add_annotation(Feature, "exon", "trev", [(10, 14)])
        got = s.to_moltype("rna")
        annot1_slice = str(annot1.get_slice())
        annot2_slice = str(annot2.get_slice())
        got1_slice = str(got.annotations[0].get_slice())
        got2_slice = str(got.annotations[1].get_slice())
        self.assertNotEqual(annot1_slice, got1_slice)
        self.assertEqual(annot2_slice, got2_slice)
        self.assertEqual(got.moltype.label, "rna")
        self.assertEqual(got.name, "test1")

        # Variable annotation: the annotated spans include T's, so the
        # converted slice is expected to differ from the original one
        s = Sequence("AAGGGGAAAACCCCCAAAAAAAAAATTTTTTTTTTAAA", name="test2")
        xx_y = [[[2, 6], 2.4], [[10, 15], 5.1], [[25, 35], 1.3]]
        y_valued = s.add_annotation(Variable, "SNP", "freq", xx_y)
        got = s.to_moltype("rna")
        y_valued_slice = str(y_valued.get_slice())
        got_slice = str(got.annotations[0].get_slice())
        self.assertNotEqual(y_valued_slice, got_slice)
        self.assertEqual(got.moltype.label, "rna")
        self.assertEqual(got.name, "test2")

        # SimpleVariable annotation, converting via the RNA object rather
        # than the "rna" label: first 10 positions (T's) must differ, the
        # last 10 (A's) must match
        s = Sequence("TTTTTTTTTTAAAAAAAAAA", name="test3")
        data = list(range(20))
        annot4 = s.add_annotation(SimpleVariable, "SNP", "freq", data)
        got = s.to_moltype(RNA)
        annot4_slice = str(annot4.get_slice())
        got_slice = str(got.annotations[0].get_slice())
        self.assertNotEqual(annot4_slice[:10], got_slice[:10])
        self.assertEqual(annot4_slice[10:20], got_slice[10:20])
        self.assertEqual(got.moltype.label, "rna")
        self.assertEqual(got.name, "test3")
Ejemplo n.º 3
0
    def test_annotable_copy_to_seq(self):
        """copy_to_seq attaches the annotation to a new parent sequence"""
        s = Sequence("TTTTTTTTTTAAAA", name="Orig")
        annot = s.add_annotation(Feature, "exon", "fred", [(0, 14)])
        seq = Sequence("UUUUUUUUUUAAAA", name="Test")
        got = annot.copy_to_seq(seq)
        # the copy must point at the new parent but keep its type and name
        self.assertEqual(got._serialisable["parent"], seq)
        self.assertEqual(got._serialisable["type"], "exon")
        self.assertEqual(got._serialisable["name"], "fred")

        # this target is longer than the annotated sequence, and copying
        # onto it is expected to fail with an AssertionError
        with self.assertRaises(AssertionError):
            annot.copy_to_seq(Sequence("UUUUUUUUUUUAAAA", name="Wrong_seq"))
Ejemplo n.º 4
0
 def test_init_other_seq(self):
     """Building a Sequence from another sequence keeps its name and info."""
     source = self.RNA("UCAGG", name="x", info={"z": 3})
     rebuilt = Sequence(source)
     # the raw sequence string, the name and the info must all carry over
     self.assertEqual(rebuilt._seq, "UCAGG")
     self.assertEqual(rebuilt.name, "x")
     self.assertEqual(rebuilt.info.z, 3)
Ejemplo n.º 5
0
 def test_copy(self):
     """copy() gives a distinct object with equal data and annotations"""
     original = Sequence("TTTTTTTTTTAAAA", name="test_copy")
     exon_a = original.add_annotation(Feature, "exon", "annot1", [(0, 10)])
     exon_b = original.add_annotation(Feature, "exon", "annot2", [(10, 14)])
     duplicate = original.copy()

     matches_a = duplicate.get_annotations_matching(
         annotation_type="exon", name="annot1"
     )
     copied_a = matches_a[0]
     matches_b = duplicate.get_annotations_matching(
         annotation_type="exon", name="annot2"
     )
     copied_b = matches_b[0]

     # neither the sequence nor its annotations may be shared objects
     identity_pairs = (
         (duplicate, original),
         (copied_a, exon_a),
         (copied_b, exon_b),
     )
     for new_obj, old_obj in identity_pairs:
         self.assertIsNot(new_obj, old_obj)

     # ...but the data they carry must compare equal
     for attr in ("name", "info", "_seq", "moltype"):
         self.assertEqual(getattr(duplicate, attr), getattr(original, attr))

     # annotation slices must be the same on the original and the copy
     original_slices = [str(annot.get_slice()) for annot in (exon_a, exon_b)]
     copied_slices = [
         str(duplicate.annotations[index].get_slice()) for index in (0, 1)
     ]
     for expected, observed in zip(original_slices, copied_slices):
         self.assertEqual(expected, observed)
Ejemplo n.º 6
0
    def test_annotate_from_gff(self):
        """correctly annotates a Sequence from a gff file"""
        from cogent3.parse.fasta import FastaParser

        # fixture paths are relative to the directory the tests are run from
        fasta_path = os.path.join("data", "c_elegans_WS199_dna_shortened.fasta")
        gff3_path = os.path.join("data", "c_elegans_WS199_shortened_gff.gff3")
        # only the first record's sequence is needed; its label is unused
        _, seq = next(FastaParser(fasta_path))

        sequence = Sequence(seq)
        sequence.annotate_from_gff(gff3_path)
        matches = list(sequence.get_annotations_matching("*", extend_query=True))
        # 13 features with one having 2 parents, so 14 instances should be found
        self.assertEqual(len(matches), 14)