def test_single(self):
    """RdbParser should yield exactly one sequence, with its info, per record"""
    # the ``oneseq`` fixture
    expected_seq = "AGUCAUCUAGAUHCAUHC"
    records = list(RdbParser(self.oneseq))
    self.assertEqual(len(records), 1)
    record = records[0]
    self.assertEqual(record, Sequence(expected_seq))
    expected_info = Info({"Species": "H.Sapiens", "OriginalSeq": expected_seq})
    self.assertEqual(record.info, expected_info)

    # the ``multiline`` fixture must also collapse to a single record
    expected_seq = "AGUCAUUAGAUHCAUHC"
    records = list(RdbParser(self.multiline))
    self.assertEqual(len(records), 1)
    record = records[0]
    self.assertEqual(record, Sequence(expected_seq))
    expected_info = Info({"Species": "H.Sapiens", "OriginalSeq": expected_seq})
    self.assertEqual(record.info, expected_info)
def test_sequence_to_moltype(self):
    """correctly convert to specified moltype"""
    # Feature annotations: annot1 spans the leading T's, annot2 the trailing A's
    s = Sequence("TTTTTTTTTTAAAA", name="test1")
    annot1 = s.add_annotation(Feature, "exon", "fred", [(0, 10)])
    annot2 = s.add_annotation(Feature, "exon", "trev", [(10, 14)])
    got = s.to_moltype("rna")
    annot1_slice = str(annot1.get_slice())
    annot2_slice = str(annot2.get_slice())
    got1_slice = str(got.annotations[0].get_slice())
    got2_slice = str(got.annotations[1].get_slice())
    # only the slice over the T's is expected to change on conversion
    self.assertNotEqual(annot1_slice, got1_slice)
    self.assertEqual(annot2_slice, got2_slice)
    self.assertEqual(got.moltype.label, "rna")
    self.assertEqual(got.name, "test1")

    # Variable annotation: (span, value) pairs
    s = Sequence("AAGGGGAAAACCCCCAAAAAAAAAATTTTTTTTTTAAA", name="test2")
    xx_y = [[[2, 6], 2.4], [[10, 15], 5.1], [[25, 35], 1.3]]
    y_valued = s.add_annotation(Variable, "SNP", "freq", xx_y)
    got = s.to_moltype("rna")
    y_valued_slice = str(y_valued.get_slice())
    got_slice = str(got.annotations[0].get_slice())
    self.assertNotEqual(y_valued_slice, got_slice)
    self.assertEqual(got.moltype.label, "rna")
    self.assertEqual(got.name, "test2")

    # SimpleVariable annotation: one value per position; the moltype is
    # passed as the ``RNA`` name from module scope instead of the "rna" label
    s = Sequence("TTTTTTTTTTAAAAAAAAAA", name="test3")
    data = list(range(20))
    annot4 = s.add_annotation(SimpleVariable, "SNP", "freq", data)
    got = s.to_moltype(RNA)
    annot4_slice = str(annot4.get_slice())
    got_slice = str(got.annotations[0].get_slice())
    # first ten characters (the T's) differ, the remaining ten (the A's) match
    self.assertNotEqual(annot4_slice[:10], got_slice[:10])
    self.assertEqual(annot4_slice[10:20], got_slice[10:20])
    self.assertEqual(got.moltype.label, "rna")
    self.assertEqual(got.name, "test3")
def test_annotable_copy_to_seq(self):
    """copy_to_seq should attach a copy of the annotation to the given sequence"""
    s = Sequence("TTTTTTTTTTAAAA", name="Orig")
    annot = s.add_annotation(Feature, "exon", "fred", [(0, 14)])
    seq = Sequence("UUUUUUUUUUAAAA", name="Test")
    got = annot.copy_to_seq(seq)
    # the copy points at the new parent but keeps its type and name
    self.assertEqual(got._serialisable["parent"], seq)
    self.assertEqual(got._serialisable["type"], "exon")
    self.assertEqual(got._serialisable["name"], "fred")
    # a target that does not match the annotated sequence must be rejected
    # (presumably because its length differs -- confirm against copy_to_seq)
    with self.assertRaises(AssertionError):
        annot.copy_to_seq(Sequence("UUUUUUUUUUUAAAA", name="Wrong_seq"))
def test_init_other_seq(self):
    """Building a Sequence from another seq keeps its data, name and info."""
    original = self.RNA("UCAGG", name="x", info={"z": 3})
    rebuilt = Sequence(original)
    self.assertEqual(rebuilt._seq, "UCAGG")
    self.assertEqual(rebuilt.name, "x")
    self.assertEqual(rebuilt.info.z, 3)
def test_copy(self):
    """copy() returns a distinct but equivalent sequence, annotations included"""
    original = Sequence("TTTTTTTTTTAAAA", name="test_copy")
    exon_a = original.add_annotation(Feature, "exon", "annot1", [(0, 10)])
    exon_b = original.add_annotation(Feature, "exon", "annot2", [(10, 14)])

    duplicate = original.copy()
    query = {"annotation_type": "exon"}
    copied_a = duplicate.get_annotations_matching(name="annot1", **query)[0]
    copied_b = duplicate.get_annotations_matching(name="annot2", **query)[0]

    # the copy and its annotations must be new objects ...
    self.assertIsNot(duplicate, original)
    self.assertIsNot(copied_a, exon_a)
    self.assertIsNot(copied_b, exon_b)

    # ... that carry the same attributes as the source
    self.assertEqual(duplicate.name, original.name)
    self.assertEqual(duplicate.info, original.info)
    self.assertEqual(duplicate._seq, original._seq)
    self.assertEqual(duplicate.moltype, original.moltype)

    # and whose annotations slice out the same sequence text
    source_a, source_b = str(exon_a.get_slice()), str(exon_b.get_slice())
    dup_a, dup_b = (
        str(duplicate.annotations[0].get_slice()),
        str(duplicate.annotations[1].get_slice()),
    )
    self.assertEqual(source_a, dup_a)
    self.assertEqual(source_b, dup_b)
def test_annotate_from_gff(self):
    """correctly annotates a Sequence from a gff file"""
    from cogent3.parse.fasta import FastaParser

    # paths are relative to the directory the tests are run from
    fasta_path = os.path.join("data", "c_elegans_WS199_dna_shortened.fasta")
    gff3_path = os.path.join("data", "c_elegans_WS199_shortened_gff.gff3")
    # only the first record's sequence is needed; its label is ignored
    _, seq = next(FastaParser(fasta_path))

    sequence = Sequence(seq)
    sequence.annotate_from_gff(gff3_path)
    matches = list(sequence.get_annotations_matching("*", extend_query=True))
    # 13 features with one having 2 parents, so 14 instances should be found
    self.assertEqual(len(matches), 14)