예제 #1
0
    def test_single(self):
        """RdbParser should read a single record as a Sequence carrying its info"""

        def check_only_record(lines, expected_seq):
            # Parse the lines, require exactly one record, then compare the
            # record itself and its info against the expected sequence string.
            records = list(RdbParser(lines))
            self.assertEqual(len(records), 1)
            record = records[0]
            self.assertEqual(record, Sequence(expected_seq))
            self.assertEqual(
                record.info,
                Info({"Species": "H.Sapiens", "OriginalSeq": expected_seq}),
            )

        check_only_record(self.oneseq, "AGUCAUCUAGAUHCAUHC")
        # NOTE(review): the fixture name suggests the sequence is split over
        # several input lines -- confirm against the fixture definition.
        check_only_record(self.multiline, "AGUCAUUAGAUHCAUHC")
예제 #2
0
 def test_only_sequences(self):
     """RdbParser should raise, or return an empty list, for a file w/o labels"""
     # strict parsing must reject the label-less input once it is consumed
     strict_parser = RdbParser(self.nolabels, strict=True)
     self.assertRaises(RecordError, list, strict_parser)
     # non-strict parsing skips the offending records, leaving nothing
     lenient_records = list(RdbParser(self.nolabels, strict=False))
     self.assertEqual(lenient_records, [])
예제 #3
0
    def test_single_constructor(self):
        """RdbParser should use constructors if supplied

        First checks a custom sequence constructor on its own, then the same
        sequence constructor combined with a custom header (info) constructor.
        """

        def to_dna(x, info):
            # Replace U with T so the parsed sequence can be built as DNA.
            return DnaSequence(str(x).replace("U", "T"), info=info)

        f = list(RdbParser(self.oneseq, to_dna))
        self.assertEqual(len(f), 1)
        a = f[0]
        self.assertEqual(a, "AGTCATCTAGATHCATHC")
        self.assertEqual(
            a.info, Info({"Species": "H.Sapiens", "OriginalSeq": "AGUCAUCUAGAUHCAUHC"})
        )

        def alternative_constr(header_lines):
            # Build an Info from "name: value" header lines, upper-casing both
            # the name and the value.
            info = Info()
            for line in header_lines:
                name, colon, value = line.strip().partition(":")
                # strip out empty lines, lines without name, lines without
                # colon
                if not name or not colon:
                    continue
                info[name.upper()] = value.strip().upper()
            return info

        f = list(RdbParser(self.oneseq, to_dna, alternative_constr))
        self.assertEqual(len(f), 1)
        a = f[0]
        self.assertEqual(a, "AGTCATCTAGATHCATHC")
        # "SEQ"/"H.SAPIENS" are upper-cased by alternative_constr above.
        # NOTE(review): "OriginalSeq" and "Refs" are not set by that
        # constructor, so presumably the parser adds them -- confirm.
        exp_info = Info(
            {"OriginalSeq": "AGUCAUCUAGAUHCAUHC", "Refs": {}, "SEQ": "H.SAPIENS"}
        )
        self.assertEqual(a.info, exp_info)
예제 #4
0
    def test_full(self):
        """RdbParser: full data, valid and invalid

        Parses a good-only input under both strict settings, then an input
        mixing good and bad records: strict parsing must raise RecordError,
        non-strict parsing must still return the two good records.
        """

        def assert_same_record(observed, expected):
            # Compare the sequence object, its string form and its info.
            self.assertEqual(observed, expected)
            self.assertEqual(str(observed), str(expected))
            self.assertEqual(observed.info, expected.info)

        r1 = RnaSequence(
            "-??GG-UGAA--CGCU---ACGU-N???---",
            info=Info({
                "Species": "unidentified Thermus OPB AF027020",
                "Refs": {
                    "rRNA": ["AF027020"]
                },
                "OriginalSeq": "-o[oGG-U{G}AA--C^GC]U---ACGU-Nooo---",
            }),
        )
        r2 = RnaSequence(
            "---CGAUCG--UAUACG-N???-",
            info=Info({
                "Species": "Thermus silvanus X84211",
                "Refs": {
                    "rRNA": ["X84211"]
                },
                "OriginalSeq": "---CGAU[C(G){--UA}U]ACG-Nooo-",
            }),
        )
        expected_records = (r1, r2)

        # when only good records, should work independent of strict; both
        # records are verified for each setting
        for strict in (True, False):
            obs = list(RdbParser(RDB_LINES_ONLY_GOOD.split("\n"), strict=strict))
            self.assertEqual(len(obs), 2)
            for observed, expected in zip(obs, expected_records):
                assert_same_record(observed, expected)

        # when strict, should raise error on invalid record
        f = RdbParser(RDB_LINES_GOOD_BAD.split("\n"), strict=True)
        self.assertRaises(RecordError, list, f)
        # when not strict, the invalid record is skipped
        obs = list(RdbParser(RDB_LINES_GOOD_BAD.split("\n"), strict=False))
        self.assertEqual(len(obs), 2)
        for observed, expected in zip(obs, expected_records):
            assert_same_record(observed, expected)
예제 #5
0
    def test_multiple_constructor_bad(self):
        """RdbParser should complain about, or skip, bad records w/ constructor"""

        def dnastrict(x, **kwargs):
            # Report any construction failure as a RecordError.
            try:
                seq = DnaSequence(x, **kwargs)
            except Exception:
                raise RecordError("Could not convert sequence")
            return seq

        # strict setting left at its default: consuming the parser must raise
        strict_parser = RdbParser(self.oneX, dnastrict)
        self.assertRaises(RecordError, list, strict_parser)

        # non-strict: two records are expected to come through
        records = list(RdbParser(self.oneX, dnastrict, strict=False))
        self.assertEqual(len(records), 2)

        expected = (("ACT", "mit"), ("AAA", "pla"))
        for record, (seq, species) in zip(records, expected):
            self.assertEqual(record, seq)
            self.assertEqual(
                record.info, Info({"Species": species, "OriginalSeq": seq})
            )
예제 #6
0
 def test_empty(self):
     """RdbParser should return empty list for empty or label-less 'file'"""
     # empty input yields no records
     from_empty = list(RdbParser(self.empty))
     self.assertEqual(from_empty, [])
     # label-less input yields no records when parsing is not strict
     lenient_records = list(RdbParser(self.nolabels, strict=False))
     self.assertEqual(lenient_records, [])
     # with the strict argument left at its default, label-less input raises
     default_parser = RdbParser(self.nolabels)
     self.assertRaises(RecordError, list, default_parser)