Example #1
0
    def test_label_parser(self):
        """LabelParser copes with labels that mix separator characters."""
        # Every label below separates its fields with ":" and/or " " (the two
        # characters given to split_with). Field 0 is captured as the accession
        # and field 2 as the species; the template then emits them in
        # species:accession order. Labels with and without a leading ">" are
        # expected to give the same result.
        make = LabelParser(
            "%(species)s:%(accession)s",
            [[0, "accession", str], [2, "species", str]],
            split_with=": ",
        )
        expect = "misc:abcd"
        labels = [
            ">abcd:human:misc",
            "abcd:human:misc",
            ">abcd:Human misc",
            ">abcd Human:misc",
        ]
        for label in labels:
            # subTest names the offending label in the failure report
            with self.subTest(label=label):
                self.assertEqual(make(label), expect)

        # should raise an assertion error if the template doesn't match at
        # least one field name; the error comes from construction itself
        with self.assertRaises(AssertionError):
            LabelParser(
                "%s:%s",
                [[0, "accession", str], [2, "species", str]],
                split_with=": ",
            )
Example #2
0
    def test_groups(self):
        """Correctly yield grouped sequences from fasta formatted data."""
        # labels are "<group>:<seq id>:<species>"; consecutive records that
        # share a group value are expected to come back as one collection
        data = [
            ">group1:seq1_id:species1",
            "ACTG",
            ">group1:seq2_id:species2",
            "ACTG",
            ">group2:seq3_id:species1",
            "ACGT",
            ">group2:seq4_id:species2",
            "ACGT",
        ]
        expected = [
            {"species1": "ACTG", "species2": "ACTG"},
            {"species1": "ACGT", "species2": "ACGT"},
        ]
        # sequences are named by species only; the group field is stored
        # under the "Group" key, which is read back via group.info.Group
        label_to_name = LabelParser(
            "%(species)s",
            [(0, "Group", str), (1, "seq_id", str), (2, "species", str)],
            split_with=":",
        )
        parser = GroupFastaParser(data, label_to_name, aligned=True)
        count = 0
        for count, group in enumerate(parser, start=1):
            self.assertEqual(group.to_dict(), expected[count - 1])
            self.assertEqual(group.info.Group, f"group{count}")
        # guard against a vacuous pass when too few groups are yielded
        self.assertEqual(count, len(expected))

        # check we don't return a done group
        parser = GroupFastaParser(
            data,
            label_to_name,
            done_groups=["group1"],
            aligned=True,
        )
        returned = []
        for group in parser:
            self.assertEqual(group.to_dict(), expected[1])
            self.assertEqual(group.info.Group, "group2")
            returned.append(group.info.Group)
        # exactly the one remaining group must be yielded, not zero
        self.assertEqual(returned, ["group2"])