def test_label_parser(self):
    """label parser factory function copes with mixed structure labels

    Builds a LabelParser whose ``split_with`` contains both ":" and " ",
    mapping field 0 to ``accession`` and field 2 to ``species``, and
    checks that labels mixing those separators (with or without a
    leading ">") are all rendered through the template as
    ``species:accession``.
    """
    make = LabelParser(
        "%(species)s:%(accession)s",
        [[0, "accession", str], [2, "species", str]],
        split_with=": ",
    )
    # every label below carries accession "abcd" in field 0 and "misc" in
    # field 2, so they must all produce the same formatted result
    expect = "misc:abcd"
    labels = (
        ">abcd:human:misc",
        "abcd:human:misc",
        ">abcd:Human misc",
        ">abcd Human:misc",
    )
    for label in labels:
        # subTest reports which label failed and keeps checking the rest
        with self.subTest(label=label):
            self.assertEqual(make(label), expect)

    # should raise an assertion error if template doesn't match at least
    # one field name
    with self.assertRaises(AssertionError):
        LabelParser(
            "%s:%s",
            [[0, "accession", str], [2, "species", str]],
            split_with=": ",
        )
def test_groups(self):
    """correctly yield grouped sequences from fasta formatted data

    Labels have the form ``group:seq_id:species``. The label parser names
    each sequence by its species, and the group parser is expected to
    yield one collection per group, recording the group name on
    ``info.Group``.
    """
    data = [
        ">group1:seq1_id:species1",
        "ACTG",
        ">group1:seq2_id:species2",
        "ACTG",
        ">group2:seq3_id:species1",
        "ACGT",
        ">group2:seq4_id:species2",
        "ACGT",
    ]
    expected = [
        {"species1": "ACTG", "species2": "ACTG"},
        {"species1": "ACGT", "species2": "ACGT"},
    ]
    label_to_name = LabelParser(
        "%(species)s",
        [(0, "Group", str), (1, "seq_id", str), (2, "species", str)],
        split_with=":",
    )

    groups = list(GroupFastaParser(data, label_to_name, aligned=True))
    # guard against the checks below passing vacuously when the parser
    # yields fewer groups than expected (or none at all)
    self.assertEqual(len(groups), len(expected))
    for number, (group, want) in enumerate(zip(groups, expected), start=1):
        self.assertEqual(group.to_dict(), want)
        self.assertEqual(group.info.Group, f"group{number}")

    # check we don't return a done group
    done_groups = ["group1"]
    remaining = list(
        GroupFastaParser(
            data, label_to_name, done_groups=done_groups, aligned=True
        )
    )
    # exactly one group must be left once group1 is excluded
    self.assertEqual(len(remaining), 1)
    (group,) = remaining
    self.assertEqual(group.to_dict(), expected[1])
    self.assertEqual(group.info.Group, "group2")