Example #1
 def test_should_parse_def_tag_value_pair_with_empty_xrefs(self):
     mock_callback = Mock()
     builder = OboLexerBuilder()
     builder.in_header = False
     OboParser(builder.new_lexer(), mock_callback).parse_line(
         '''def: "OK" [] {XXX="YYY"} ! some comment''')
     self.assertEqual(mock_callback.mock_calls, [
         call.qualifier("XXX", "YYY"),
         call.def_tag_value('OK'),
     ])
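Every parser example in this section follows the same pattern: build a lexer with OboLexerBuilder, switch it out of header mode, wrap it in an OboParser together with a callback object, and feed it one line at a time through parse_line. The parser reports what it finds by invoking methods on the callback (xrefs and qualifiers first, then the tag value itself), which is why the tests assert on mock_calls. A minimal sketch of that flow, assuming OboLexerBuilder and OboParser are importable from the package under test (the snippets do not show their import path):

from unittest.mock import Mock

# Sketch only: the import path for OboLexerBuilder and OboParser is an assumption.
builder = OboLexerBuilder()
builder.in_header = False     # stanza lines, not header lines
callback = Mock()             # records every event the parser reports

OboParser(builder.new_lexer(), callback).parse_line(
    'def: "OK" [] {XXX="YYY"} ! some comment')

# The callback now holds the call sequence Example #1 asserts on:
# [call.qualifier('XXX', 'YYY'), call.def_tag_value('OK')]
print(callback.mock_calls)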
Example #2
 def test_should_parse_def_tag_with_xref_and_brackets2(self):
     mock_callback = Mock()
     builder = OboLexerBuilder()
     builder.in_header = False
     OboParser(builder.new_lexer(), mock_callback).parse_line(
         """def: "Any process that reduces the frequency, rate or extent of branch elongation involved in ureteric bud branching, the growth of a branch of the ureteric bud along its axis." [GOC:mtg_kidney_jan10]"""
     )
     self.assertEqual(mock_callback.mock_calls, [
         call.add_xref('GOC:mtg_kidney_jan10', None),
         call.def_tag_value(
             "Any process that reduces the frequency, rate or extent of branch elongation involved in ureteric bud branching, the growth of a branch of the ureteric bud along its axis."
         ),
     ])
Example #3
 def test_should_parse_xref_tag(self):
     mock_callback = Mock()
     builder = OboLexerBuilder()
     builder.in_header = False
     OboParser(builder.new_lexer(), mock_callback).parse_line(
         '''xref: reactome:R-HSA-71593 "((1,6)-alpha-glucosyl)poly((1,4)-alpha-glucosyl)glycogenin => poly{(1,4)-alpha-glucosyl} glycogenin + alpha-D-glucose"'''
     )
     self.assertEqual(mock_callback.mock_calls, [
         call.add_xref(
             'reactome:R-HSA-71593',
             "((1,6)-alpha-glucosyl)poly((1,4)-alpha-glucosyl)glycogenin => poly{(1,4)-alpha-glucosyl} glycogenin + alpha-D-glucose"
         ),
         call.xref_tag(),
     ])
Example #4
 def test_should_parse_def_tag_with_xref_and_brackets(self):
     mock_callback = Mock()
     builder = OboLexerBuilder()
     builder.in_header = False
     OboParser(builder.new_lexer(), mock_callback).parse_line(
         """def: "Catalysis of the reaction!: {}2'-phospho-[ligated tRNA] + NAD+ = mature tRNA + ADP ribose 1'',2''-phosphate + nicotinamide + H2O. This reaction is the transfer of the splice junction 2-phosphate from ligated tRNA to NAD+ to produce ADP-ribose 1'-2' cyclic phosphate." [EC:2.7.1.160, PMID:9148937]\n"""
     )
     self.assertEqual(mock_callback.mock_calls, [
         call.add_xref('EC:2.7.1.160', None),
         call.add_xref('PMID:9148937', None),
         call.def_tag_value(
             "Catalysis of the reaction!: {}2'-phospho-[ligated tRNA] + NAD+ = mature tRNA + ADP ribose 1'',2''-phosphate + nicotinamide + H2O. This reaction is the transfer of the splice junction 2-phosphate from ligated tRNA to NAD+ to produce ADP-ribose 1'-2' cyclic phosphate."
         ),
     ])
Example #5
 def test_should_parse_def_tag_with_xref(self):
     mock_callback = Mock()
     builder = OboLexerBuilder()
     builder.in_header = False
     OboParser(builder.new_lexer(), mock_callback).parse_line(
         '''def: "Enables the transfer of citrate, 2-hydroxy-1,2,3-propanetricarboyxlate, '''
         '''from one side of a membrane to the other." [GOC:ai,HELLO "WORLD"]'''
     )
     self.assertEqual(mock_callback.mock_calls, [
         call.add_xref('GOC:ai', None),
         call.add_xref('HELLO', 'WORLD'),
         call.def_tag_value(
             'Enables the transfer of citrate, 2-hydroxy-1,2,3-propanetricarboyxlate, '
             'from one side of a membrane to the other.'),
     ])
Example #6
 def test_should_parse_tag_value_pair_with_qualifiers(self):
     mock_callback = Mock()
     builder = OboLexerBuilder()
     builder.in_header = False
     OboParser(builder.new_lexer(), mock_callback).parse_line(
         """range: BFO:0000004 {http://purl.obolibrary.org/obo/IAO_0000116="This is redundant with the more """
         """specific 'independent and not spatial region' constraint. We leave in the redundant axiom for use """
         """with reasoners that do not use negation.",  XXX="YYY"} ! some comment"""
     )
     self.assertEqual(mock_callback.mock_calls, [
         call.qualifier(
             'http://purl.obolibrary.org/obo/IAO_0000116',
             "This is redundant with the more specific 'independent and not spatial region' constraint. "
             "We leave in the redundant axiom for use with reasoners that do not use negation."
         ),
         call.qualifier("XXX", "YYY"),
         call.tag_value_pair('range', 'BFO:0000004'),
     ])
Example #7
class TestLexer(unittest.TestCase):

    def setUp(self):
        self.under_test = OboLexerBuilder()
        self.lexer = self.under_test.new_lexer()
        self.tokenize = partial(self.under_test.tokenize, self.lexer)

    def to_tokens(self, token_list):
        """Build the expected LexToken objects from [type, value, lineno, lexpos] lists."""
        result = []
        for values in token_list:
            token = LexToken()
            token.type = values[0]
            token.value = values[1]
            token.lineno = values[2]
            token.lexpos = values[3]
            token.lexer = self.lexer
            result.append(token)
        return result

    def test_should_recognise_header_tag_values(self):
        self.lexer.push_state(OboLexerBuilder.HEADER_VALUE)
        actual = self.tokenize("""It can contain any characters but new lines \u0145 \\a""")
        expected = self.to_tokens([["TAG_VALUE", "It can contain any characters but new lines \u0145 \\a", 1, 0]])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_escape_characters_in_header_tag_values(self):
        self.lexer.push_state(OboLexerBuilder.HEADER_VALUE)
        actual = self.tokenize("""It can contain any characters but """
                               """new lines \u0145 \\a\\n\\W\\t\\:\\,\\"\\\\\\(\\)\\{\\}\\[\\]@""")
        expected = self.to_tokens([["TAG_VALUE", """It can contain any characters but new lines \u0145 \\a
 \t:,\"\\(){}[]@""", 1, 0]])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_true_tag_values_in_header(self):
        self.lexer.push_state(OboLexerBuilder.HEADER_VALUE)
        actual = self.tokenize("""true""")
        expected = self.to_tokens([["BOOLEAN", "true", 1, 0]])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_false_tag_values_in_header(self):
        self.lexer.push_state(OboLexerBuilder.HEADER_VALUE)
        actual = self.tokenize("""false""")
        expected = self.to_tokens([["BOOLEAN", "false", 1, 0]])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_an_ending_comment(self):
        self.under_test.in_header = False
        actual = self.tokenize("""is_a: RO:0002323 ! mereotopologically related to""")
        expected = self.to_tokens([["TAG", "is_a", 1, 0],
                                   ["TAG_VALUE_SEPARATOR", ":", 1, 4],
                                   ["TAG_VALUE", "RO:0002323", 1, 6]
                                   ])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_qualifiers_without_comments(self):
        self.under_test.in_header = False
        actual = self.tokenize("""range: BFO:0000004 {http://purl.obolibrary.org/obo/IAO_0000116="This is """
                               """redundant with the more specific 'independent and not spatial region' """
                               """constraint. We leave in the redundant axiom for use with reasoners that do """
                               """not use negation.",  XXX="YYY"}""")
        expected = self.to_tokens([["TAG", "range", 1, 0],
                                   ["TAG_VALUE_SEPARATOR", ":", 1, 5],
                                   ["TAG_VALUE", "BFO:0000004", 1, 7],
                                   ["QUALIFIER_BLOCK_START", "{", 1, 19],
                                   ["QUALIFIER_ID", "http://purl.obolibrary.org/obo/IAO_0000116", 1, 20],
                                   ["QUALIFIER_ID_VALUE_SEPARATOR", "=", 1, 62],
                                   ["QUALIFIER_VALUE",
                                    "This is redundant with the more specific 'independent and not spatial region' "
                                    "constraint. We leave in the redundant axiom for use with reasoners that do not "
                                    "use negation.", 1, 63],
                                   ["QUALIFIER_LIST_SEPARATOR", ",", 1, 235],
                                   ["QUALIFIER_ID", "XXX", 1, 238],
                                   ["QUALIFIER_ID_VALUE_SEPARATOR", "=", 1, 241],
                                   ["QUALIFIER_VALUE", "YYY", 1, 242],
                                   ["QUALIFIER_BLOCK_END", "}", 1, 247],
                                   ])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_def_tag(self):
        self.under_test.in_header = False
        actual = self.tokenize('''def: "some \\"value" [SOMEID "description", ANOTHERID] {qualifier="quality"}''')
        expected = self.to_tokens([["DEF_TAG", "def", 1, 0],
                                   ["TAG_VALUE_SEPARATOR", ":", 1, 3],
                                   ["TAG_VALUE", "some \"value", 1, 5],
                                   ["XREF_LIST_START", "[", 1, 20],
                                   ["XREF", "SOMEID", 1, 21],
                                   ["XREF_DESCRIPTION", "description", 1, 28],
                                   ["XREF_LIST_SEPARATOR", ",", 1, 41],
                                   ["XREF", "ANOTHERID", 1, 43],
                                   ["XREF_LIST_END", "]", 1, 52],
                                   ["QUALIFIER_BLOCK_START", "{", 1, 54],
                                   ["QUALIFIER_ID", "qualifier", 1, 55],
                                   ["QUALIFIER_ID_VALUE_SEPARATOR", "=", 1, 64],
                                   ["QUALIFIER_VALUE", "quality", 1, 65],
                                   ["QUALIFIER_BLOCK_END", "}", 1, 74],
                                   ])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_xref_block_with_escaped_characters(self):
        self.under_test.in_header = False
        self.lexer.push_state(OboLexerBuilder.XREF_LIST)
        actual = self.tokenize(
            '''[http:/www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=pubmed&term=SILVERMAN+SK[au\\]&dispmax=50]''')
        expected = self.to_tokens([
            ["XREF_LIST_START", "[", 1, 0],
            ["XREF",
             "http:/www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=pubmed&term=SILVERMAN+SK[au]&dispmax=50", 1,
             1],
            ["XREF_LIST_END", "]", 1, 100],
        ])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_xref_tag(self):
        self.under_test.in_header = False
        actual = self.tokenize(
            '''xref: reactome:R-HSA-71593 "((1,6)-alpha-glucosyl)poly((1,4)-alpha-glucosyl)glycogenin => poly{(1,4)-alpha-glucosyl} glycogenin + alpha-D-glucose"''')
        expected = self.to_tokens([["XREF_TAG", "xref", 1, 0],
                                   ["TAG_VALUE_SEPARATOR", ":", 1, 4],
                                   ["XREF", "reactome:R-HSA-71593", 1, 6],
                                   ["XREF_DESCRIPTION",
                                    "((1,6)-alpha-glucosyl)poly((1,4)-alpha-glucosyl)glycogenin => poly{(1,4)-alpha-glucosyl} glycogenin + alpha-D-glucose",
                                    1, 27],
                                   ])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_single_qualifier_without_comments(self):
        self.under_test.in_header = False
        actual = self.tokenize("""range: BFO:0000004 {http://purl.obolibrary.org/obo/IAO_0000116="This is """
                               """redundant with the more specific 'independent and not spatial region' """
                               """constraint. We leave in the redundant axiom for use with reasoners that do """
                               """not use negation."}""")
        expected = self.to_tokens([["TAG", "range", 1, 0],
                                   ["TAG_VALUE_SEPARATOR", ":", 1, 5],
                                   ["TAG_VALUE", "BFO:0000004", 1, 7],
                                   ["QUALIFIER_BLOCK_START", "{", 1, 19],
                                   ["QUALIFIER_ID", "http://purl.obolibrary.org/obo/IAO_0000116", 1, 20],
                                   ["QUALIFIER_ID_VALUE_SEPARATOR", "=", 1, 62],
                                   ["QUALIFIER_VALUE",
                                    "This is redundant with the more specific 'independent and not spatial region' "
                                    "constraint. We leave in the redundant axiom for use with reasoners that do not "
                                    "use negation.", 1, 63],
                                   ["QUALIFIER_BLOCK_END", "}", 1, 235],
                                   ])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_qualifiers_with_comments(self):
        self.under_test.in_header = False
        actual = self.tokenize("""range: BFO:0000004 {http://purl.obolibrary.org/obo/IAO_0000116="This """
                               """is redundant with the more specific 'independent and not spatial """
                               """region' constraint. We leave in the redundant axiom for use with """
                               """reasoners that do not use negation."} ! independent continuant""")
        expected = self.to_tokens([
            ["TAG", "range", 1, 0],
            ["TAG_VALUE_SEPARATOR", ":", 1, 5],
            ["TAG_VALUE", "BFO:0000004", 1, 7],
            ["QUALIFIER_BLOCK_START", "{", 1, 19],
            ["QUALIFIER_ID", "http://purl.obolibrary.org/obo/IAO_0000116", 1, 20],
            ["QUALIFIER_ID_VALUE_SEPARATOR", "=", 1, 62],
            ["QUALIFIER_VALUE",
             "This is redundant with the more specific 'independent and not spatial region' constraint."
             " We leave in the redundant axiom for use with reasoners that do not use negation.", 1, 63],
            ["QUALIFIER_BLOCK_END", "}", 1, 235],
        ])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_new_lines(self):
        actual = self.tokenize("""
        a_valid_tag-AZ_8""")
        expected = self.to_tokens([["TAG", "a_valid_tag-AZ_8", 2, 9]])
        self.assertEqualsByContent(actual, expected)

    def test_should_ignore_spaces_and_tab(self):
        actual = self.tokenize("""  a_valid_tag-AZ_8: \tIt can contain any characters \t but new lines \u0145 \\a""")
        expected = self.to_tokens([
            ["TAG", "a_valid_tag-AZ_8", 1, 2],
            ["TAG_VALUE_SEPARATOR", ":", 1, 18],
            ["TAG_VALUE", "It can contain any characters \t but new lines \u0145 \\a", 1, 21]
        ])
        self.assertEqualsByContent(actual, expected)

    def test_should_fail_on_invalid_character(self):
        with self.assertRaises(OboParsingError):
            self.tokenize("""==:""")

    def assertEqualsByContent(self, actual, expected):
        """Compare two LexToken lists attribute by attribute (type, value, lineno, lexpos, lexer)."""
        def extract_dictionary(tokens):
            return [token.__dict__ for token in tokens]

        actual_dict = extract_dictionary(actual)
        expected_dict = extract_dictionary(expected)
        self.assertEqual(actual_dict, expected_dict)
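Both lexer test classes (TestLexer above and OboLexerBuilderTest below) drive the lexer directly rather than through OboParser: new_lexer() creates the lexer, tokenize(lexer, text) returns LexToken objects, and each token carries the type, value, lineno and lexpos attributes that the to_tokens() helper reconstructs for comparison. A tokenization-only sketch of that path, again assuming OboLexerBuilder is importable:

# Sketch only: the import of OboLexerBuilder is an assumption.
builder = OboLexerBuilder()
lexer = builder.new_lexer()
builder.in_header = False     # body mode: 'tag: value' stanza lines

tokens = builder.tokenize(lexer, 'is_a: RO:0002323 ! mereotopologically related to')
for token in tokens:
    # Each token is a LexToken exposing the same four attributes that
    # to_tokens() fills in: type, value, lineno and lexpos.
    print(token.type, repr(token.value), token.lineno, token.lexpos)

# Per test_should_recognise_an_ending_comment the trailing comment is dropped, leaving:
#   TAG 'is_a' 1 0
#   TAG_VALUE_SEPARATOR ':' 1 4
#   TAG_VALUE 'RO:0002323' 1 6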
Example #8
class OboLexerBuilderTest(unittest.TestCase):
    def setUp(self):
        self.under_test = OboLexerBuilder()
        self.lexer = self.under_test.new_lexer()
        self.tokenize = partial(self.under_test.tokenize, self.lexer)

    def to_tokens(self, token_list):
        """Build the expected LexToken objects from [type, value, lineno, lexpos] lists."""
        result = []
        for values in token_list:
            token = LexToken()
            token.type = values[0]
            token.value = values[1]
            token.lineno = values[2]
            token.lexpos = values[3]
            token.lexer = self.lexer
            result.append(token)
        return result

    def test_should_recognise_a_tag_and_ignore_an_ending_comment(self):
        self.under_test.in_header = False
        actual = self.tokenize(
            """is_a: RO:0002323 ! mereotopologically related to""")
        expected = self.to_tokens([["TAG", "is_a", 1, 0],
                                   ["TAG_VALUE_SEPARATOR", ":", 1, 4],
                                   ["TAG_VALUE", "RO:0002323", 1, 6]])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_qualifiers_without_comments(self):
        self.under_test.in_header = False
        actual = self.tokenize(
            """range: BFO:0000004 {http://purl.obolibrary.org/obo/IAO_0000116="This is """
            """redundant with the more specific 'independent and not spatial region' """
            """constraint. We leave in the redundant axiom for use with reasoners that do """
            """not use negation.",  XXX="YYY"}""")
        expected = self.to_tokens([
            ["TAG", "range", 1, 0],
            ["TAG_VALUE_SEPARATOR", ":", 1, 5],
            ["TAG_VALUE", "BFO:0000004", 1, 7],
            ["QUALIFIER_BLOCK_START", "{", 1, 19],
            [
                "QUALIFIER_ID", "http://purl.obolibrary.org/obo/IAO_0000116",
                1, 20
            ],
            ["QUALIFIER_ID_VALUE_SEPARATOR", "=", 1, 62],
            [
                "QUALIFIER_VALUE",
                "This is redundant with the more specific 'independent and not spatial region' "
                "constraint. We leave in the redundant axiom for use with reasoners that do not "
                "use negation.", 1, 63
            ],
            ["QUALIFIER_LIST_SEPARATOR", ",", 1, 235],
            ["QUALIFIER_ID", "XXX", 1, 238],
            ["QUALIFIER_ID_VALUE_SEPARATOR", "=", 1, 241],
            ["QUALIFIER_VALUE", "YYY", 1, 242],
            ["QUALIFIER_BLOCK_END", "}", 1, 247],
        ])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_def_tag(self):
        self.under_test.in_header = False
        actual = self.tokenize(
            '''def: "some \\"value" [SOMEID "description", ANOTHERID] {qualifier="quality"}'''
        )
        expected = self.to_tokens([
            ["DEF_TAG", "def", 1, 0],
            ["TAG_VALUE_SEPARATOR", ":", 1, 3],
            ["TAG_VALUE", "some \"value", 1, 5],
            ["XREF_LIST_START", "[", 1, 20],
            ["XREF", "SOMEID", 1, 21],
            ["XREF_DESCRIPTION", "description", 1, 28],
            ["XREF_LIST_SEPARATOR", ",", 1, 41],
            ["XREF", "ANOTHERID", 1, 43],
            ["XREF_LIST_END", "]", 1, 52],
            ["QUALIFIER_BLOCK_START", "{", 1, 54],
            ["QUALIFIER_ID", "qualifier", 1, 55],
            ["QUALIFIER_ID_VALUE_SEPARATOR", "=", 1, 64],
            ["QUALIFIER_VALUE", "quality", 1, 65],
            ["QUALIFIER_BLOCK_END", "}", 1, 74],
        ])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_xref_tag(self):
        self.under_test.in_header = False
        actual = self.tokenize(
            '''xref: reactome:R-HSA-71593 "((1,6)-alpha-glucosyl)poly((1,4)-alpha-glucosyl)glycogenin => poly{(1,4)-alpha-glucosyl} glycogenin + alpha-D-glucose"'''
        )
        expected = self.to_tokens([
            ["XREF_TAG", "xref", 1, 0],
            ["TAG_VALUE_SEPARATOR", ":", 1, 4],
            ["XREF", "reactome:R-HSA-71593", 1, 6],
            [
                "XREF_DESCRIPTION",
                "((1,6)-alpha-glucosyl)poly((1,4)-alpha-glucosyl)glycogenin => poly{(1,4)-alpha-glucosyl} glycogenin + alpha-D-glucose",
                1, 27
            ],
        ])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_single_qualifier_without_comments(self):
        self.under_test.in_header = False
        actual = self.tokenize(
            """range: BFO:0000004 {http://purl.obolibrary.org/obo/IAO_0000116="This is """
            """redundant with the more specific 'independent and not spatial region' """
            """constraint. We leave in the redundant axiom for use with reasoners that do """
            """not use negation."}""")
        expected = self.to_tokens([
            ["TAG", "range", 1, 0],
            ["TAG_VALUE_SEPARATOR", ":", 1, 5],
            ["TAG_VALUE", "BFO:0000004", 1, 7],
            ["QUALIFIER_BLOCK_START", "{", 1, 19],
            [
                "QUALIFIER_ID", "http://purl.obolibrary.org/obo/IAO_0000116",
                1, 20
            ],
            ["QUALIFIER_ID_VALUE_SEPARATOR", "=", 1, 62],
            [
                "QUALIFIER_VALUE",
                "This is redundant with the more specific 'independent and not spatial region' "
                "constraint. We leave in the redundant axiom for use with reasoners that do not "
                "use negation.", 1, 63
            ],
            ["QUALIFIER_BLOCK_END", "}", 1, 235],
        ])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_qualifiers_with_comments(self):
        self.under_test.in_header = False
        actual = self.tokenize(
            """range: BFO:0000004 {http://purl.obolibrary.org/obo/IAO_0000116="This """
            """is redundant with the more specific 'independent and not spatial """
            """region' constraint. We leave in the redundant axiom for use with """
            """reasoners that do not use negation."} ! independent continuant"""
        )
        expected = self.to_tokens([
            ["TAG", "range", 1, 0],
            ["TAG_VALUE_SEPARATOR", ":", 1, 5],
            ["TAG_VALUE", "BFO:0000004", 1, 7],
            ["QUALIFIER_BLOCK_START", "{", 1, 19],
            [
                "QUALIFIER_ID", "http://purl.obolibrary.org/obo/IAO_0000116",
                1, 20
            ],
            ["QUALIFIER_ID_VALUE_SEPARATOR", "=", 1, 62],
            [
                "QUALIFIER_VALUE",
                "This is redundant with the more specific 'independent and not spatial region' constraint."
                " We leave in the redundant axiom for use with reasoners that do not use negation.",
                1, 63
            ],
            ["QUALIFIER_BLOCK_END", "}", 1, 235],
        ])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_new_lines(self):
        actual = self.tokenize("""
        a_valid_tag-AZ_8""")
        expected = self.to_tokens([["TAG", "a_valid_tag-AZ_8", 2, 9]])
        self.assertEqualsByContent(actual, expected)

    def test_should_ignore_spaces_and_tab(self):
        actual = self.tokenize(
            """ \t a_valid_tag-AZ_8: \tIt can contain any characters \t but new lines \u0145 \\a"""
        )
        expected = self.to_tokens(
            [["TAG", "a_valid_tag-AZ_8", 1, 3],
             ["TAG_VALUE_SEPARATOR", ":", 1, 19],
             [
                 "TAG_VALUE",
                 "It can contain any characters \t but new lines \u0145 \\a",
                 1, 22
             ]])
        self.assertEqualsByContent(actual, expected)

    def assertEqualsByContent(self, actual, expected):
        """Compare two LexToken lists attribute by attribute (type, value, lineno, lexpos, lexer)."""
        def extract_dictionary(tokens):
            return [token.__dict__ for token in tokens]

        actual_dict = extract_dictionary(actual)
        expected_dict = extract_dictionary(expected)
        self.assertEqual(actual_dict, expected_dict)
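Finally, the header-value tests in Example #7 exercise the other lexer mode: they push the HEADER_VALUE state explicitly so that everything after the tag separator is captured as a single TAG_VALUE, while the literals true and false come back as BOOLEAN tokens. A sketch of that mode, with the same import assumption as above:

# Sketch only: the import of OboLexerBuilder is an assumption.
builder = OboLexerBuilder()
lexer = builder.new_lexer()

# The header tests enter the HEADER_VALUE lexer state before tokenizing.
lexer.push_state(OboLexerBuilder.HEADER_VALUE)
for token in builder.tokenize(lexer, 'true'):
    print(token.type, token.value)    # BOOLEAN true, per Example #7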