def test_should_parse_def_tag_value_pair_with_empty_xrefs(self):
    """Check the callbacks for a ``def`` line whose xref list is empty.

    The line carries one qualifier and a trailing ``!`` comment; only the
    qualifier and the definition value are expected to reach the callback.
    """
    mock_callback = Mock()
    builder = OboLexerBuilder()
    # in_header is switched off before lexing; presumably this selects the
    # non-header tag rules -- confirm against OboLexerBuilder.
    builder.in_header = False
    OboParser(builder.new_lexer(), mock_callback).parse_line(
        '''def: "OK" [] {XXX="YYY"} ! some comment''')
    # assertEqual: the assertEquals alias is deprecated and removed in Python 3.12.
    self.assertEqual(mock_callback.mock_calls, [
        call.qualifier("XXX", "YYY"),
        call.def_tag_value('OK'),
    ])
def test_should_parse_def_tag_with_xref_and_brackets2(self):
    """Check the callbacks for a long ``def`` line followed by a single xref.

    The xref is expected to be reported (with no description) before the
    definition text itself.
    """
    mock_callback = Mock()
    builder = OboLexerBuilder()
    builder.in_header = False
    OboParser(builder.new_lexer(), mock_callback).parse_line(
        """def: "Any process that reduces the frequency, rate or extent of branch elongation involved in ureteric bud branching, the growth of a branch of the ureteric bud along its axis." [GOC:mtg_kidney_jan10]"""
    )
    # assertEqual: the assertEquals alias is deprecated and removed in Python 3.12.
    self.assertEqual(mock_callback.mock_calls, [
        call.add_xref('GOC:mtg_kidney_jan10', None),
        call.def_tag_value(
            "Any process that reduces the frequency, rate or extent of branch elongation involved in ureteric bud branching, the growth of a branch of the ureteric bud along its axis."
        ),
    ])
def test_should_parse_xref_tag(self):
    """Check the callbacks for an ``xref`` line with a quoted description.

    The description contains parentheses, braces and commas; it is expected
    to arrive intact as the second argument of ``add_xref``.
    """
    mock_callback = Mock()
    builder = OboLexerBuilder()
    builder.in_header = False
    OboParser(builder.new_lexer(), mock_callback).parse_line(
        '''xref: reactome:R-HSA-71593 "((1,6)-alpha-glucosyl)poly((1,4)-alpha-glucosyl)glycogenin => poly{(1,4)-alpha-glucosyl} glycogenin + alpha-D-glucose"'''
    )
    # assertEqual: the assertEquals alias is deprecated and removed in Python 3.12.
    self.assertEqual(mock_callback.mock_calls, [
        call.add_xref(
            'reactome:R-HSA-71593',
            "((1,6)-alpha-glucosyl)poly((1,4)-alpha-glucosyl)glycogenin => poly{(1,4)-alpha-glucosyl} glycogenin + alpha-D-glucose"
        ),
        call.xref_tag(),
    ])
def test_should_parse_def_tag_with_xref_and_brackets(self):
    """Check a ``def`` line whose text contains ``!``, ``{}`` and ``[...]``.

    Those characters appear inside the quoted definition, so they are
    expected to stay part of the value; the two xrefs after the text are
    expected to be reported first, each without a description.
    """
    mock_callback = Mock()
    builder = OboLexerBuilder()
    builder.in_header = False
    # The input deliberately ends with a newline character.
    OboParser(builder.new_lexer(), mock_callback).parse_line(
        """def: "Catalysis of the reaction!: {}2'-phospho-[ligated tRNA] + NAD+ = mature tRNA + ADP ribose 1'',2''-phosphate + nicotinamide + H2O. This reaction is the transfer of the splice junction 2-phosphate from ligated tRNA to NAD+ to produce ADP-ribose 1'-2' cyclic phosphate." [EC:2.7.1.160, PMID:9148937]\n"""
    )
    # assertEqual: the assertEquals alias is deprecated and removed in Python 3.12.
    self.assertEqual(mock_callback.mock_calls, [
        call.add_xref('EC:2.7.1.160', None),
        call.add_xref('PMID:9148937', None),
        call.def_tag_value(
            "Catalysis of the reaction!: {}2'-phospho-[ligated tRNA] + NAD+ = mature tRNA + ADP ribose 1'',2''-phosphate + nicotinamide + H2O. This reaction is the transfer of the splice junction 2-phosphate from ligated tRNA to NAD+ to produce ADP-ribose 1'-2' cyclic phosphate."
        ),
    ])
def test_should_parse_def_tag_with_xref(self):
    """Check a ``def`` line followed by two xrefs, one of them described.

    ``GOC:ai`` has no description (``None``), ``HELLO`` carries ``"WORLD"``;
    both are expected before the definition value.
    """
    mock_callback = Mock()
    builder = OboLexerBuilder()
    builder.in_header = False
    OboParser(builder.new_lexer(), mock_callback).parse_line(
        '''def: "Enables the transfer of citrate, 2-hydroxy-1,2,3-propanetricarboyxlate, '''
        '''from one side of a membrane to the other." [GOC:ai,HELLO "WORLD"]'''
    )
    # assertEqual: the assertEquals alias is deprecated and removed in Python 3.12.
    self.assertEqual(mock_callback.mock_calls, [
        call.add_xref('GOC:ai', None),
        call.add_xref('HELLO', 'WORLD'),
        call.def_tag_value(
            'Enables the transfer of citrate, 2-hydroxy-1,2,3-propanetricarboyxlate, '
            'from one side of a membrane to the other.'),
    ])
def test_should_parse_tag_value_pair_with_qualifiers(self):
    """Check a plain tag line carrying two qualifiers and a trailing comment.

    Both qualifiers are expected to be reported, in order, before the
    tag/value pair; the ``! some comment`` part is expected to be dropped.
    """
    mock_callback = Mock()
    builder = OboLexerBuilder()
    builder.in_header = False
    OboParser(builder.new_lexer(), mock_callback).parse_line(
        """range: BFO:0000004 {http://purl.obolibrary.org/obo/IAO_0000116="This is redundant with the more """
        """specific 'independent and not spatial region' constraint. We leave in the redundant axiom for use """
        """with reasoners that do not use negation.", XXX="YYY"} ! some comment"""
    )
    # assertEqual: the assertEquals alias is deprecated and removed in Python 3.12.
    self.assertEqual(mock_callback.mock_calls, [
        call.qualifier(
            'http://purl.obolibrary.org/obo/IAO_0000116',
            "This is redundant with the more specific 'independent and not spatial region' constraint. "
            "We leave in the redundant axiom for use with reasoners that do not use negation."
        ),
        call.qualifier("XXX", "YYY"),
        call.tag_value_pair('range', 'BFO:0000004'),
    ])
def setUp(self):
    """Create a fresh builder and lexer, plus a ``tokenize`` shortcut.

    ``self.tokenize`` is the builder's ``tokenize`` with the new lexer
    already bound as its first argument.
    """
    builder = OboLexerBuilder()
    lexer = builder.new_lexer()
    self.under_test = builder
    self.lexer = lexer
    self.tokenize = partial(builder.tokenize, lexer)
class TestLexer(unittest.TestCase):
    """Token-level tests for the lexer created by ``OboLexerBuilder``.

    Each test tokenizes one input and compares the resulting tokens
    (type, value, line number, position) with a hand-written expectation.
    """

    def setUp(self):
        """Create a fresh builder and lexer, plus a ``tokenize`` shortcut."""
        self.under_test = OboLexerBuilder()
        self.lexer = self.under_test.new_lexer()
        self.tokenize = partial(self.under_test.tokenize, self.lexer)

    def to_tokens(self, token_list):
        """Build ``LexToken`` objects from ``[type, value, lineno, lexpos]`` rows.

        Every token is attached to ``self.lexer`` so that it compares equal,
        attribute by attribute, to a token produced by that lexer.
        """
        result = []
        for values in token_list:
            token = LexToken()
            token.type = values[0]
            token.value = values[1]
            token.lineno = values[2]
            token.lexpos = values[3]
            token.lexer = self.lexer
            result.append(token)
        return result

    def test_should_recognise_header_tag_values(self):
        """A header value is returned as one TAG_VALUE token, unchanged."""
        self.lexer.push_state(OboLexerBuilder.HEADER_VALUE)
        actual = self.tokenize("""It can contain any characters but new lines \u0145 \\a""")
        expected = self.to_tokens([["TAG_VALUE", "It can contain any characters but new lines \u0145 \\a", 1, 0]])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_escape_characters_in_header_tag_values(self):
        """Backslash escapes in a header value are replaced by their characters."""
        self.lexer.push_state(OboLexerBuilder.HEADER_VALUE)
        actual = self.tokenize("""It can contain any characters but """
                               """new lines \u0145 \\a\\n\\W\\t\\:\\,\\"\\\\\\(\\)\\{\\}\\[\\]@""")
        # "\a" is left untouched; "\n" and "\W" are expected to become a
        # newline and a single space, "\t" a tab, and the remaining escapes
        # the escaped character itself.
        expected = self.to_tokens([["TAG_VALUE",
                                    "It can contain any characters but new lines \u0145 \\a\n \t:,\"\\(){}[]@",
                                    1, 0]])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_true_tag_values_in_header(self):
        """``true`` in a header value is lexed as a BOOLEAN token."""
        self.lexer.push_state(OboLexerBuilder.HEADER_VALUE)
        actual = self.tokenize("""true""")
        expected = self.to_tokens([["BOOLEAN", "true", 1, 0]])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_false_tag_values_in_header(self):
        """``false`` in a header value is lexed as a BOOLEAN token."""
        self.lexer.push_state(OboLexerBuilder.HEADER_VALUE)
        actual = self.tokenize("""false""")
        expected = self.to_tokens([["BOOLEAN", "false", 1, 0]])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_an_ending_comment(self):
        """A trailing ``! comment`` produces no token."""
        self.under_test.in_header = False
        actual = self.tokenize("""is_a: RO:0002323 ! mereotopologically related to""")
        expected = self.to_tokens([
            ["TAG", "is_a", 1, 0],
            ["TAG_VALUE_SEPARATOR", ":", 1, 4],
            ["TAG_VALUE", "RO:0002323", 1, 6],
        ])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_qualifiers_without_comments(self):
        """A two-entry qualifier block is split into id / separator / value tokens."""
        self.under_test.in_header = False
        # Two spaces follow the comma: the expected positions place "," at
        # 235 and the second qualifier id at 238.
        actual = self.tokenize("""range: BFO:0000004 {http://purl.obolibrary.org/obo/IAO_0000116="This is """
                               """redundant with the more specific 'independent and not spatial region' """
                               """constraint. We leave in the redundant axiom for use with reasoners that do """
                               """not use negation.",  XXX="YYY"}""")
        expected = self.to_tokens([
            ["TAG", "range", 1, 0],
            ["TAG_VALUE_SEPARATOR", ":", 1, 5],
            ["TAG_VALUE", "BFO:0000004", 1, 7],
            ["QUALIFIER_BLOCK_START", "{", 1, 19],
            ["QUALIFIER_ID", "http://purl.obolibrary.org/obo/IAO_0000116", 1, 20],
            ["QUALIFIER_ID_VALUE_SEPARATOR", "=", 1, 62],
            ["QUALIFIER_VALUE",
             "This is redundant with the more specific 'independent and not spatial region' "
             "constraint. We leave in the redundant axiom for use with reasoners that do not "
             "use negation.", 1, 63],
            ["QUALIFIER_LIST_SEPARATOR", ",", 1, 235],
            ["QUALIFIER_ID", "XXX", 1, 238],
            ["QUALIFIER_ID_VALUE_SEPARATOR", "=", 1, 241],
            ["QUALIFIER_VALUE", "YYY", 1, 242],
            ["QUALIFIER_BLOCK_END", "}", 1, 247],
        ])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_def_tag(self):
        """A ``def`` line yields value, xref-list and qualifier-block tokens."""
        self.under_test.in_header = False
        actual = self.tokenize('''def: "some \\"value" [SOMEID "description", ANOTHERID] {qualifier="quality"}''')
        expected = self.to_tokens([
            ["DEF_TAG", "def", 1, 0],
            ["TAG_VALUE_SEPARATOR", ":", 1, 3],
            ["TAG_VALUE", "some \"value", 1, 5],
            ["XREF_LIST_START", "[", 1, 20],
            ["XREF", "SOMEID", 1, 21],
            ["XREF_DESCRIPTION", "description", 1, 28],
            ["XREF_LIST_SEPARATOR", ",", 1, 41],
            ["XREF", "ANOTHERID", 1, 43],
            ["XREF_LIST_END", "]", 1, 52],
            ["QUALIFIER_BLOCK_START", "{", 1, 54],
            ["QUALIFIER_ID", "qualifier", 1, 55],
            ["QUALIFIER_ID_VALUE_SEPARATOR", "=", 1, 64],
            ["QUALIFIER_VALUE", "quality", 1, 65],
            ["QUALIFIER_BLOCK_END", "}", 1, 74],
        ])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_xref_block_with_escaped_characters(self):
        """An escaped ``]`` inside an xref does not end the xref list."""
        self.under_test.in_header = False
        self.lexer.push_state(OboLexerBuilder.XREF_LIST)
        actual = self.tokenize(
            '''[http:/www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=pubmed&term=SILVERMAN+SK[au\\]&dispmax=50]''')
        expected = self.to_tokens([
            ["XREF_LIST_START", "[", 1, 0],
            ["XREF", "http:/www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=pubmed&term=SILVERMAN+SK[au]&dispmax=50", 1, 1],
            ["XREF_LIST_END", "]", 1, 100],
        ])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_xref_tag(self):
        """An ``xref`` line yields the xref id and its quoted description."""
        self.under_test.in_header = False
        actual = self.tokenize(
            '''xref: reactome:R-HSA-71593 "((1,6)-alpha-glucosyl)poly((1,4)-alpha-glucosyl)glycogenin => poly{(1,4)-alpha-glucosyl} glycogenin + alpha-D-glucose"''')
        expected = self.to_tokens([
            ["XREF_TAG", "xref", 1, 0],
            ["TAG_VALUE_SEPARATOR", ":", 1, 4],
            ["XREF", "reactome:R-HSA-71593", 1, 6],
            ["XREF_DESCRIPTION",
             "((1,6)-alpha-glucosyl)poly((1,4)-alpha-glucosyl)glycogenin => poly{(1,4)-alpha-glucosyl} glycogenin + alpha-D-glucose",
             1, 27],
        ])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_single_qualifier_without_comments(self):
        """A one-entry qualifier block is tokenized without a list separator."""
        self.under_test.in_header = False
        actual = self.tokenize("""range: BFO:0000004 {http://purl.obolibrary.org/obo/IAO_0000116="This is """
                               """redundant with the more specific 'independent and not spatial region' """
                               """constraint. We leave in the redundant axiom for use with reasoners that do """
                               """not use negation."}""")
        expected = self.to_tokens([
            ["TAG", "range", 1, 0],
            ["TAG_VALUE_SEPARATOR", ":", 1, 5],
            ["TAG_VALUE", "BFO:0000004", 1, 7],
            ["QUALIFIER_BLOCK_START", "{", 1, 19],
            ["QUALIFIER_ID", "http://purl.obolibrary.org/obo/IAO_0000116", 1, 20],
            ["QUALIFIER_ID_VALUE_SEPARATOR", "=", 1, 62],
            ["QUALIFIER_VALUE",
             "This is redundant with the more specific 'independent and not spatial region' "
             "constraint. We leave in the redundant axiom for use with reasoners that do not "
             "use negation.", 1, 63],
            ["QUALIFIER_BLOCK_END", "}", 1, 235],
        ])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_qualifiers_with_comments(self):
        """A ``! comment`` after the qualifier block produces no token."""
        self.under_test.in_header = False
        actual = self.tokenize("""range: BFO:0000004 {http://purl.obolibrary.org/obo/IAO_0000116="This """
                               """is redundant with the more specific 'independent and not spatial """
                               """region' constraint. We leave in the redundant axiom for use with """
                               """reasoners that do not use negation."} ! independent continuant""")
        expected = self.to_tokens([
            ["TAG", "range", 1, 0],
            ["TAG_VALUE_SEPARATOR", ":", 1, 5],
            ["TAG_VALUE", "BFO:0000004", 1, 7],
            ["QUALIFIER_BLOCK_START", "{", 1, 19],
            ["QUALIFIER_ID", "http://purl.obolibrary.org/obo/IAO_0000116", 1, 20],
            ["QUALIFIER_ID_VALUE_SEPARATOR", "=", 1, 62],
            ["QUALIFIER_VALUE",
             "This is redundant with the more specific 'independent and not spatial region' constraint."
             " We leave in the redundant axiom for use with reasoners that do not use negation.", 1, 63],
            ["QUALIFIER_BLOCK_END", "}", 1, 235],
        ])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_new_lines(self):
        """A leading newline advances the line number reported for the tag."""
        # One newline plus eight spaces: the tag is expected on line 2 at
        # absolute position 9.
        actual = self.tokenize("\n        a_valid_tag-AZ_8")
        expected = self.to_tokens([["TAG", "a_valid_tag-AZ_8", 2, 9]])
        self.assertEqualsByContent(actual, expected)

    def test_should_ignore_spaces_and_tab(self):
        """Whitespace around the tag and before the value is skipped."""
        # Two leading spaces: the tag is expected at position 2.
        actual = self.tokenize("""  a_valid_tag-AZ_8: \tIt can contain any characters \t but new lines \u0145 \\a""")
        expected = self.to_tokens([
            ["TAG", "a_valid_tag-AZ_8", 1, 2],
            ["TAG_VALUE_SEPARATOR", ":", 1, 18],
            ["TAG_VALUE", "It can contain any characters \t but new lines \u0145 \\a", 1, 21],
        ])
        self.assertEqualsByContent(actual, expected)

    def test_should_fail_on_invalid_character(self):
        """Input that starts with ``=`` raises ``OboParsingError``."""
        with self.assertRaises(OboParsingError):
            self.tokenize("""==:""")

    def assertEqualsByContent(self, actual, expected):
        """Assert that two token sequences have equal attribute dictionaries.

        Tokens are compared through ``vars()`` rather than directly, so the
        check covers every attribute set on each token.
        """
        def extract_dictionary(tokens):
            return [vars(token) for token in tokens]

        actual_dict = extract_dictionary(actual)
        expected_dict = extract_dictionary(expected)
        # assertEqual: the assertEquals alias is deprecated and removed in Python 3.12.
        self.assertEqual(actual_dict, expected_dict)
def test_should_parse_tag_value_pair(self):
    """A plain ``tag: value`` line triggers exactly one ``tag_value_pair`` call."""
    mock_callback = Mock()
    OboParser(OboLexerBuilder().new_lexer(), mock_callback).parse_line("format-version: 1.2")
    # assertEqual: the assertEquals alias is deprecated and removed in Python 3.12.
    self.assertEqual(mock_callback.mock_calls, [call.tag_value_pair('format-version', '1.2')])
def test_should_parse_boolean_tag_value_pair(self):
    """A ``true`` value is reported through ``boolean_tag_value_pair`` as ``True``."""
    mock_callback = Mock()
    OboParser(OboLexerBuilder().new_lexer(), mock_callback).parse_line("is_anonymous: true")
    # assertEqual: the assertEquals alias is deprecated and removed in Python 3.12.
    self.assertEqual(mock_callback.mock_calls, [call.boolean_tag_value_pair('is_anonymous', True)])
def test_should_parse_term(self):
    """A ``[Term]`` line triggers exactly one argument-less ``term`` call."""
    mock_callback = Mock()
    OboParser(OboLexerBuilder().new_lexer(), mock_callback).parse_line("[Term]")
    # assertEqual: the assertEquals alias is deprecated and removed in Python 3.12.
    self.assertEqual(mock_callback.mock_calls, [call.term()])
class OboLexerBuilderTest(unittest.TestCase):
    """Token-level tests for the lexer created by ``OboLexerBuilder``.

    Each test tokenizes one input and compares the resulting tokens
    (type, value, line number, position) with a hand-written expectation.
    """

    def setUp(self):
        """Create a fresh builder and lexer, plus a ``tokenize`` shortcut."""
        self.under_test = OboLexerBuilder()
        self.lexer = self.under_test.new_lexer()
        self.tokenize = partial(self.under_test.tokenize, self.lexer)

    def to_tokens(self, token_list):
        """Build ``LexToken`` objects from ``[type, value, lineno, lexpos]`` rows.

        Every token is attached to ``self.lexer`` so that it compares equal,
        attribute by attribute, to a token produced by that lexer.
        """
        result = []
        for values in token_list:
            token = LexToken()
            token.type = values[0]
            token.value = values[1]
            token.lineno = values[2]
            token.lexpos = values[3]
            token.lexer = self.lexer
            result.append(token)
        return result

    def test_should_recognise_a_tag_and_ignore_an_ending_comment(self):
        """A trailing ``! comment`` produces no token."""
        self.under_test.in_header = False
        actual = self.tokenize(
            """is_a: RO:0002323 ! mereotopologically related to""")
        expected = self.to_tokens([
            ["TAG", "is_a", 1, 0],
            ["TAG_VALUE_SEPARATOR", ":", 1, 4],
            ["TAG_VALUE", "RO:0002323", 1, 6],
        ])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_qualifiers_without_comments(self):
        """A two-entry qualifier block is split into id / separator / value tokens."""
        self.under_test.in_header = False
        # Two spaces follow the comma: the expected positions place "," at
        # 235 and the second qualifier id at 238.
        actual = self.tokenize(
            """range: BFO:0000004 {http://purl.obolibrary.org/obo/IAO_0000116="This is """
            """redundant with the more specific 'independent and not spatial region' """
            """constraint. We leave in the redundant axiom for use with reasoners that do """
            """not use negation.",  XXX="YYY"}""")
        expected = self.to_tokens([
            ["TAG", "range", 1, 0],
            ["TAG_VALUE_SEPARATOR", ":", 1, 5],
            ["TAG_VALUE", "BFO:0000004", 1, 7],
            ["QUALIFIER_BLOCK_START", "{", 1, 19],
            ["QUALIFIER_ID", "http://purl.obolibrary.org/obo/IAO_0000116", 1, 20],
            ["QUALIFIER_ID_VALUE_SEPARATOR", "=", 1, 62],
            ["QUALIFIER_VALUE",
             "This is redundant with the more specific 'independent and not spatial region' "
             "constraint. We leave in the redundant axiom for use with reasoners that do not "
             "use negation.", 1, 63],
            ["QUALIFIER_LIST_SEPARATOR", ",", 1, 235],
            ["QUALIFIER_ID", "XXX", 1, 238],
            ["QUALIFIER_ID_VALUE_SEPARATOR", "=", 1, 241],
            ["QUALIFIER_VALUE", "YYY", 1, 242],
            ["QUALIFIER_BLOCK_END", "}", 1, 247],
        ])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_def_tag(self):
        """A ``def`` line yields value, xref-list and qualifier-block tokens."""
        self.under_test.in_header = False
        actual = self.tokenize(
            '''def: "some \\"value" [SOMEID "description", ANOTHERID] {qualifier="quality"}'''
        )
        expected = self.to_tokens([
            ["DEF_TAG", "def", 1, 0],
            ["TAG_VALUE_SEPARATOR", ":", 1, 3],
            ["TAG_VALUE", "some \"value", 1, 5],
            ["XREF_LIST_START", "[", 1, 20],
            ["XREF", "SOMEID", 1, 21],
            ["XREF_DESCRIPTION", "description", 1, 28],
            ["XREF_LIST_SEPARATOR", ",", 1, 41],
            ["XREF", "ANOTHERID", 1, 43],
            ["XREF_LIST_END", "]", 1, 52],
            ["QUALIFIER_BLOCK_START", "{", 1, 54],
            ["QUALIFIER_ID", "qualifier", 1, 55],
            ["QUALIFIER_ID_VALUE_SEPARATOR", "=", 1, 64],
            ["QUALIFIER_VALUE", "quality", 1, 65],
            ["QUALIFIER_BLOCK_END", "}", 1, 74],
        ])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_xref_tag(self):
        """An ``xref`` line yields the xref id and its quoted description."""
        self.under_test.in_header = False
        actual = self.tokenize(
            '''xref: reactome:R-HSA-71593 "((1,6)-alpha-glucosyl)poly((1,4)-alpha-glucosyl)glycogenin => poly{(1,4)-alpha-glucosyl} glycogenin + alpha-D-glucose"'''
        )
        expected = self.to_tokens([
            ["XREF_TAG", "xref", 1, 0],
            ["TAG_VALUE_SEPARATOR", ":", 1, 4],
            ["XREF", "reactome:R-HSA-71593", 1, 6],
            ["XREF_DESCRIPTION",
             "((1,6)-alpha-glucosyl)poly((1,4)-alpha-glucosyl)glycogenin => poly{(1,4)-alpha-glucosyl} glycogenin + alpha-D-glucose",
             1, 27],
        ])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_single_qualifier_without_comments(self):
        """A one-entry qualifier block is tokenized without a list separator."""
        self.under_test.in_header = False
        actual = self.tokenize(
            """range: BFO:0000004 {http://purl.obolibrary.org/obo/IAO_0000116="This is """
            """redundant with the more specific 'independent and not spatial region' """
            """constraint. We leave in the redundant axiom for use with reasoners that do """
            """not use negation."}""")
        expected = self.to_tokens([
            ["TAG", "range", 1, 0],
            ["TAG_VALUE_SEPARATOR", ":", 1, 5],
            ["TAG_VALUE", "BFO:0000004", 1, 7],
            ["QUALIFIER_BLOCK_START", "{", 1, 19],
            ["QUALIFIER_ID", "http://purl.obolibrary.org/obo/IAO_0000116", 1, 20],
            ["QUALIFIER_ID_VALUE_SEPARATOR", "=", 1, 62],
            ["QUALIFIER_VALUE",
             "This is redundant with the more specific 'independent and not spatial region' "
             "constraint. We leave in the redundant axiom for use with reasoners that do not "
             "use negation.", 1, 63],
            ["QUALIFIER_BLOCK_END", "}", 1, 235],
        ])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_qualifiers_with_comments(self):
        """A ``! comment`` after the qualifier block produces no token."""
        self.under_test.in_header = False
        actual = self.tokenize(
            """range: BFO:0000004 {http://purl.obolibrary.org/obo/IAO_0000116="This """
            """is redundant with the more specific 'independent and not spatial """
            """region' constraint. We leave in the redundant axiom for use with """
            """reasoners that do not use negation."} ! independent continuant"""
        )
        expected = self.to_tokens([
            ["TAG", "range", 1, 0],
            ["TAG_VALUE_SEPARATOR", ":", 1, 5],
            ["TAG_VALUE", "BFO:0000004", 1, 7],
            ["QUALIFIER_BLOCK_START", "{", 1, 19],
            ["QUALIFIER_ID", "http://purl.obolibrary.org/obo/IAO_0000116", 1, 20],
            ["QUALIFIER_ID_VALUE_SEPARATOR", "=", 1, 62],
            ["QUALIFIER_VALUE",
             "This is redundant with the more specific 'independent and not spatial region' constraint."
             " We leave in the redundant axiom for use with reasoners that do not use negation.", 1, 63],
            ["QUALIFIER_BLOCK_END", "}", 1, 235],
        ])
        self.assertEqualsByContent(actual, expected)

    def test_should_recognise_new_lines(self):
        """A leading newline advances the line number reported for the tag."""
        # One newline plus eight spaces: the tag is expected on line 2 at
        # absolute position 9.
        actual = self.tokenize("\n        a_valid_tag-AZ_8")
        expected = self.to_tokens([["TAG", "a_valid_tag-AZ_8", 2, 9]])
        self.assertEqualsByContent(actual, expected)

    def test_should_ignore_spaces_and_tab(self):
        """Whitespace around the tag and before the value is skipped."""
        actual = self.tokenize(
            """ \t a_valid_tag-AZ_8: \tIt can contain any characters \t but new lines \u0145 \\a"""
        )
        expected = self.to_tokens([
            ["TAG", "a_valid_tag-AZ_8", 1, 3],
            ["TAG_VALUE_SEPARATOR", ":", 1, 19],
            ["TAG_VALUE", "It can contain any characters \t but new lines \u0145 \\a", 1, 22],
        ])
        self.assertEqualsByContent(actual, expected)

    def assertEqualsByContent(self, actual, expected):
        """Assert that two token sequences have equal attribute dictionaries.

        Tokens are compared through ``vars()`` rather than directly, so the
        check covers every attribute set on each token.
        """
        def extract_dictionary(tokens):
            return [vars(token) for token in tokens]

        actual_dict = extract_dictionary(actual)
        expected_dict = extract_dictionary(expected)
        # assertEqual: the assertEquals alias is deprecated and removed in Python 3.12.
        self.assertEqual(actual_dict, expected_dict)
def read(line_generator):
    """Parse the given lines and return the resulting document.

    A new ``OboDocumentBuilder`` receives the parser callbacks; once
    ``OboParser.parse`` has consumed ``line_generator``, the builder's
    ``document`` attribute is returned.
    """
    document_builder = OboDocumentBuilder()
    lexer = OboLexerBuilder().new_lexer()
    OboParser(lexer, document_builder).parse(line_generator)
    return document_builder.document
if __name__ == "__main__":

    class ShowParsing:
        """Demo callback that prints every parser event it receives."""

        def boolean_tag_value_pair(self, tag_token, value_token):
            """Print a tag whose value was reported as a boolean."""
            print("boolean_value_tag %s %s" % (tag_token, value_token))

        def tag_value_pair(self, tag_token, value_token):
            """Print a plain tag/value pair."""
            print("single_value_tag %s %s" % (tag_token, value_token))

        def qualifier(self, id, value):
            """Print one qualifier id and its value."""
            print("qualifier %s %s" % (id, value))

        def typedef(self):
            """Print a marker for a typedef event."""
            print("typedef")

        def term(self):
            """Print a marker for a term event."""
            print("term")

    # Small hand-written sample: two stanza headers, plain tags, tags with
    # one or two qualifiers, and a boolean value.
    demo_lines = [
        "[Term]",
        "tag: value",
        "[Typedef]",
        """tag2: value2 {q1="v1"}""",
        """tag3: value3 {q2="v2", q3="v3"}""",
        """tag4: true {q1="v1"}""",
    ]
    demo_parser = OboParser(OboLexerBuilder().new_lexer(), ShowParsing())
    # The lines are handed over as a generator, as in the original call.
    demo_parser.parse(line for line in demo_lines)