def test_referenced_expressions(self):
    expression = Expression()
    expression.add_node(1, Node(NodeType.START))
    expression.add_node(2, Node(NodeType.FINISH))
    expression.add_node(3, Node(NodeType.EXPRESSION, 'expression'))
    expression.add_edge(1, 2)
    expression.add_edge(1, 3)
    expression.add_edge(3, 2)
    grammar = Grammar()
    grammar.add_expression('sample', expression)
    with self.assertRaises(RuntimeError):
        validator.check_referenced_expressions(grammar)

def test_only_spaces(self):
    ws_classifier = WsClassifier()
    grammar = Grammar(filename='grammars/words.grammar',
                      classifier=ws_classifier)
    source = SourceString(r' ')
    parser = WsParser(grammar, source)
    parser.parse()
    self.assertEqual(parser.result, [])

def test_single_word(self):
    ws_classifier = WsClassifier()
    grammar = Grammar(filename='grammars/words.grammar',
                      classifier=ws_classifier)
    source = SourceString(r'single')
    parser = WsParser(grammar, source)
    parser.parse()
    self.assertEqual(parser.result, ['single'])

def test_without_escapes(self):
    escaped_classifier = EscapedClassifier()
    grammar = Grammar(filename='grammars/escaped.grammar',
                      classifier=escaped_classifier)
    source = SourceString(r'"Simple string without escape characters"')
    parser = EscapedParser(grammar, source)
    parser.parse()
    self.assertEqual(parser.result, r'Simple string without escape characters')

def test_multiple_escapes(self):
    escaped_classifier = EscapedClassifier()
    grammar = Grammar(filename='grammars/escaped.grammar',
                      classifier=escaped_classifier)
    source = SourceString(r'"Some \" and \\ characters."')
    parser = EscapedParser(grammar, source)
    parser.parse()
    self.assertEqual(parser.result, r'Some " and \ characters.')

def test_unique_successor_states(self):
    grammar = Grammar('grammars/function.grammar')
    successors = {5: 2, 6: 9, 10: 8, 11: 12, 13: 14, 15: 2, 16: 17, 17: 18}
    for source_id, target_id in successors.items():
        source = State(grammar, 'function', source_id)
        target = State(grammar, 'function', target_id)
        states = source.find_successor_states()
        self.assertEqual(states, {target})

def test_single_integer(self):
    list_classifier = ListClassifier()
    grammar = Grammar(filename='grammars/integer_list.grammar',
                      classifier=list_classifier)
    source = SourceString('[1234]')
    parser = ListParser(grammar, source)
    parser.parse()
    self.assertEqual(parser.result, [1234])

def load_engine_from_string(source):
    """
    Load the engine from a string representation.

    :param source: the source text of the rulebase
    :return: an engine object
    """
    package_directory = os.path.dirname(os.path.abspath(__file__))
    char_source = SourceString(source)
    char_classifier = CharClassifier()
    tokenizer_path = os.path.join(
        package_directory, '../grammars/simple/tokenizer.grammar')
    tokenizer_grammar = Grammar(filename=tokenizer_path,
                                classifier=char_classifier)
    tokenizer = Tokenizer(tokenizer_grammar, char_source)
    token_classifier = TokenClassifier()
    parser_path = os.path.join(
        package_directory, '../grammars/simple/parser.grammar')
    parser_grammar = Grammar(filename=parser_path,
                             classifier=token_classifier)
    parser = Parser(parser_grammar, tokenizer)
    parser.parse()
    return parser.engine

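# Usage sketch for load_engine_from_string (rulebase_text is a placeholder for
# actual rulebase source; its syntax is defined by grammars/simple/parser.grammar
# and is not reproduced here):
#
#     engine = load_engine_from_string(rulebase_text)
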
def test_single_keyword_with_padding(self):
    char_classifier = CharClassifier.is_in_class
    grammar = Grammar(filename='grammars/simple/tokenizer.grammar',
                      classifier=char_classifier)
    source = SourceString(' universe ')
    parser = Tokenizer(grammar, source)
    parser.parse()
    token = parser.get_token()
    self.assertEqual('keyword', token.type)
    self.assertEqual('universe', token.value)

def test_leading_and_trailing_spaces(self):
    ws_classifier = WsClassifier()
    grammar = Grammar(filename='grammars/words.grammar',
                      classifier=ws_classifier)
    source = SourceString(r' Leading and trailing spaces ')
    parser = WsParser(grammar, source)
    parser.parse()
    words = ['Leading', 'and', 'trailing', 'spaces']
    self.assertEqual(parser.result, words)

def test_whitespace_only_stream(self):
    char_classifier = CharClassifier.is_in_class
    grammar = Grammar(filename='grammars/simple/tokenizer.grammar',
                      classifier=char_classifier)
    source = SourceString(' ')
    parser = Tokenizer(grammar, source)
    parser.parse()
    token = parser.get_token()
    self.assertEqual('empty', token.type)
    self.assertEqual('', token.value)

def test_single_text(self):
    char_classifier = CharClassifier.is_in_class
    grammar = Grammar(filename='grammars/simple/tokenizer.grammar',
                      classifier=char_classifier)
    source = SourceString('"single text"')
    parser = Tokenizer(grammar, source)
    parser.parse()
    token = parser.get_token()
    self.assertEqual('text', token.type)
    self.assertEqual('single text', token.value)

def test_single_number(self):
    char_classifier = CharClassifier()
    grammar = Grammar(filename='grammars/simple/tokenizer.grammar',
                      classifier=char_classifier)
    source = SourceString('1234')
    parser = Tokenizer(grammar, source)
    parser.parse()
    token = parser.get_token()
    self.assertEqual('number', token.type)
    self.assertEqual('1234', token.value)

def test_multiple_separator_spaces(self):
    ws_classifier = WsClassifier()
    grammar = Grammar(filename='grammars/words.grammar',
                      classifier=ws_classifier)
    source = SourceString(r'Some simple words after each others')
    parser = WsParser(grammar, source)
    parser.parse()
    words = ['Some', 'simple', 'words', 'after', 'each', 'others']
    self.assertEqual(parser.result, words)

def test_invalid_escape_character(self):
    escaped_classifier = EscapedClassifier()
    grammar = Grammar(filename='grammars/escaped.grammar',
                      classifier=escaped_classifier)
    source = SourceString(r'"The \t is invalid here!"')
    parser = EscapedParser(grammar, source)
    try:
        parser.parse()
    except ValueError as error:
        self.assertEqual(str(error), 'Invalid escape character!')
    else:
        self.fail('The expected ValueError has not been raised!')

def test_empty_source(self):
    escaped_classifier = EscapedClassifier()
    grammar = Grammar(filename='grammars/escaped.grammar',
                      classifier=escaped_classifier)
    source = SourceString(r'')
    parser = EscapedParser(grammar, source)
    try:
        parser.parse()
    except ValueError as error:
        self.assertEqual(str(error), 'Missing quote!')
    else:
        self.fail('The expected ValueError has not been raised!')

def test_missing_integer(self):
    list_classifier = ListClassifier()
    grammar = Grammar(filename='grammars/integer_list.grammar',
                      classifier=list_classifier)
    source = SourceString('[12, 34, , 78]')
    parser = ListParser(grammar, source)
    try:
        parser.parse()
    except ValueError as error:
        self.assertEqual(str(error), 'An integer expected!')
    else:
        self.fail('The expected ValueError has not been raised!')

def test_multiple_texts(self):
    char_classifier = CharClassifier.is_in_class
    grammar = Grammar(filename='grammars/simple/tokenizer.grammar',
                      classifier=char_classifier)
    source = SourceString('"first" "second"\n\n"third"')
    parser = Tokenizer(grammar, source)
    texts = ['first', 'second', 'third']
    while texts:
        parser.parse()
        token = parser.get_token()
        self.assertEqual('text', token.type)
        self.assertEqual(texts.pop(0), token.value)

def test_invalid_leading_character(self):
    list_classifier = ListClassifier()
    grammar = Grammar(filename='grammars/integer_list.grammar',
                      classifier=list_classifier)
    source = SourceString('invalid')
    parser = ListParser(grammar, source)
    try:
        parser.parse()
    except ValueError as error:
        self.assertEqual(str(error), 'Missing [ character!')
    else:
        self.fail('The expected ValueError has not been raised!')

def test_multiple_floats(self):
    char_classifier = CharClassifier.is_in_class
    grammar = Grammar(filename='grammars/simple/tokenizer.grammar',
                      classifier=char_classifier)
    source = SourceString('.101, 10.20,\n\n -8.9 -7.6 -.888')
    parser = Tokenizer(grammar, source)
    numbers = ['.101', '10.20', '-8.9', '-7.6', '-.888']
    while numbers:
        parser.parse()
        token = parser.get_token()
        self.assertEqual('number', token.type)
        self.assertEqual(numbers.pop(0), token.value)

def test_multiple_integers(self):
    char_classifier = CharClassifier.is_in_class
    grammar = Grammar(filename='grammars/simple/tokenizer.grammar',
                      classifier=char_classifier)
    source = SourceString(' 12 34 \n -567 \n\n-8\n \n')
    parser = Tokenizer(grammar, source)
    numbers = ['12', '34', '-567', '-8']
    while numbers:
        parser.parse()
        token = parser.get_token()
        self.assertEqual('number', token.type)
        self.assertEqual(numbers.pop(0), token.value)

def test_quoted_texts(self):
    char_classifier = CharClassifier()
    grammar = Grammar(filename='grammars/simple/tokenizer.grammar',
                      classifier=char_classifier)
    source = SourceString(
        '"\\"first\\"" "sec\\\\ond"\n\n"th\\\\\\"\\\\rd"')
    parser = Tokenizer(grammar, source)
    texts = ['"first"', 'sec\\ond', 'th\\"\\rd']
    while texts:
        parser.parse()
        token = parser.get_token()
        self.assertEqual('text', token.type)
        self.assertEqual(texts.pop(0), token.value)

def test_multiple_keywords(self):
    char_classifier = CharClassifier.is_in_class
    grammar = Grammar(filename='grammars/simple/tokenizer.grammar',
                      classifier=char_classifier)
    source = SourceString('universe description rule when and is end')
    parser = Tokenizer(grammar, source)
    keywords = ['universe', 'description', 'rule', 'when', 'and', 'is', 'end']
    while keywords:
        parser.parse()
        token = parser.get_token()
        self.assertEqual('keyword', token.type)
        self.assertEqual(keywords.pop(0), token.value)

def test_finish_token(self):
    char_classifier = CharClassifier.is_in_class
    grammar = Grammar(filename='grammars/simple/tokenizer.grammar',
                      classifier=char_classifier)
    source = SourceString('end')
    parser = Tokenizer(grammar, source)
    parser.parse()
    token = parser.get_token()
    self.assertEqual('keyword', token.type)
    self.assertEqual('end', token.value)
    parser.parse()
    token = parser.get_token()
    self.assertEqual('empty', token.type)
    self.assertEqual('', token.value)

def test_multiple_successor_states(self):
    grammar = Grammar('grammars/function.grammar')
    successors = {
        1: [3, 11],
        3: [4, 13],
        4: [5, 8],
        8: [6, 7],
        9: [5, 10, 15]
    }
    for source_id, target_ids in successors.items():
        source = State(grammar, 'function', source_id)
        targets = {
            State(grammar, 'function', target_id)
            for target_id in target_ids
        }
        states = source.find_successor_states()
        self.assertEqual(states, targets)

def test_without_matching(self):
    grammar = Grammar('grammars/route_samples.grammar',
                      classifier=SampleClassifier())
    state = State(grammar, 'sample', 6)
    token = Token('char', 'c')
    self.assertFalse(router.has_matching_successor(state, token))

def test_find_next_state(self):
    grammar = Grammar('grammars/function.grammar',
                      classifier=FunctionClassifier())
    transitions = [
        {'source': State(grammar, 'function', 1),
         'token': Token('keyword', ''),
         'target': State(grammar, 'function', 3)},
        {'source': State(grammar, 'function', 1),
         'token': Token('number', ''),
         'target': State(grammar, 'function', 11)},
        {'source': State(grammar, 'function', 2),
         'token': Token('keyword', ''),
         'target': None},
        {'source': State(grammar, 'function', 3),
         'token': Token('(', ''),
         'target': State(grammar, 'function', 4)},
        {'source': State(grammar, 'function', 3),
         'token': Token('number', ''),
         'target': State(grammar, 'function', 13)},
        {'source': State(grammar, 'function', 4),
         'token': Token(')', ''),
         'target': State(grammar, 'function', 5)},
        {'source': State(grammar, 'function', 4),
         'token': Token('number', ''),
         'target': State(grammar, 'function', 8)},
        {'source': State(grammar, 'function', 4),
         'token': Token('[', ''),
         'target': State(grammar, 'function', 8)},
        {'source': State(grammar, 'function', 4),
         'token': Token('keyword', ''),
         'target': State(grammar, 'function', 16)},
        {'source': State(grammar, 'function', 5),
         'token': Token('empty', ''),
         'target': State(grammar, 'function', 2)},
        {'source': State(grammar, 'function', 6),
         'token': Token(')', ''),
         'target': State(grammar, 'function', 9)},
        {'source': State(grammar, 'function', 6),
         'token': Token('comma', ''),
         'target': State(grammar, 'function', 9)},
        {'source': State(grammar, 'function', 6),
         'token': Token('empty', ''),
         'target': State(grammar, 'function', 9)},
        {'source': State(grammar, 'function', 7),
         'token': Token('[', ''),
         'target': State(grammar, 'list', 5, State(grammar, 'function', 7))},
        {'source': State(grammar, 'function', 7),
         'token': Token('keyword', ''),
         'target': State(grammar, 'function', 16)},
        {'source': State(grammar, 'function', 8),
         'token': Token('number', ''),
         'target': State(grammar, 'function', 6)},
        {'source': State(grammar, 'function', 8),
         'token': Token('[', ''),
         'target': State(grammar, 'function', 7)},
        {'source': State(grammar, 'function', 8),
         'token': Token('keyword', ''),
         'target': State(grammar, 'function', 16)},
        {'source': State(grammar, 'function', 9),
         'token': Token(')', ''),
         'target': State(grammar, 'function', 5)},
        {'source': State(grammar, 'function', 9),
         'token': Token('comma', ''),
         'target': State(grammar, 'function', 10)},
        {'source': State(grammar, 'function', 9),
         'token': Token('keyword', ''),
         'target': State(grammar, 'function', 15)},
        {'source': State(grammar, 'function', 10),
         'token': Token('number', ''),
         'target': State(grammar, 'function', 8)},
        {'source': State(grammar, 'function', 10),
         'token': Token('[', ''),
         'target': State(grammar, 'function', 8)},
        {'source': State(grammar, 'function', 10),
         'token': Token('keyword', ''),
         'target': State(grammar, 'function', 16)},
        {'source': State(grammar, 'function', 11),
         'token': Token('number', ''),
         'target': State(grammar, 'function', 12)},
        {'source': State(grammar, 'function', 12),
         'token': Token('number', ''),
         'target': None},
        {'source': State(grammar, 'function', 13),
         'token': Token('number', ''),
         'target': State(grammar, 'function', 14)},
        {'source': State(grammar, 'function', 14),
         'token': Token('number', ''),
         'target': None},
        {'source': State(grammar, 'function', 15),
         'token': Token('keyword', ''),
         'target': State(grammar, 'function', 2)},
        {'source': State(grammar, 'function', 16),
         'token': Token('number', ''),
         'target': State(grammar, 'function', 17)},
        {'source': State(grammar, 'function', 17),
         'token': Token('number', ''),
         'target': State(grammar, 'function', 18)},
        {'source': State(grammar, 'function', 18),
         'token': Token('number', ''),
         'target': None},
        {'source': State(grammar, 'list', 1, State(grammar, 'function', 7)),
         'token': Token('[', ''),
         'target': State(grammar, 'list', 5, State(grammar, 'function', 7))},
        {'source': State(grammar, 'list', 1, State(grammar, 'function', 7)),
         'token': Token('number', ''),
         'target': State(grammar, 'list', 8, State(grammar, 'function', 7))},
        {'source': State(grammar, 'list', 2, State(grammar, 'function', 7)),
         'token': Token(')', ''),
         'target': State(grammar, 'function', 9)},
        {'source': State(grammar, 'list', 2, State(grammar, 'function', 7)),
         'token': Token('comma', ''),
         'target': State(grammar, 'function', 9)},
        {'source': State(grammar, 'list', 2, State(grammar, 'function', 7)),
         'token': Token('keyword', ''),
         'target': State(grammar, 'function', 9)},
        {'source': State(grammar, 'list', 3, State(grammar, 'function', 7)),
         'token': Token('number', ''),
         'target': State(grammar, 'list', 4, State(grammar, 'function', 7))},
        {'source': State(grammar, 'list', 3, State(grammar, 'function', 7)),
         'token': Token('keyword', ''),
         'target': State(grammar, 'list', 8, State(grammar, 'function', 7))},
        {'source': State(grammar, 'list', 4, State(grammar, 'function', 7)),
         'token': Token(']', ''),
         'target': State(grammar, 'list', 6, State(grammar, 'function', 7))},
        {'source': State(grammar, 'list', 4, State(grammar, 'function', 7)),
         'token': Token('comma', ''),
         'target': State(grammar, 'list', 3, State(grammar, 'function', 7))},
        {'source': State(grammar, 'list', 4, State(grammar, 'function', 7)),
         'token': Token('keyword', ''),
         'target': State(grammar, 'list', 8, State(grammar, 'function', 7))},
        {'source': State(grammar, 'list', 5, State(grammar, 'function', 7)),
         'token': Token(']', ''),
         'target': State(grammar, 'list', 7, State(grammar, 'function', 7))},
        {'source': State(grammar, 'list', 5, State(grammar, 'function', 7)),
         'token': Token('number', ''),
         'target': State(grammar, 'list', 4, State(grammar, 'function', 7))},
        {'source': State(grammar, 'list', 5, State(grammar, 'function', 7)),
         'token': Token('keyword', ''),
         'target': State(grammar, 'list', 8, State(grammar, 'function', 7))},
        {'source': State(grammar, 'list', 6, State(grammar, 'function', 7)),
         'token': Token(')', ''),
         'target': State(grammar, 'list', 2, State(grammar, 'function', 7))},
        {'source': State(grammar, 'list', 6, State(grammar, 'function', 7)),
         'token': Token('comma', ''),
         'target': State(grammar, 'list', 2, State(grammar, 'function', 7))},
        {'source': State(grammar, 'list', 6, State(grammar, 'function', 7)),
         'token': Token('keyword', ''),
         'target': State(grammar, 'list', 2, State(grammar, 'function', 7))},
        {'source': State(grammar, 'list', 7, State(grammar, 'function', 7)),
         'token': Token(']', ''),
         'target': State(grammar, 'list', 6, State(grammar, 'function', 7))},
        {'source': State(grammar, 'list', 7, State(grammar, 'function', 7)),
         'token': Token('comma', ''),
         'target': State(grammar, 'list', 8, State(grammar, 'function', 7))},
        {'source': State(grammar, 'list', 8, State(grammar, 'function', 7)),
         'token': Token('comma', ''),
         'target': State(grammar, 'list', 9, State(grammar, 'function', 7))},
        {'source': State(grammar, 'list', 9, State(grammar, 'function', 7)),
         'token': Token('comma', ''),
         'target': State(grammar, 'list', 10, State(grammar, 'function', 7))}
    ]
    for transition in transitions:
        target_state = transition['target']
        if target_state is not None:
            self.assertEqual(
                router.find_next_state(
                    transition['source'], transition['token']),
                target_state)
        else:
            with self.assertRaises(RuntimeError):
                router.find_next_state(
                    transition['source'], transition['token'])

def test_without_default(self):
    grammar = Grammar('grammars/route_samples.grammar',
                      classifier=SampleClassifier())
    state = State(grammar, 'sample', 6)
    self.assertFalse(router.has_default_successor(state))

def test_indirect_default(self):
    grammar = Grammar('grammars/route_samples.grammar',
                      classifier=SampleClassifier())
    state = State(grammar, 'sample', 1)
    self.assertTrue(router.has_default_successor(state))

def test_multiple_matching(self):
    grammar = Grammar('grammars/route_samples.grammar',
                      classifier=SampleClassifier())
    state = State(grammar, 'sample', 2)
    token = Token('char', '*')
    with self.assertRaises(RuntimeError):
        _ = router.has_matching_successor(state, token)