def test_without_escapes(self):
    """A quoted string with no escapes parses to its unquoted content."""
    rules = Grammar(
        filename='grammars/escaped.grammar',
        classifier=EscapedClassifier(),
    )
    text = SourceString(r'"Simple string without escape characters"')
    unescaper = EscapedParser(rules, text)
    unescaper.parse()

    expected = r'Simple string without escape characters'
    self.assertEqual(unescaper.result, expected)
def test_only_spaces(self):
    """A source made only of whitespace produces an empty word list."""
    rules = Grammar(
        filename='grammars/words.grammar',
        classifier=WsClassifier(),
    )
    splitter = WsParser(rules, SourceString(r' '))
    splitter.parse()

    no_words = []
    self.assertEqual(splitter.result, no_words)
def test_multiple_escapes(self):
    """An escaped quote and an escaped backslash are both unescaped."""
    rules = Grammar(
        filename='grammars/escaped.grammar',
        classifier=EscapedClassifier(),
    )
    text = SourceString(r'"Some \" and \\ characters."')
    unescaper = EscapedParser(rules, text)
    unescaper.parse()

    expected = r'Some " and \ characters.'
    self.assertEqual(unescaper.result, expected)
def test_single_word(self):
    """A source holding one word produces a one-element word list."""
    rules = Grammar(
        filename='grammars/words.grammar',
        classifier=WsClassifier(),
    )
    splitter = WsParser(rules, SourceString(r'single'))
    splitter.parse()

    expected_words = ['single']
    self.assertEqual(splitter.result, expected_words)
def test_single_integer(self):
    """A bracketed list with one integer parses to a one-element list."""
    rules = Grammar(
        filename='grammars/integer_list.grammar',
        classifier=ListClassifier(),
    )
    list_parser = ListParser(rules, SourceString('[1234]'))
    list_parser.parse()

    expected_values = [1234]
    self.assertEqual(list_parser.result, expected_values)
def test_single_keyword_with_padding(self):
    """A keyword surrounded by spaces is returned as one keyword token."""
    rules = Grammar(
        filename='grammars/simple/tokenizer.grammar',
        classifier=CharClassifier.is_in_class,
    )
    tokenizer = Tokenizer(rules, SourceString(' universe '))
    tokenizer.parse()

    found = tokenizer.get_token()
    self.assertEqual('keyword', found.type)
    self.assertEqual('universe', found.value)
def test_leading_and_trailing_spaces(self):
    """Whitespace at both ends of the source is not part of any word."""
    rules = Grammar(
        filename='grammars/words.grammar',
        classifier=WsClassifier(),
    )
    text = SourceString(r' Leading and trailing spaces ')
    splitter = WsParser(rules, text)
    splitter.parse()

    expected_words = ['Leading', 'and', 'trailing', 'spaces']
    self.assertEqual(splitter.result, expected_words)
def test_multiple_separator_spaces(self):
    """Space-separated words are collected, in order, into a list."""
    rules = Grammar(
        filename='grammars/words.grammar',
        classifier=WsClassifier(),
    )
    text = SourceString(r'Some simple words after each others')
    splitter = WsParser(rules, text)
    splitter.parse()

    expected_words = ['Some', 'simple', 'words', 'after', 'each', 'others']
    self.assertEqual(splitter.result, expected_words)
def test_single_text(self):
    """A double-quoted string is returned as a text token without quotes."""
    rules = Grammar(
        filename='grammars/simple/tokenizer.grammar',
        classifier=CharClassifier.is_in_class,
    )
    tokenizer = Tokenizer(rules, SourceString('"single text"'))
    tokenizer.parse()

    found = tokenizer.get_token()
    self.assertEqual('text', found.type)
    self.assertEqual('single text', found.value)
def test_single_number(self):
    """A run of digits is returned as a single number token."""
    # NOTE(review): sibling tokenizer tests pass CharClassifier.is_in_class
    # rather than an instance -- confirm which form Grammar expects.
    rules = Grammar(
        filename='grammars/simple/tokenizer.grammar',
        classifier=CharClassifier(),
    )
    tokenizer = Tokenizer(rules, SourceString('1234'))
    tokenizer.parse()

    found = tokenizer.get_token()
    self.assertEqual('number', found.type)
    self.assertEqual('1234', found.value)
def test_whitespace_only_stream(self):
    """A whitespace-only source yields an 'empty' token with no value."""
    rules = Grammar(
        filename='grammars/simple/tokenizer.grammar',
        classifier=CharClassifier.is_in_class,
    )
    tokenizer = Tokenizer(rules, SourceString(' '))
    tokenizer.parse()

    found = tokenizer.get_token()
    self.assertEqual('empty', found.type)
    self.assertEqual('', found.value)
def test_invalid_escape_character(self):
    """Parsing a string with an unsupported escape raises ValueError.

    The source contains a backslash followed by 't'; the parser is
    expected to reject it with the message 'Invalid escape character!'.
    """
    escaped_classifier = EscapedClassifier()
    grammar = Grammar(filename='grammars/escaped.grammar',
                      classifier=escaped_classifier)
    source = SourceString(r'"The \t is invalid here!"')
    parser = EscapedParser(grammar, source)

    # assertRaises fails the test by itself when nothing is raised, so no
    # manual try/except/else bookkeeping is needed.
    with self.assertRaises(ValueError) as caught:
        parser.parse()
    self.assertEqual(str(caught.exception), 'Invalid escape character!')
def test_empty_source(self):
    """Parsing an empty source raises ValueError('Missing quote!')."""
    escaped_classifier = EscapedClassifier()
    grammar = Grammar(filename='grammars/escaped.grammar',
                      classifier=escaped_classifier)
    source = SourceString(r'')
    parser = EscapedParser(grammar, source)

    # assertRaises fails the test by itself when nothing is raised, so no
    # manual try/except/else bookkeeping is needed.
    with self.assertRaises(ValueError) as caught:
        parser.parse()
    self.assertEqual(str(caught.exception), 'Missing quote!')
def test_multiple_floats(self):
    """Each float in the source comes back, in order, as a number token."""
    rules = Grammar(
        filename='grammars/simple/tokenizer.grammar',
        classifier=CharClassifier.is_in_class,
    )
    text = SourceString('.101, 10.20,\n\n -8.9 -7.6 -.888')
    tokenizer = Tokenizer(rules, text)

    # One parse() call is made per expected token.
    for expected in ('.101', '10.20', '-8.9', '-7.6', '-.888'):
        tokenizer.parse()
        found = tokenizer.get_token()
        self.assertEqual('number', found.type)
        self.assertEqual(expected, found.value)
def test_multiple_integers(self):
    """Each integer in the source comes back, in order, as a number token."""
    rules = Grammar(
        filename='grammars/simple/tokenizer.grammar',
        classifier=CharClassifier.is_in_class,
    )
    text = SourceString(' 12 34 \n -567 \n\n-8\n \n')
    tokenizer = Tokenizer(rules, text)

    # One parse() call is made per expected token.
    for expected in ('12', '34', '-567', '-8'):
        tokenizer.parse()
        found = tokenizer.get_token()
        self.assertEqual('number', found.type)
        self.assertEqual(expected, found.value)
def test_invalid_leading_character(self):
    """A source not starting with '[' raises ValueError.

    The expected error message is 'Missing [ character!'.
    """
    list_classifier = ListClassifier()
    grammar = Grammar(filename='grammars/integer_list.grammar',
                      classifier=list_classifier)
    source = SourceString('invalid')
    parser = ListParser(grammar, source)

    # assertRaises fails the test by itself when nothing is raised, so no
    # manual try/except/else bookkeeping is needed.
    with self.assertRaises(ValueError) as caught:
        parser.parse()
    self.assertEqual(str(caught.exception), 'Missing [ character!')
def test_multiple_texts(self):
    """Each quoted string in the source comes back as a text token."""
    rules = Grammar(
        filename='grammars/simple/tokenizer.grammar',
        classifier=CharClassifier.is_in_class,
    )
    text = SourceString('"first" "second"\n\n"third"')
    tokenizer = Tokenizer(rules, text)

    # One parse() call is made per expected token.
    for expected in ('first', 'second', 'third'):
        tokenizer.parse()
        found = tokenizer.get_token()
        self.assertEqual('text', found.type)
        self.assertEqual(expected, found.value)
def test_missing_integer(self):
    """A list with an empty slot between commas raises ValueError.

    The expected error message is 'An integer expected!'.
    """
    list_classifier = ListClassifier()
    grammar = Grammar(filename='grammars/integer_list.grammar',
                      classifier=list_classifier)
    source = SourceString('[12, 34, , 78]')
    parser = ListParser(grammar, source)

    # assertRaises fails the test by itself when nothing is raised, so no
    # manual try/except/else bookkeeping is needed.
    with self.assertRaises(ValueError) as caught:
        parser.parse()
    self.assertEqual(str(caught.exception), 'An integer expected!')
def test_quoted_texts(self):
    """Escaped quotes and backslashes inside text tokens are unescaped."""
    # NOTE(review): sibling tokenizer tests pass CharClassifier.is_in_class
    # rather than an instance -- confirm which form Grammar expects.
    rules = Grammar(
        filename='grammars/simple/tokenizer.grammar',
        classifier=CharClassifier(),
    )
    text = SourceString(
        '"\\"first\\"" "sec\\\\ond"\n\n"th\\\\\\"\\\\rd"')
    tokenizer = Tokenizer(rules, text)

    # One parse() call is made per expected token.
    for expected in ('"first"', 'sec\\ond', 'th\\"\\rd'):
        tokenizer.parse()
        found = tokenizer.get_token()
        self.assertEqual('text', found.type)
        self.assertEqual(expected, found.value)
def test_multiple_keywords(self):
    """Each keyword in the source comes back, in order, as a keyword token."""
    rules = Grammar(
        filename='grammars/simple/tokenizer.grammar',
        classifier=CharClassifier.is_in_class,
    )
    text = SourceString('universe description rule when and is end')
    tokenizer = Tokenizer(rules, text)

    expected_keywords = (
        'universe', 'description', 'rule', 'when', 'and', 'is', 'end',
    )
    # One parse() call is made per expected token.
    for expected in expected_keywords:
        tokenizer.parse()
        found = tokenizer.get_token()
        self.assertEqual('keyword', found.type)
        self.assertEqual(expected, found.value)
def test_finish_token(self):
    """After the last real token, a further parse yields an 'empty' token."""
    rules = Grammar(
        filename='grammars/simple/tokenizer.grammar',
        classifier=CharClassifier.is_in_class,
    )
    tokenizer = Tokenizer(rules, SourceString('end'))

    # First round reads the keyword, second round hits the end of input.
    expected_tokens = (('keyword', 'end'), ('empty', ''))
    for expected_type, expected_value in expected_tokens:
        tokenizer.parse()
        found = tokenizer.get_token()
        self.assertEqual(expected_type, found.type)
        self.assertEqual(expected_value, found.value)
def load_engine_from_string(source):
    """
    Build an engine from the textual form of a rulebase.

    The text is wrapped in a SourceString, tokenized with the grammar in
    ``grammars/simple/tokenizer.grammar`` and parsed with the grammar in
    ``grammars/simple/parser.grammar``; both grammar files are located
    relative to the directory of this module.

    :param source: the source text of the rulebase
    :return: the ``engine`` attribute of the parser after parsing
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))

    # Character level: raw text -> tokens.
    characters = SourceString(source)
    token_rules = Grammar(
        filename=os.path.join(base_dir,
                              '../grammars/simple/tokenizer.grammar'),
        classifier=CharClassifier(),
    )
    tokens = Tokenizer(token_rules, characters)

    # Token level: the tokenizer is the input of the parser.
    parse_rules = Grammar(
        filename=os.path.join(base_dir,
                              '../grammars/simple/parser.grammar'),
        classifier=TokenClassifier(),
    )
    engine_parser = Parser(parse_rules, tokens)
    engine_parser.parse()
    return engine_parser.engine