Example #1
 def test_token_label(self):
     tokens = list(Lexer('abc:[] abc:"我們"').generate_tokens())
     self.assertEqual(tokens, [
         Token(TokenType.TOKEN_LABEL, 'abc'),
         Token(TokenType.EMPTY_TOKEN, "EMPTY_TOKEN"),
         Token(TokenType.TOKEN_LABEL, 'abc'),
         Token(TokenType.DEFAULT_TOKEN, '我們'),
     ])
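The assertions above compare Token objects directly, so the lexer's tokens evidently support value-based equality and an optional payload. A minimal sketch of what Token and TokenType might look like, inferred only from these tests (the project's actual definitions may differ):

    from dataclasses import dataclass
    from enum import Enum, auto
    from typing import Any

    class TokenType(Enum):
        # Token kinds referenced across the examples below (assumed member names).
        TOKEN_LABEL = auto()
        EMPTY_TOKEN = auto()
        DEFAULT_TOKEN = auto()
        TOKEN_QUANTIFIER = auto()
        LPAREN = auto()
        RPAREN = auto()
        ATTR_NAME = auto()
        ATTR_RELATION = auto()
        ATTR_VALUE = auto()
        ATTR_AND = auto()

    @dataclass
    class Token:
        # Field-wise equality is what lets assertEqual compare token lists directly.
        type: TokenType
        value: Any = None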
Example #2
 def test_group(self):
     tokens = list(Lexer('([] "a")+').generate_tokens())
     self.assertEqual(tokens, [
         Token(TokenType.LPAREN),
         Token(TokenType.EMPTY_TOKEN, "EMPTY_TOKEN"),
         Token(TokenType.DEFAULT_TOKEN, "a"),
         Token(TokenType.RPAREN),
         Token(TokenType.TOKEN_QUANTIFIER, (1, 'inf')),
     ])    
Example #3
 def test_token_quantifier(self):
     tokens = list(Lexer('"我們"{1,2} "我們"{2} "我們"? "我們"* "我們"+').generate_tokens())
     self.assertEqual(tokens, [
         Token(TokenType.DEFAULT_TOKEN, "我們"),
         Token(TokenType.TOKEN_QUANTIFIER, (1, 2)),
         Token(TokenType.DEFAULT_TOKEN, "我們"),
         Token(TokenType.TOKEN_QUANTIFIER, (2, 2)),
         Token(TokenType.DEFAULT_TOKEN, "我們"),
         Token(TokenType.TOKEN_QUANTIFIER, (0, 1)),
         Token(TokenType.DEFAULT_TOKEN, "我們"),
         Token(TokenType.TOKEN_QUANTIFIER, (0, 'inf')),
         Token(TokenType.DEFAULT_TOKEN, "我們"),
         Token(TokenType.TOKEN_QUANTIFIER, (1, 'inf')),
     ])
Example #4
 def test_token_quantifier(self):
     tokens = list(Lexer('[]{1,2} []{2} []? []* []+').generate_tokens())
     self.assertEqual(tokens, [
         Token(TokenType.EMPTY_TOKEN, "EMPTY_TOKEN"),
         Token(TokenType.TOKEN_QUANTIFIER, (1, 2)),
         Token(TokenType.EMPTY_TOKEN, "EMPTY_TOKEN"),
         Token(TokenType.TOKEN_QUANTIFIER, (2, 2)),
         Token(TokenType.EMPTY_TOKEN, "EMPTY_TOKEN"),
         Token(TokenType.TOKEN_QUANTIFIER, (0, 1)),
         Token(TokenType.EMPTY_TOKEN, "EMPTY_TOKEN"),
         Token(TokenType.TOKEN_QUANTIFIER, (0, 'inf')),
         Token(TokenType.EMPTY_TOKEN, "EMPTY_TOKEN"),
         Token(TokenType.TOKEN_QUANTIFIER, (1, 'inf')),
     ])
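Examples #3 and #4 show the mapping from quantifier syntax to (min, max) pairs: {1,2} -> (1, 2), {2} -> (2, 2), ? -> (0, 1), * -> (0, 'inf'), + -> (1, 'inf'). A hypothetical helper that reproduces this mapping, for illustration only (not the project's implementation):

    import re

    def parse_quantifier(text):
        """Map a quantifier string to the (min, max) tuple carried by TOKEN_QUANTIFIER."""
        if text == '?':
            return (0, 1)
        if text == '*':
            return (0, 'inf')
        if text == '+':
            return (1, 'inf')
        m = re.fullmatch(r'\{(\d+)(?:,(\d+))?\}', text)
        if m is None:
            raise ValueError(f'not a quantifier: {text!r}')
        low = int(m.group(1))
        high = int(m.group(2)) if m.group(2) else low
        return (low, high)

    assert parse_quantifier('{1,2}') == (1, 2)
    assert parse_quantifier('{2}') == (2, 2)
    assert parse_quantifier('*') == (0, 'inf')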
Example #5
 def test_attr(self):
     tokens = list(Lexer('[word="aaa" & pos!="N.*"]').generate_tokens())
     self.assertEqual(tokens, [
         Token(TokenType.ATTR_NAME, "word"),
         Token(TokenType.ATTR_RELATION, "is"),
         Token(TokenType.ATTR_VALUE, "aaa"),
         Token(TokenType.ATTR_AND),
         Token(TokenType.ATTR_NAME, "pos"),
         Token(TokenType.ATTR_RELATION, "is_not"),
         Token(TokenType.ATTR_VALUE, "N.*"),
     ])
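Within the square brackets, each constraint is split into ATTR_NAME, ATTR_RELATION, and ATTR_VALUE; = surfaces as the relation "is", != as "is_not", and & becomes a bare ATTR_AND token. A hedged sketch of that split for a single constraint (the regex and names are assumptions inferred from the expected tokens, not the project's code):

    import re

    # Assumed shape of one attribute constraint, e.g. word="aaa" or pos!="N.*".
    ATTR_RE = re.compile(r'(\w+)\s*(!?=)\s*"([^"]*)"')
    RELATION_NAMES = {'=': 'is', '!=': 'is_not'}

    def lex_attr(text):
        # Illustrative only: split one constraint into (name, relation, value).
        name, op, value = ATTR_RE.fullmatch(text).groups()
        return name, RELATION_NAMES[op], value

    assert lex_attr('word="aaa"') == ('word', 'is', 'aaa')
    assert lex_attr('pos!="N.*"') == ('pos', 'is_not', 'N.*')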
Example #6
 def test_escape(self):
     # Raw string so the lexer, not Python, is assumed to handle the \t and \" escapes.
     tokens = list(Lexer(r'[word="\t"] [word!="\""]').generate_tokens())
     self.assertEqual(tokens, [
         Token(TokenType.ATTR_NAME, "word"),
         Token(TokenType.ATTR_RELATION, "is"),
         Token(TokenType.ATTR_VALUE, "\t"),
         Token(TokenType.ATTR_NAME, "word"),
         Token(TokenType.ATTR_RELATION, "is_not"),
         Token(TokenType.ATTR_VALUE, '"'),
     ])
Example #7
 def test_escape(self):
     with open("escape_quotes.txt") as f:
         txt = f.read()
     tokens = list(Lexer(txt).generate_tokens())
     self.assertEqual(tokens, [
         Token(TokenType.DEFAULT_TOKEN, '"')
     ])
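Here the query comes from a fixture file rather than an inline literal, which keeps the escape sequence out of Python's own string handling. For the test to yield a single DEFAULT_TOKEN with value '"', the file presumably contains a quoted, escaped double-quote; a sketch of how such a fixture might be written (the file content is an inference from the expected token, not taken from the project):

    # Assumed fixture content: the four characters quote, backslash, quote, quote,
    # i.e. a default token whose literal body is one escaped double-quote.
    with open("escape_quotes.txt", "w", encoding="utf-8") as f:
        f.write('"\\""')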
Example #8
 def test_default_token(self):
     tokens = list(Lexer('"我們"').generate_tokens())
     self.assertEqual(tokens, [
         Token(TokenType.DEFAULT_TOKEN, "我們")
     ])
Example #9
 def test_empty_token(self):
     tokens = list(Lexer('[]').generate_tokens())
     self.assertEqual(tokens, [
         Token(TokenType.EMPTY_TOKEN, "EMPTY_TOKEN")
     ])
Example #10
 def test_all(self):
     tokens = list(Lexer('[word="把" & pos="P"] [pos!="N[abcd].*|COMMACATEGORY|PERIODCATEGORY"]* obj:[pos="N[abcd].*"] v:[pos="V.*"]').generate_tokens())
     self.assertEqual(tokens, [
         Token(TokenType.ATTR_NAME, "word"),
         Token(TokenType.ATTR_RELATION, 'is'),
         Token(TokenType.ATTR_VALUE, '把'),
         Token(TokenType.ATTR_AND),
         Token(TokenType.ATTR_NAME, "pos"),
         Token(TokenType.ATTR_RELATION, 'is'),
         Token(TokenType.ATTR_VALUE, 'P'),
         Token(TokenType.ATTR_NAME, "pos"),
         Token(TokenType.ATTR_RELATION, 'is_not'),
         Token(TokenType.ATTR_VALUE, 'N[abcd].*|COMMACATEGORY|PERIODCATEGORY'),
         Token(TokenType.TOKEN_QUANTIFIER, (0, 'inf')),
         Token(TokenType.TOKEN_LABEL, 'obj'),
         Token(TokenType.ATTR_NAME, "pos"),
         Token(TokenType.ATTR_RELATION, 'is'),
         Token(TokenType.ATTR_VALUE, 'N[abcd].*'),
         Token(TokenType.TOKEN_LABEL, 'v'),
         Token(TokenType.ATTR_NAME, "pos"),
         Token(TokenType.ATTR_RELATION, 'is'),
         Token(TokenType.ATTR_VALUE, 'V.*')
     ])
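Each snippet above is a method of a unittest.TestCase. To run one standalone, wrap it in a test class and import the lexer from wherever the project exposes it; the import path below is a placeholder, not the project's actual layout:

    import unittest

    # Placeholder module path; substitute the module that defines these names.
    from lexer import Lexer, Token, TokenType

    class TestLexer(unittest.TestCase):
        def test_default_token(self):
            tokens = list(Lexer('"我們"').generate_tokens())
            self.assertEqual(tokens, [Token(TokenType.DEFAULT_TOKEN, "我們")])

    if __name__ == "__main__":
        unittest.main()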