def test_parse_long_sentence_small_grammar(self):
    """Make sure we can handle a decently long string."""
    max_string_length = 50
    # Build a random binary sentence, then terminate it with epsilon.
    sentence = []
    for _ in range(max_string_length):
        bit_is_zero = random.random() < 0.5
        sentence.append(Token(
            value='0' if bit_is_zero else '1',
            token_type=ST.ZERO if bit_is_zero else ST.ONE,
            line_number=0,
        ))
    sentence.append(Token(
        value='ε',
        token_type=ST.EPSILON,
        line_number=0,
    ))
    self.assertTrue(parse(SmallGrammar, sentence))
def test_parses_ambiguous_grammars(self):
    """Make sure it can parse an ambiguous grammar."""
    def make_token(value, token_type):
        # All tokens in this test live on line 0.
        return Token(
            value=value,
            token_type=token_type,
            line_number=0,
        )

    # A sentence with a single "be'" suffix.
    lexed_positive = [
        make_token("Hegh", AKT.VERB),
        make_token("be'", AKT.BE),
    ]
    self.assertTrue(parse(AmbiguousKlingonGrammar, lexed_positive))

    # The same sentence with the suffix doubled.
    lexed_negative = [
        make_token("Hegh", AKT.VERB),
        make_token("be'", AKT.BE),
        make_token("be'", AKT.BE),
    ]
    self.assertTrue(parse(AmbiguousKlingonGrammar, lexed_negative))
def test_specific(self):
    """Make sure a LPAREN/ARGUMENTS/NEWLINE sequence resolves to a grammar."""
    tokens = [
        Token(token_type=token_type, value='', line_number=0)
        for token_type in (
            TokenType.LPAREN,
            TokenType.ARGUMENTS,
            TokenType.NEWLINE,
        )
    ]
    grammar = lookup(tokens)[0]
    self.assertIsNotNone(grammar)
    # The lookup may yield either a grammar class or a parse callable.
    if inspect.isclass(grammar):
        parsed = cyk_parse(grammar, tokens)
    else:
        parsed = grammar(tokens)
    self.assertIsNotNone(parsed)
def test_parse_simple_nonmember(self):
    """Make sure we reject invalid strings."""
    lexed = [
        Token(value="qet", token_type=KT.UNKNOWN, line_number=0),
        Token(value="be'", token_type=KT.NOUN, line_number=0),
    ]
    self.assertFalse(parse(SimpleKlingonGrammar, lexed))
def test_parse_simple_member(self):
    """Make sure that we can recognize a valid string in the language."""
    lexed = [
        Token(value="SuS", token_type=KT.VERB, line_number=0),
        Token(value="be'", token_type=KT.NOUN, line_number=0),
    ]
    self.assertTrue(parse(SimpleKlingonGrammar, lexed))
def random_tokens(min_length=1, max_length=20, exclude=frozenset()):
    # type: (int, int, Set[TokenType]) -> Iterable[Token]
    """Generate a random list of tokens.

    Args:
        min_length: The minimum number of tokens to generate.
        max_length: The maximum number of tokens to generate.
        exclude: Token types which should not appear in the result.
            (Default changed from a mutable ``set()`` literal to an
            immutable ``frozenset()``; the argument is only ever read,
            so behavior is unchanged.)

    Raises:
        Exception: If a TokenType has no value mapping below (i.e. a
            new member was added without updating this helper.)

    Returns:
        A list of tokens whose line numbers increase by 0 or 1 per token.

    """
    # Token types with a single canonical lexeme.
    fixed_values = {
        TokenType.ARGUMENTS: 'Args',
        TokenType.COLON: ':',
        TokenType.DOCTERM: '"""',
        TokenType.HASH: '#',
        TokenType.INDENT: '    ',
        TokenType.LPAREN: '(',
        TokenType.NEWLINE: '\n',
        TokenType.RAISES: 'Raises',
        TokenType.RETURNS: 'Returns',
        TokenType.RPAREN: ')',
        TokenType.YIELDS: 'Yields',
        TokenType.NOQA: 'noqa',
    }
    # Token types whose lexeme is an arbitrary random string.
    random_valued = {
        TokenType.WORD,
        TokenType.RETURN_TYPE,
        TokenType.YIELD_TYPE,
        TokenType.VARIABLE_TYPE,
        TokenType.ARGUMENT_TYPE,
    }
    allowable = [x for x in TokenType if x not in exclude]
    ret = list()  # type: List[Token]
    line_number = 0
    for _ in range(random.randint(min_length, max_length)):
        _type = random.choice(allowable)  # type: TokenType
        if _type in fixed_values:
            value = fixed_values[_type]
        elif _type in random_valued:
            value = random_string()
        elif _type == TokenType.VARIABLES:
            value = random.choice(['var', 'ivar', 'cvar'])
        else:
            raise Exception('Unexpected token type {}'.format(_type))
        ret.append(
            Token(
                token_type=_type,
                value=value,
                line_number=line_number,
            ))
        # Occasionally advance the line number to simulate multi-line input.
        line_number += random.choice([0, 1])
    return ret
def test_parse_returns_parse_tree(self):
    """Make sure the parse returned a valid tree."""
    verb = Token(value="SuS", token_type=KT.VERB, line_number=0)
    noun = Token(value="be'", token_type=KT.NOUN, line_number=1)
    node = parse(SimpleKlingonGrammar, [verb, noun])
    self.assertIsNotNone(node)
    # Root is the sentence, with verb and noun as left/right children.
    self.assertEqual(node.symbol, 'sentence')
    self.assertEqual(node.lchild.symbol, 'verb')
    self.assertEqual(node.lchild.value, verb)
    self.assertEqual(node.rchild.symbol, 'noun')
    self.assertEqual(node.rchild.value, noun)
def lex(poem):
    """Lex a poem into WORD and NEWLINE tokens.

    Words are maximal runs of non-whitespace characters; every newline
    also produces its own NEWLINE token and advances the line counter.

    Args:
        poem: The string to lex.

    Returns:
        A list of Tokens.

    """
    # The original repeated the "flush the current word" block three
    # times; since '\n'.isspace() is True, the newline case folds into
    # the general whitespace case, leaving a single flush before the
    # newline token and one final flush after the loop.
    tokens = list()
    word = ''
    line_number = 0
    for letter in poem:
        if letter.isspace():
            if word:
                tokens.append(
                    Token(
                        value=word,
                        token_type=PoetryTokenType.WORD,
                        line_number=line_number,
                    ))
                word = ''
            if letter == '\n':
                tokens.append(
                    Token(
                        value='\n',
                        token_type=PoetryTokenType.NEWLINE,
                        line_number=line_number,
                    ))
                line_number += 1
        else:
            word += letter
    # Flush a trailing word not followed by whitespace.
    if word:
        tokens.append(
            Token(
                value=word,
                token_type=PoetryTokenType.WORD,
                line_number=line_number,
            ))
    return tokens
def test_top_parse_sections_le_nonnewline_tokens(self):
    r"""Make sure there aren't too many sections.

    We are attempting to guarantee that s <= t where

        s = the number of sections,
        t = |{ token_i \in string
               | token_i /= newline
                 \/ ( token_i+1 /= newline /\ token_i-1 /= newline )}|

    """
    def newline():
        return Token(
            value='\n',
            token_type=TokenType.NEWLINE,
            line_number=0,
        )

    for _ in range(MAX_REPS):
        tokens = random_tokens(exclude={TokenType.NEWLINE})
        # Splice pairs of consecutive newlines at random positions.
        for _ in range(randint(0, 10)):
            position = randint(0, len(tokens) - 1)
            tokens[position:position] = [newline(), newline()]
        parsed = top_parse(tokens)
        self.assertTrue(len(parsed) <= len(tokens))
def pn_lex(source):
    """Lex a string into DOT, DASH and NUMBER tokens.

    Whitespace is skipped; any other unrecognized character is ignored.

    Args:
        source: The string to lex.

    Returns:
        A list of Tokens, all with line_number 0.

    """
    tokens = list()
    for letter in source:
        if letter.isspace():
            continue
        elif letter == '.':
            tokens.append(Token(
                token_type=PN.DOT,
                value='.',
                line_number=0,
            ))
        elif letter == '-':
            tokens.append(Token(
                token_type=PN.DASH,
                value='-',
                line_number=0,
            ))
        # BUG FIX: was `letter.isdigit` (no call parentheses) — a bound
        # method is always truthy, so every remaining character was
        # lexed as a NUMBER.  Now only actual digits produce tokens.
        elif letter.isdigit():
            tokens.append(Token(
                token_type=PN.NUMBER,
                value=letter,
                line_number=0,
            ))
    return tokens
def _lex(sentence):
    """Yield a Token for each whitespace-separated word in the sentence.

    Words missing from the vocabulary are tagged GTT.unknown.
    """
    vocabulary = {
        "Hegh": GTT.intransitive_verb,
        "quS": GTT.intransitive_verb,
        "HoH": GTT.transitive_verb,
        "qIp": GTT.transitive_verb,
        "Duj": GTT.noun,
        "loD": GTT.noun,
        "puq": GTT.noun,
        "bIQ": GTT.noun,
        "val": GTT.adjective,
        "QIp": GTT.adjective,
    }
    for word in sentence.split():
        token_type = vocabulary.get(word, GTT.unknown)
        yield Token(
            value=word,
            token_type=token_type,
            line_number=0,
        )
def ekg_lex(s):
    """Lex a multi-line string into tokens, tracking line numbers.

    Raises KeyError if a word is not in the vocabulary.
    """
    vocabulary = {
        'loD': EKG.NOUN,
        'qam': EKG.NOUN,
        'qet': EKG.INTRANSITIVE_VERB,
        'qIp': EKG.TRANSITIVE_VERB,
    }
    tokens = []
    for line_number, line in enumerate(s.split('\n')):
        for word in line.split():
            tokens.append(
                Token(
                    value=word,
                    token_type=vocabulary[word],
                    line_number=line_number,
                )
            )
    return tokens
def _v():
    # Wrap ``target`` (a free variable from the enclosing scope) in a
    # WORD token and return it as a 'value' CykNode.
    token = Token(target, TokenType.WORD, 0)
    return CykNode('value', value=token)