Example #1
 def test_parse_long_sentence_small_grammar(self):
     """Make sure we can handle a decently long string."""
     string_length = 50
     sentence = list()
     for _ in range(string_length):
         if random.random() < 0.5:
             sentence.append(Token(
                 value='0',
                 token_type=ST.ZERO,
                 line_number=0,
             ))
         else:
             sentence.append(Token(
                 value='1',
                 token_type=ST.ONE,
                 line_number=0,
             ))
     sentence.append(Token(
         value='ε',
         token_type=ST.EPSILON,
         line_number=0,
     ))
     self.assertTrue(parse(
         SmallGrammar,
         sentence
     ))
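Every example here constructs Token objects with value, token_type, and line_number fields. To run a snippet in isolation, a minimal stand-in like the following is enough; this is an assumption based on how the fields are used, not the project's actual Token class:

from collections import namedtuple

# Minimal Token stand-in: a plain record of (value, token_type, line_number).
# The field order matches the positional call in Example #13.
Token = namedtuple('Token', ['value', 'token_type', 'line_number'])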
Example #2
    def test_parses_ambiguous_grammars(self):
        """Make sure it can parse an ambigous grammar."""
        lexed_positive = [
            Token(
                value="Hegh",
                token_type=AKT.VERB,
                line_number=0,
            ),
            Token(
                value="be'",
                token_type=AKT.BE,
                line_number=0,
            ),
        ]
        self.assertTrue(parse(AmbiguousKlingonGrammar, lexed_positive))

        # Both one and two "be'" suffixes should be accepted by the grammar.
        lexed_negative = [
            Token(
                value="Hegh",
                token_type=AKT.VERB,
                line_number=0,
            ),
            Token(
                value="be'",
                token_type=AKT.BE,
                line_number=0,
            ),
            Token(
                value="be'",
                token_type=AKT.BE,
                line_number=0,
            ),
        ]
        self.assertTrue(parse(AmbiguousKlingonGrammar, lexed_negative))
Example #3
 def test_specific(self):
     """Make sure a specific token sequence can be looked up and parsed."""
     tokens = [
         Token(
             token_type=TokenType.LPAREN,
             value='',
             line_number=0,
         ),
         Token(
             token_type=TokenType.ARGUMENTS,
             value='',
             line_number=0,
         ),
         Token(
             token_type=TokenType.NEWLINE,
             value='',
             line_number=0,
         ),
     ]
     grammar = lookup(tokens)[0]
     self.assertIsNotNone(grammar)
     if inspect.isclass(grammar):
         parsed = cyk_parse(grammar, tokens)
     else:
         parsed = grammar(tokens)
     self.assertIsNotNone(parsed)
Example #4
 def test_parse_simple_nonmember(self):
     """Make sure we reject invalid strings."""
     lexed = [
         Token(
             value="qet",
             token_type=KT.UNKNOWN,
             line_number=0,
         ),
         Token(
             value="be'",
             token_type=KT.NOUN,
             line_number=0,
         ),
     ]
     self.assertFalse(parse(SimpleKlingonGrammar, lexed))
Example #5
 def test_parse_simple_member(self):
     """Make sure that we can recognize a valid string in the language."""
     lexed = [
         Token(
             value="SuS",
             token_type=KT.VERB,
             line_number=0,
         ),
         Token(
             value="be'",
             token_type=KT.NOUN,
             line_number=0,
         ),
     ]
     self.assertTrue(parse(SimpleKlingonGrammar, lexed))
Example #6
def random_tokens(min_length=1, max_length=20, exclude=None):
    # type: (int, int, Optional[Set[TokenType]]) -> List[Token]
    # A None default avoids sharing a mutable set across calls.
    if exclude is None:
        exclude = set()
    allowable = [x for x in TokenType if x not in exclude]
    ret = list()  # type: List[Token]
    line_number = 0
    for _ in range(random.randint(min_length, max_length)):
        _type = random.choice(allowable)  # type: TokenType
        if _type == TokenType.ARGUMENTS:
            value = 'Args'
        elif _type == TokenType.COLON:
            value = ':'
        elif _type == TokenType.DOCTERM:
            value = '"""'
        elif _type == TokenType.HASH:
            value = '#'
        elif _type == TokenType.INDENT:
            value = '    '
        elif _type == TokenType.LPAREN:
            value = '('
        elif _type == TokenType.NEWLINE:
            value = '\n'
        elif _type == TokenType.RAISES:
            value = 'Raises'
        elif _type == TokenType.RETURNS:
            value = 'Returns'
        elif _type == TokenType.RPAREN:
            value = ')'
        elif _type == TokenType.WORD:
            value = random_string()
        elif _type == TokenType.YIELDS:
            value = 'Yields'
        elif _type == TokenType.NOQA:
            value = 'noqa'
        elif _type == TokenType.RETURN_TYPE:
            value = random_string()
        elif _type == TokenType.YIELD_TYPE:
            value = random_string()
        elif _type == TokenType.VARIABLES:
            value = random.choice(['var', 'ivar', 'cvar'])
        elif _type == TokenType.VARIABLE_TYPE:
            value = random_string()
        elif _type == TokenType.ARGUMENT_TYPE:
            value = random_string()
        else:
            raise Exception('Unexpected token type {}'.format(_type))
        ret.append(
            Token(
                token_type=_type,
                value=value,
                line_number=line_number,
            ))
        line_number += random.choice([0, 1])
    return ret
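A quick usage sketch for random_tokens; TokenType and the random_string helper are assumed to come from the surrounding module:

# Hypothetical usage: build a short stream with no NEWLINE tokens.
tokens = random_tokens(min_length=2, max_length=5, exclude={TokenType.NEWLINE})
for token in tokens:
    print(token.token_type, repr(token.value), token.line_number)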
Example #7
 def test_parse_returns_parse_tree(self):
     """Make sure the parse returned a valid tree."""
     lexed = [
         Token(
             value="SuS",
             token_type=KT.VERB,
             line_number=0,
         ),
         Token(
             value="be'",
             token_type=KT.NOUN,
             line_number=1,
         ),
     ]
     node = parse(SimpleKlingonGrammar, lexed)
      self.assertIsNotNone(node)
     self.assertEqual(node.symbol, 'sentence')
     self.assertEqual(node.lchild.symbol, 'verb')
     self.assertEqual(node.lchild.value, lexed[0])
     self.assertEqual(node.rchild.symbol, 'noun')
     self.assertEqual(node.rchild.value, lexed[1])
Example #8
def lex(poem):
    """Lex a poem into WORD and NEWLINE tokens with line numbers."""
    tokens = list()
    word = ''
    i = 0  # current line number
    for letter in poem:
        if letter == '\n':
            if word:
                tokens.append(
                    Token(
                        value=word,
                        token_type=PoetryTokenType.WORD,
                        line_number=i,
                    ))
            word = ''
            tokens.append(
                Token(
                    value='\n',
                    token_type=PoetryTokenType.NEWLINE,
                    line_number=i,
                ))
            i += 1
        elif letter.isspace():
            if word:
                tokens.append(
                    Token(
                        value=word,
                        token_type=PoetryTokenType.WORD,
                        line_number=i,
                    ))
            word = ''
        else:
            word += letter
    if word:
        tokens.append(
            Token(
                value=word,
                token_type=PoetryTokenType.WORD,
                line_number=i,
            ))
    return tokens
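For instance, lexing a two-line poem produces WORD tokens tagged with their line number and a NEWLINE token between the lines (a sketch, assuming the PoetryTokenType enum is in scope):

# Hypothetical usage of lex() above.
for token in lex('roses are red\nviolets are blue'):
    print(token.token_type, repr(token.value), token.line_number)
# Three WORD tokens on line 0, one NEWLINE, three WORD tokens on line 1.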
Example #9
    def test_top_parse_sections_le_nonnewline_tokens(self):
        r"""Make sure that aren't too many sections.

        We are attempting to guarantee that
            s <= t
        where
            s = the number of sections,
            t = |{ token_i \in string
                    | token_i /= newline
                    \/ ( token_i+1 /= newline
                        /\ token_i-1 /= newline )}|

        """
        for _ in range(MAX_REPS):
            tokens = random_tokens(exclude={TokenType.NEWLINE})
            doubles_amount = randint(0, 10)
            for _ in range(doubles_amount):
                i = randint(0, len(tokens) - 1)
                tokens.insert(
                    i,
                    Token(
                        value='\n',
                        token_type=TokenType.NEWLINE,
                        line_number=0,
                    ),
                )
                tokens.insert(
                    i,
                    Token(
                        value='\n',
                        token_type=TokenType.NEWLINE,
                        line_number=0,
                    ),
                )
            parsed = top_parse(tokens)
        self.assertLessEqual(len(parsed), len(tokens))
Example #10
def pn_lex(source):
    tokens = list()
    for letter in source:
        if letter.isspace():
            continue
        elif letter == '.':
            tokens.append(Token(
                token_type=PN.DOT,
                value='.',
                line_number=0,
            ))
        elif letter == '-':
            tokens.append(Token(
                token_type=PN.DASH,
                value='-',
                line_number=0,
            ))
        elif letter.isdigit():
            tokens.append(Token(
                token_type=PN.NUMBER,
                value=letter,
                line_number=0,
            ))
    return tokens
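A usage sketch for pn_lex; PN is assumed to be an enum with DOT, DASH, and NUMBER members:

# Hypothetical usage: digits are emitted one character at a time.
tokens = pn_lex('555-0199')
print([t.token_type for t in tokens])
# Seven PN.NUMBER tokens, with one PN.DASH after the first three.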
Example #11
def _lex(sentence):
    """Yield a Token for each whitespace-separated word."""
    lookup = {
        "Hegh": GTT.intransitive_verb,
        "quS": GTT.intransitive_verb,
        "HoH": GTT.transitive_verb,
        "qIp": GTT.transitive_verb,
        "Duj": GTT.noun,
        "loD": GTT.noun,
        "puq": GTT.noun,
        "bIQ": GTT.noun,
        "val": GTT.adjective,
        "QIp": GTT.adjective,
    }
    for word in sentence.split():
        yield Token(
            value=word,
            token_type=lookup.get(word, GTT.unknown),
            line_number=0,
        )
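Since _lex is a generator, tokens are produced lazily; materialize it with list() when the full stream is needed (a sketch, assuming GTT and Token are in scope):

# Hypothetical usage of the generator above.
tokens = list(_lex('loD Hegh'))
# -> Token('loD', GTT.noun, 0), Token('Hegh', GTT.intransitive_verb, 0)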
Example #12
def ekg_lex(s):
    lookup = {
        'loD': EKG.NOUN,
        'qam': EKG.NOUN,
        'qet': EKG.INTRANSITIVE_VERB,
        'qIp': EKG.TRANSITIVE_VERB,
    }
    ret = []
    for i, line in enumerate(s.split('\n')):
        for word in line.split():
            ret.append(
                Token(
                    value=word,
                    token_type=lookup[word],
                    line_number=i,
                )
            )
    return ret
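Unlike _lex above, ekg_lex tracks line numbers, and a word outside its lookup table raises KeyError rather than producing an unknown-token type. A usage sketch:

# Hypothetical usage: words keep the line number they appeared on.
tokens = ekg_lex('loD qet\nqam')
print([(t.value, t.line_number) for t in tokens])
# [('loD', 0), ('qet', 0), ('qam', 1)]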
Example #13
def _v():
    # Builds a leaf node; `target` comes from the enclosing scope.
    return CykNode('value', value=Token(target, TokenType.WORD, 0))
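_v is only meaningful where target is bound; a sketch of how it might be called, assuming CykNode keeps the token passed as value:

# Hypothetical usage: `target` must exist in the enclosing scope.
target = 'hello'
node = _v()
assert node.value.value == 'hello'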