def test_empty():
    """An exhausted peeking_iterator yields its end_default exactly once."""
    # default end_default: None is produced as the final value
    it = peeking_iterator([])
    assert it.has_next()
    assert it.next() is None
    assert not it.has_next()
    # custom end_default is produced instead of None
    it = peeking_iterator([], end_default=42)
    assert it.next() == 42
    assert not it.has_next()
def test_empty():
    """Empty input still yields one trailing end_default value."""
    for sentinel, kwargs in ((None, {}), (42, {"end_default": 42})):
        pi = peeking_iterator([], **kwargs)
        assert pi.has_next() or sentinel == 42  # has_next() checked only for the default case originally
        if sentinel is None:
            assert pi.next() is None
        else:
            assert pi.next() == 42
        assert not pi.has_next()
def test_push_back():
    """A pushed-back value is peeked and consumed before the underlying items."""
    it = peeking_iterator([1, 2, 3])
    it.push_back(42)
    assert it.peek() == 42
    # pushed value first, then the original sequence resumes
    for expected in (42, 1, 2):
        assert it.next() == expected
def parse(str, do_print=False):
    """Tokenize *str* and parse it into a program.

    NOTE(review): the parameter name shadows the builtin ``str``; kept
    as-is because renaming would break keyword-argument callers.

    :param str: source text to parse
    :param do_print: when True, pretty-print the parsed program
    :returns: the parsed program object
    """
    stream = peeking_iterator(tokenize(str), end_default=T_EOF)
    program = do_parse_program(stream)
    if do_print:
        program.pretty_print()
    return program
def tokenize(string):
    """Lazily yield tokens for an indentation-sensitive language.

    Emits keyword/bareword/string/number/operator tokens, plus
    NewlineToken followed by an IndentationToken after every newline,
    and EOFToken at end of input.  Raises lexer_error on malformed input.

    :param string: source text (iterated character by character)
    """
    # end_default makes the iterator hand back EOFToken once input is exhausted
    it = peeking_iterator(iter(string), end_default=EOFToken)
    # operator lookup structure used by get_operators below
    tree = initialize_operators()
    # yell about whitespace at beginning of file
    c = it.peek()
    if c.isspace() and c != '\n':
        raise lexer_error("Unexpected whitespace at beginning of file")
    while it.has_next():
        c = it.next()
        # EOF
        if c is EOFToken:
            yield EOFToken
        # special whitespace handling
        elif c == '\n':
            yield NewlineToken
            # consume indentation on next line
            indentation = consume_while(it, lambda c: c.isspace() and c != '\n')
            if indentation == '':
                yield IndentationToken(0)
            else:
                chars = set(indentation)
                # indentation must be all-spaces or all-tabs, never mixed
                if chars == {' '}:
                    yield IndentationToken(len(indentation))
                elif chars == {'\t'}:
                    # one tab counts as 8 columns
                    yield IndentationToken(len(indentation) * 8)
                else:
                    raise lexer_error("Invalid indentation: %s" % repr(indentation))
        # ignore whitespace
        elif c.isspace():
            continue
        elif c in QUOTE_CHARS:
            yield try_consume_string(c, it)
        # operators (and other punctuation)
        elif c in PUNCTUATION_CHARS:
            op = get_operators(tree, c, it)
            if not op:
                raise lexer_error("Unexpected character %s" % c)
            yield op
        elif c.isalpha() or c == '_':
            # raw string: an 'r' immediately followed by a quote character
            if c == 'r' and it.peek() in QUOTE_CHARS:
                q = it.next()
                yield try_consume_string(q, it, raw=True)
            else:
                # bareword: letters, digits, underscores; may be a keyword
                brwd = c + consume_while(it, lambda c: c.isalnum() or c == '_')
                kwd = Keyword.is_keyword(brwd)
                if kwd:
                    yield kwd
                else:
                    yield BarewordToken(brwd)
        elif c.isdigit():
            yield consume_number(c, it)
        elif c == '#':
            # comment: skip to end of line (newline itself handled next loop)
            consume_while(it, lambda c: c != '\n')
        else:
            raise lexer_error("Unexpected character %s" % c)
def test_non_empty():
    """peek() does not advance; next() does; end_default None closes the stream.

    Fix: the original bound a local named ``next``, shadowing the builtin
    ``next``; renamed to ``value``.
    """
    pi = peeking_iterator([1, 2])
    assert pi.has_next()
    value = pi.peek()
    assert value == 1
    value = pi.next()
    assert value == 1
    value = pi.peek()
    assert value == 2
    value = pi.next()
    assert value == 2
    # after real items are consumed, the end_default (None) is yielded once
    value = pi.next()
    assert value is None
    assert not pi.has_next()
def test_iterate():
    """Iterating a peeking_iterator yields all items plus the trailing end_default."""
    assert list(peeking_iterator([1, 2, 3])) == [1, 2, 3, None]
def tokenize(string):
    """Lazily yield (token_type, value) pairs for a Lisp/Scheme-like language.

    Token types: T_EOF, single-character tokens ('(' ')' '\\'' '.'),
    T_STRING, T_FLOAT, T_RATIONAL, T_COMPLEX, T_INT, T_BOOL, T_NAME.
    Raises lexer_error on malformed literals or unterminated strings.

    :param string: source text (iterated character by character)
    """
    # end_default makes the iterator hand back T_EOF once input is exhausted
    it = peeking_iterator(iter(string), end_default=T_EOF)
    while it.has_next():
        c = it.next()
        # EOF
        if c == T_EOF:
            yield T_EOF, None
        # ignore whitespace
        elif is_whitespace(c):
            continue
        # single-character tokens
        elif c == '(' or c == ')' or c == '\'' or c == '.':
            yield c, None
        # strings
        elif c == '"':
            # NOTE: rebinds the parameter name as the string accumulator
            string = ""
            # set to true when an \ is encountered
            escaped = False
            while True:
                if not it.has_next():
                    raise lexer_error("Unescaped EOF within string: %s" % string)
                c = it.next()
                if escaped:
                    # only \n and \t are translated; any other escaped char is literal
                    if c == 'n':
                        string += '\n'
                    elif c == 't':
                        string += '\t'
                    else:
                        string += c
                    escaped = False
                else:
                    if c == '\\':
                        escaped = True
                    elif c == '"':
                        break
                    else:
                        string += c
            yield T_STRING, string
        # numeric literals
        elif is_digit(c):
            # integer part first; a following '.', '/' or '+' selects the kind
            string = c + consume_while(it, is_digit)
            c = it.peek()
            # float
            if c == '.':
                it.next()
                snd_part = consume_while(it, is_digit)
                literal = string + '.' + snd_part
                yield T_FLOAT, float(literal)
            # rational
            elif c == '/':
                it.next()
                snd_part = consume_while(it, is_digit)
                if snd_part == "":
                    raise(lexer_error("Invalid rational literal: %s/" % string))
                # value carried as a (numerator, denominator) pair
                yield T_RATIONAL, (int(string), int(snd_part))
            # complex literal
            elif c == '+':
                it.next()
                snd_part = consume_while(it, is_digit)
                if snd_part == "":
                    raise(lexer_error("Invalid complex literal (missing second part): %s+" % string))
                # the imaginary part must be terminated by a literal 'i'
                following = it.next()
                if following != 'i':
                    raise(lexer_error("Invalid complex literal (no following 'i'): %s+%s" % (string, snd_part)))
                # value carried as a (real, imaginary) pair
                yield T_COMPLEX, (int(string), int(snd_part))
            # integer
            else:
                yield T_INT, int(string)
        # booleans
        elif c == '#':
            c = it.next()
            if c == 't':
                yield T_BOOL, True
            elif c == 'f':
                yield T_BOOL, False
            else:
                raise(lexer_error("Invalid # literal"))
        # comments
        elif c == ';':
            consume_while(it, lambda c: c != '\n')
        # names
        else:
            # a name runs until whitespace, a paren, or EOF
            name = c + consume_while(it, lambda c: not is_whitespace(c) and c not in ('(', ')', T_EOF))
            yield T_NAME, name
def test_number():
    """An integer literal lexes and parses to the matching IntegerToken.

    Fix: the original bound a local named ``str``, shadowing the builtin
    ``str``; renamed to ``source``.
    """
    source = '42'
    lexed = lexer.tokenize(source)
    parsed = zypy_parser.parse_expression(peeking_iterator(lexed))
    assert parsed == IntegerToken(42)
def parse(str):
    """Tokenize *str* and parse it into a program.

    NOTE(review): the parameter name shadows the builtin ``str``; kept
    as-is because renaming would break keyword-argument callers.

    :param str: source text to parse
    :returns: the parsed program object
    """
    return parse_program(peeking_iterator(tokenize(str)))