def tokenize_end_of_file(self):
    yield from repeated(self.tokenize_comment)()
    start = self.stream.tell()
    r = self.stream.read(1)
    if r:
        self.stream.seek(start)
        raise TokenizationError(
            f"Expected end of file or new tag at {start}, got {r}"
        )
    return iter([])

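# Sketch of how tokenize_end_of_file is meant to compose at the top level
# (assumed; tokenize_header is hypothetical here, as only tokenize_body and
# tokenize_end_of_file appear in this module):
#
#     def tokenize(self):
#         yield from self.tokenize_header()
#         yield from self.tokenize_body()
#         yield from self.tokenize_end_of_file()
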
@property
def tokenize_keyword(self):
    # Accessed as a mapping, e.g. self.tokenize_keyword[TokenKind.TAG](),
    # hence a property rather than a plain method.
    return {
        kw_kind: bind(
            repeated(self.tokenize_comment),
            self.binary_ended(
                tokenize_word(self.stream, kw.encode("ascii"), kw_kind)
            ),
        )
        for kw_kind, kw in TokenKind.keywords().items()
    }

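# A minimal sketch of the TokenKind.keywords() mapping assumed above: each
# keyword token kind paired with its literal spelling in the roff format.
# Only TAG and ENDTAG are implied by this module; any further members of
# TokenKind are not shown here.
#
#     @classmethod
#     def keywords(cls):
#         return {cls.TAG: "tag", cls.ENDTAG: "endtag"}
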
def test_tokenize_binary_string(roff_binary_body_tokenizer):
    tokenizer = roff_binary_body_tokenizer(b"Aroffstring\0")
    token = next(
        bind(
            repeated(tokenizer.tokenize_comment),
            tokenizer.tokenize_string(TokenKind.STRING_LITERAL),
        )()
    )
    assert token.kind == TokenKind.STRING_LITERAL
    assert token.get_value(tokenizer.stream) == b"Aroffstring"

def tokenize_simple_tagkey(self):
    """
    see AbstractRoffTokenizer.tokenize_simple_tagkey
    """
    type_token = next(self.tokenize_simple_type())
    yield type_token
    yield from repeated(self.tokenize_comment)()
    yield from self.tokenize_string(TokenKind.NAME)()
    # We do not use binary_delimited here because in the binary format it
    # is not possible to know whether you have a comment or a value
    # starting with b'#'. Therefore we assume that no comments occur when
    # we expect a value.
    yield from self.tokenize_value(type_token.kind)()

def tokenize_tag_group(self):
    """
    Tokenize a binary tag group, yields

    [
        Token(TokenKind.TAG),
        tokenize_tagkey("bool a 1"),
        Token(TokenKind.ENDTAG),
    ]

    for a stream containing "tag bool a 1 endtag".
    """
    yield from self.tokenize_keyword[TokenKind.TAG]()
    yield from self.tokenize_name()
    yield from repeated(self.tokenize_tagkey)()
    yield from self.tokenize_keyword[TokenKind.ENDTAG]()

def test_combinators(inp_str):
    stream = io.StringIO(inp_str)
    foo_tokenizer = tokenize_word(stream, "foo", "foo")
    space_tokenizer = tokenize_word(stream, " ", " ")
    test_tokenizer = repeated(one_of(foo_tokenizer, space_tokenizer))()
    assert next(test_tokenizer).kind == "foo"
    assert next(test_tokenizer).kind == " "
    assert next(test_tokenizer).kind == "foo"
    assert next(test_tokenizer).kind == " "
    assert next(test_tokenizer).kind == "foo"
    with pytest.raises(StopIteration):
        next(test_tokenizer)

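# A minimal, self-contained sketch of the combinators exercised above,
# assuming the semantics implied by their use in this module: a "tokenizer"
# is a zero-argument callable returning a generator of tokens, and failure
# is signalled by rewinding the stream and raising TokenizationError. The
# Token dataclass and the exact signatures are assumptions, not the
# library's actual API.
from dataclasses import dataclass


@dataclass
class Token:
    kind: str
    start: int
    end: int


class TokenizationError(Exception):
    pass


def tokenize_word(stream, word, kind):
    """Match the literal `word` at the current stream position."""

    def tokenizer():
        start = stream.tell()
        if stream.read(len(word)) != word:
            stream.seek(start)
            raise TokenizationError(f"Expected {word!r} at {start}")
        yield Token(kind, start, stream.tell())

    return tokenizer


def one_of(*tokenizers):
    """Yield from the first alternative that succeeds."""

    def tokenizer():
        for alternative in tokenizers:
            try:
                yield from alternative()
                return
            except TokenizationError:
                continue
        raise TokenizationError("No alternative matched")

    return tokenizer


def repeated(tokenizer):
    """Apply `tokenizer` zero or more times, stopping at the first failure."""

    def repeated_tokenizer():
        while True:
            try:
                yield from tokenizer()
            except TokenizationError:
                return

    return repeated_tokenizer


def bind(first, second):
    """Run `first`, then `second`, concatenating their token streams."""

    def tokenizer():
        yield from first()
        yield from second()

    return tokenizer
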
def tokenize_body(self):
    """
    Tokenize a roff file body, that is, everything following the
    b"roff-bin" or b"roff-asc" header.
    """
    yield from repeated(self.tokenize_tag_group)()

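# Example input accepted by tokenize_body (ascii spelling; extrapolated from
# the example in the tokenize_tag_group docstring): zero or more tag groups
# in sequence, e.g.
#
#     tag bool a 1 endtag tag bool b 0 endtag
#
# each of which is handled by one application of tokenize_tag_group.
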
def tokenize_name(self):
    return bind(
        repeated(self.tokenize_comment),
        self.tokenize_string(TokenKind.NAME),
    )

def tokenize_delimiter(self):
    return repeated(one_of(self.tokenize_comment, self.tokenize_space))

def tokenize_array_data(self, element_type, num_values_token):
    """
    see AbstractRoffTokenizer.tokenize_array_data.
    """
    # Values are read repeatedly until one fails to tokenize, so
    # num_values_token is not consulted here.
    yield from repeated(self.tokenize_value(element_type))()