Example #1
def tokenize_end_of_file(self):
    # Skip any trailing comments, then verify nothing but EOF remains.
    yield from repeated(self.tokenize_comment)()
    start = self.stream.tell()
    r = self.stream.read(1)
    if r:
        # Rewind so the stream position still points at the offending byte.
        self.stream.seek(start)
        raise TokenizationError(
            f"Expected end of file or new tag at {start}, got {r}")
    return iter([])
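For context, a minimal sketch of a repeated combinator consistent with this usage (an assumption, not the library's actual source): it keeps calling the wrapped tokenizer and yielding its tokens until the tokenizer signals failure by raising TokenizationError.

def repeated(tokenizer):
    # Sketch only: assumes a failing tokenizer raises TokenizationError
    # before yielding any token, so no partial result leaks out.
    def repeated_tokenizer():
        while True:
            try:
                yield from tokenizer()
            except TokenizationError:
                return
    return repeated_tokenizer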
Example #2
@property
def tokenize_keyword(self):
    # Maps each keyword kind to a tokenizer that skips leading comments,
    # then matches the ASCII-encoded keyword itself. Exposed as a property
    # so callers can index it directly (cf. tokenize_tag_group in Example #5).
    return {
        kw_kind: bind(
            repeated(self.tokenize_comment),
            self.binary_ended(
                tokenize_word(self.stream, kw.encode("ascii"), kw_kind)),
        )
        for kw_kind, kw in TokenKind.keywords().items()
    }
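Similarly, a plausible sketch of bind (assumed, not the library's actual definition): sequence two tokenizers, yielding the first one's tokens and then the second's.

def bind(first, second):
    # Sketch only: run the two tokenizers back to back as one.
    def bound_tokenizer():
        yield from first()
        yield from second()
    return bound_tokenizer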
Example #3
def test_tokenize_binary_string(roff_binary_body_tokenizer):
    # The trailing null byte terminates the string in the binary format
    # and is excluded from the token's value.
    tokenizer = roff_binary_body_tokenizer(b"Aroffstring\0")
    token = next(
        bind(
            repeated(tokenizer.tokenize_comment),
            tokenizer.tokenize_string(TokenKind.STRING_LITERAL),
        )())
    assert token.kind == TokenKind.STRING_LITERAL
    assert token.get_value(tokenizer.stream) == b"Aroffstring"
Example #4
def tokenize_simple_tagkey(self):
    """
    see AbstractRoffTokenizer.tokenize_simple_tagkey
    """
    type_token = next(self.tokenize_simple_type())

    yield type_token
    yield from repeated(self.tokenize_comment)()
    yield from self.tokenize_string(TokenKind.NAME)()
    # We do not use binary_delimited here because in the binary format it
    # is not possible to know whether you have a comment or a value
    # starting with b'#'. Therefore we assume that no comments occur when
    # we expect a value.
    yield from self.tokenize_value(type_token.kind)()
Example #5
def tokenize_tag_group(self):
    """
    Tokenize a binary tag group, yields
    [
        Token(TokenKind.TAG),
        tokenize_tag_key("bool a 1"),
        Token(TokenKind.ENDTAG),
    ]
    for a stream containing "tag bool a 1 endtag".
    """
    yield from self.tokenize_keyword[TokenKind.TAG]()
    yield from self.tokenize_name()
    yield from repeated(self.tokenize_tagkey)()
    yield from self.tokenize_keyword[TokenKind.ENDTAG]()
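A hypothetical usage sketch (the tokenizer construction is assumed) collecting the token kinds produced for such a stream:

# Hypothetical: `tokenizer` is assumed to wrap a stream containing
# b"tag bool a 1 endtag".
kinds = [token.kind for token in tokenizer.tokenize_tag_group()]
assert kinds[0] == TokenKind.TAG
assert kinds[-1] == TokenKind.ENDTAG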
Example #6
def test_combinators(inp_str):
    # The assertions below expect inp_str to be "foo foo foo".
    stream = io.StringIO(inp_str)

    foo_tokenizer = tokenize_word(stream, "foo", "foo")
    space_tokenizer = tokenize_word(stream, " ", " ")

    test_tokenizer = repeated(one_of(foo_tokenizer, space_tokenizer))()

    assert next(test_tokenizer).kind == "foo"
    assert next(test_tokenizer).kind == " "
    assert next(test_tokenizer).kind == "foo"
    assert next(test_tokenizer).kind == " "
    assert next(test_tokenizer).kind == "foo"
    with pytest.raises(StopIteration):
        next(test_tokenizer)
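A minimal sketch of a one_of combinator consistent with this test (an assumption, not the library's actual source): try each alternative in order and yield the tokens of the first one that succeeds.

def one_of(*alternatives):
    # Sketch only: materialize each alternative's tokens so a failing
    # alternative does not leak a partial result; rewinding the stream
    # after a failed partial match is omitted for brevity.
    def one_of_tokenizer():
        for tokenizer in alternatives:
            try:
                tokens = list(tokenizer())
            except TokenizationError:
                continue
            yield from tokens
            return
        raise TokenizationError("No alternative matched")
    return one_of_tokenizer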
Example #7
def tokenize_body(self):
    """
    Tokenize a roff binary file body, that is, everything
    following the b"roff-bin" or b"roff-asc" header.
    """
    yield from repeated(self.tokenize_tag_group)()
Example #8
def tokenize_name(self):
    return bind(repeated(self.tokenize_comment),
                self.tokenize_string(TokenKind.NAME))
Example #9
0
def tokenize_delimiter(self):
    return repeated(one_of(self.tokenize_comment, self.tokenize_space))
Example #10
def tokenize_array_data(self, element_type, num_values_token):
    """
    see AbstractRoffTokenizer.tokenize_array_data.
    """
    # element_type and num_values_token go unused here; values are
    # tokenized until tokenize_value no longer matches.
    yield from repeated(self.tokenize_value)()