def create_string_token(text, bare_string_allowed=False, multiline_strings_allowed=True):
    """
    Creates and returns a single string token for the given text.

    If bare_string_allowed is True and the text qualifies, a bare
    (unquoted) string token is produced. If multiline_strings_allowed is
    True, long or multi-newline text is emitted as a multiline string.

    Raises ValueError on non-string input.
    """
    if not isinstance(text, six.string_types):
        raise ValueError('Given value must be a string')

    if text == '':
        # Nothing to escape for an empty string: emit the empty quoted
        # string directly. (The previous code called '""'.format(...),
        # which had no placeholders and was a no-op.)
        return tokens.Token(tokens.TYPE_STRING, '""')
    elif bare_string_allowed and _bare_string_regex.match(text):
        return tokens.Token(tokens.TYPE_BARE_STRING, text)
    elif multiline_strings_allowed and (text.count('\n') >= 2 or len(text) > 80):
        # If containing two or more newlines or longer than 80 characters,
        # use the multiline string format.
        return _create_multiline_string_token(text)
    else:
        return tokens.Token(
            tokens.TYPE_STRING,
            '"{}"'.format(_escape_single_line_quoted_string(text)))
def create_primitive_token(value, multiline_strings_allowed=True):
    """
    Creates and returns a single token for the given primitive atomic value.

    Raises NotPrimitiveError when the given value is not a primitive
    atomic value.
    """
    # None is represented as an empty string value.
    if value is None:
        return create_primitive_token('')

    # bool must be tested before int: bool is a subclass of int.
    if isinstance(value, bool):
        return tokens.Token(tokens.TYPE_BOOLEAN, u'true' if value else u'false')
    if isinstance(value, int):
        return tokens.Token(tokens.TYPE_INTEGER, u'{}'.format(value))
    if isinstance(value, float):
        return tokens.Token(tokens.TYPE_FLOAT, u'{}'.format(value))
    if isinstance(value, (datetime.datetime, datetime.date, datetime.time)):
        iso = value.isoformat()
        # Normalize an explicit +00:00 UTC offset to the shorter 'Z' form.
        if iso.endswith('+00:00'):
            iso = iso[:-6] + 'Z'
        return tokens.Token(tokens.TYPE_DATE, iso)
    if isinstance(value, six.string_types):
        return create_string_token(
            value, multiline_strings_allowed=multiline_strings_allowed)

    raise NotPrimitiveError("{} of type {}".format(value, type(value)))
def _create_multiline_string_token(text):
    """
    Creates a TYPE_MULTILINE_STRING token wrapping the given text in
    triple double-quote delimiters.

    Any embedded '\"\"\"' sequence is escaped so it cannot terminate the
    delimiter early. (The previous replacement target u'\\"\\"\\"' was
    byte-identical to u'\"\"\"', making the escape a silent no-op.)
    """
    # Escape the first quote of any embedded triple-quote: \""" is an
    # escaped quote followed by two literal quotes, which cannot close
    # the multiline delimiter.
    escaped = text.replace(u'"""', u'\\"""')
    if len(escaped) > 50:
        # Long text is broken across lines, with a line-continuation
        # backslash before the closing delimiter.
        return tokens.Token(
            tokens.TYPE_MULTILINE_STRING,
            u'"""\n{}\\\n"""'.format(_break_long_text(escaped)))
    else:
        return tokens.Token(tokens.TYPE_MULTILINE_STRING,
                            u'"""{}"""'.format(escaped))
def _next_token_candidates(source):
    """
    Returns the list of candidate tokens matching at the head of the
    given source text, one per lexical spec whose pattern matches.
    """
    hits = ((spec, spec.re.search(source)) for spec in _LEXICAL_SPECS)
    return [tokens.Token(spec.type, match.group(1))
            for spec, match in hits if match]
def test_string():
    """Deserialization of every TOML string token flavor."""
    # Bare (unquoted) string: deserializes to its literal text.
    t0 = tokens.Token(tokens.TYPE_BARE_STRING, 'fawzy')
    assert toml2py.deserialize(t0) == 'fawzy'
    # Basic string: backslash escapes (\", \t, \n, \uXXXX) are decoded.
    t1 = tokens.Token(
        tokens.TYPE_STRING,
        '"I\'m a string. \\"You can quote me\\". Name\\tJos\\u00E9\\nLocation\\tSF."'
    )
    assert toml2py.deserialize(
        t1
    ) == u'I\'m a string. "You can quote me". Name\tJos\xe9\nLocation\tSF.'
    # Multiline basic string: the newline right after the opening """ is trimmed.
    t2 = tokens.Token(tokens.TYPE_MULTILINE_STRING,
                      '"""\nRoses are red\nViolets are blue"""')
    assert toml2py.deserialize(t2) == 'Roses are red\nViolets are blue'
    # Line-ending backslash: eats the newline and all following whitespace.
    t3_str = '"""\nThe quick brown \\\n\n\n fox jumps over \\\n the lazy dog."""'
    t3 = tokens.Token(tokens.TYPE_MULTILINE_STRING, t3_str)
    assert toml2py.deserialize(
        t3) == 'The quick brown fox jumps over the lazy dog.'
    # Line-ending backslash immediately after the opening delimiter too.
    t4_str = '"""\\\n The quick brown \\\n fox jumps over \\\n the lazy dog.\\\n """'
    t4 = tokens.Token(tokens.TYPE_MULTILINE_STRING, t4_str)
    assert toml2py.deserialize(
        t4) == 'The quick brown fox jumps over the lazy dog.'
    # Literal string: backslashes are NOT escape characters.
    t5 = tokens.Token(tokens.TYPE_LITERAL_STRING, r"'C:\Users\nodejs\templates'")
    assert toml2py.deserialize(t5) == r'C:\Users\nodejs\templates'
    # Multiline literal string: only the first newline is trimmed; all
    # other whitespace is preserved verbatim.
    t6_str = "'''\nThe first newline is\ntrimmed in raw strings.\n All other whitespace\n is preserved.\n'''"
    t6 = tokens.Token(tokens.TYPE_MULTILINE_LITERAL_STRING, t6_str)
    assert toml2py.deserialize(t6) == 'The first newline is\ntrimmed in raw strings.\n All' \
        ' other whitespace\n is preserved.\n'
def test_date():
    """Deserialization of TOML date tokens, including the malformed case."""
    # UTC timestamp with an explicit 'Z' suffix.
    expected = datetime(1979, 5, 27, 7, 32, tzinfo=pytz.utc)
    zulu_token = tokens.Token(tokens.TYPE_DATE, '1979-05-27T07:32:00Z')
    assert toml2py.deserialize(zulu_token) == expected

    # An offset timestamp resolves to the same UTC instant.
    offset_token = tokens.Token(tokens.TYPE_DATE, '1979-05-27T00:32:00-07:00')
    assert toml2py.deserialize(offset_token) == expected

    # A timestamp without timezone information must be rejected.
    naive_token = tokens.Token(tokens.TYPE_DATE, '1987-07-05T17:45:00')
    try:
        toml2py.deserialize(naive_token)
        assert False, 'Should detect malformed date'
    except DeserializationError:
        pass
def create_primitive_token(value, multiline_strings_allowed=True):
    """
    Creates and returns a single token for the given primitive atomic value.

    Raises NotPrimitiveError when the given value is not a primitive
    atomic value.
    """
    if value is None:
        # None is serialized as an empty string value.
        return create_primitive_token('')

    # bool is checked ahead of int because bool subclasses int.
    if isinstance(value, bool):
        return tokens.Token(tokens.TYPE_BOOLEAN, u'true' if value else u'false')
    if isinstance(value, int):
        return tokens.Token(tokens.TYPE_INTEGER, u'{}'.format(value))
    if isinstance(value, float):
        return tokens.Token(tokens.TYPE_FLOAT, u'{}'.format(value))
    if isinstance(value, (datetime.datetime, datetime.date, datetime.time)):
        # NOTE(review): the // 1000 suggests timestamp() returns
        # milliseconds while strict_rfc3339 expects whole seconds --
        # confirm against the timestamp() helper's contract.
        seconds = timestamp(value) // 1000
        return tokens.Token(
            tokens.TYPE_DATE,
            strict_rfc3339.timestamp_to_rfc3339_utcoffset(seconds))
    if isinstance(value, six.string_types):
        return create_string_token(
            value, multiline_strings_allowed=multiline_strings_allowed)

    raise NotPrimitiveError("{} of type {}".format(value, type(value)))
def create_operator_element(operator):
    """
    Creates a PunctuationElement instance containing an operator token of
    the specified type. The operator should be a TOML source str.
    """
    # Map each operator's source text to its token type; an unknown
    # operator raises KeyError, matching the original behavior.
    token_type = {
        ',': tokens.TYPE_OP_COMMA,
        '=': tokens.TYPE_OP_ASSIGNMENT,
        '[': tokens.TYPE_OP_SQUARE_LEFT_BRACKET,
        ']': tokens.TYPE_OP_SQUARE_RIGHT_BRACKET,
        '[[': tokens.TYPE_OP_DOUBLE_SQUARE_LEFT_BRACKET,
        ']]': tokens.TYPE_OP_DOUBLE_SQUARE_RIGHT_BRACKET,
        '{': tokens.TYPE_OP_CURLY_LEFT_BRACKET,
        '}': tokens.TYPE_OP_CURLY_RIGHT_BRACKET,
    }[operator]
    return PunctuationElement((tokens.Token(token_type, operator),))
def tokenize(source, is_top_level=False): """ Tokenizes the input TOML source into a stream of tokens. If is_top_level is set to True, will make sure that the input source has a trailing newline character before it is tokenized. Raises a LexerError when it fails recognize another token while not at the end of the source. """ # Newlines are going to be normalized to UNIX newlines. source = source.replace('\r\n', '\n') if is_top_level and source and source[-1] != '\n': source += '\n' next_row = 1 next_col = 1 next_index = 0 while next_index < len(source): new_token = _munch_a_token(source[next_index:]) if not new_token: raise LexerError( "failed to read the next token at ({}, {}): {}".format( next_row, next_col, source[next_index:])) # Set the col and row on the new token new_token = tokens.Token(new_token.type, new_token.source_substring, next_col, next_row) # Advance the index, row and col count next_index += len(new_token.source_substring) for c in new_token.source_substring: if c == '\n': next_row += 1 next_col = 1 else: next_col += 1 yield new_token
def create_whitespace_element(length=1, char=' '):
    """
    Creates and returns a WhitespaceElement containing `length` copies of
    the given whitespace character.
    """
    single = tokens.Token(tokens.TYPE_WHITESPACE, char)
    return WhitespaceElement((single,) * length)
def create_newline_element():
    """
    Creates and returns a single NewlineElement.
    """
    newline_token = tokens.Token(tokens.TYPE_NEWLINE, '\n')
    return NewlineElement((newline_token,))
def test_token_date(dt, content):
    """A date/time value serializes to the expected TYPE_DATE token."""
    produced = create_primitive_token(dt)
    expected = tokens.Token(tokens.TYPE_DATE, content)
    assert produced == expected
def test_integer():
    """Integer tokens deserialize, with underscore separators stripped."""
    for source_text, expected in (('42', 42), ('1_001_2', 10012)):
        tok = tokens.Token(tokens.TYPE_INTEGER, source_text)
        assert toml2py.deserialize(tok) == expected
def test_float():
    """Float tokens deserialize: plain, exponent, and underscore forms."""
    cases = [
        ('4.2', 4.2),
        ('12e2', 12e2),
        ('1_000e2', 1e5),
        ('314.1e-2', 3.141),
    ]
    for source_text, expected in cases:
        tok = tokens.Token(tokens.TYPE_FLOAT, source_text)
        assert toml2py.deserialize(tok) == expected
import datetime
import six
from prettytoml import tokens
import re
from prettytoml.errors import TOMLError
from prettytoml.tokens import Token
from prettytoml.util import chunkate_string


class NotPrimitiveError(TOMLError):
    """Raised when a value expected to be a primitive atomic value is not."""
    pass


# Pre-built singleton tokens for the fixed-text operators, keyed by token
# type, so callers can reuse them instead of constructing new tokens.
_operator_tokens_by_type = {
    tokens.TYPE_OP_SQUARE_LEFT_BRACKET:
    tokens.Token(tokens.TYPE_OP_SQUARE_LEFT_BRACKET, u'['),
    tokens.TYPE_OP_SQUARE_RIGHT_BRACKET:
    tokens.Token(tokens.TYPE_OP_SQUARE_RIGHT_BRACKET, u']'),
    tokens.TYPE_OP_DOUBLE_SQUARE_LEFT_BRACKET:
    tokens.Token(tokens.TYPE_OP_DOUBLE_SQUARE_LEFT_BRACKET, u'[['),
    tokens.TYPE_OP_DOUBLE_SQUARE_RIGHT_BRACKET:
    tokens.Token(tokens.TYPE_OP_DOUBLE_SQUARE_RIGHT_BRACKET, u']]'),
    tokens.TYPE_OP_COMMA:
    tokens.Token(tokens.TYPE_OP_COMMA, u','),
    tokens.TYPE_NEWLINE:
    tokens.Token(tokens.TYPE_NEWLINE, u'\n'),
    # NOTE(review): TYPE_OPT_DOT reads like a typo for TYPE_OP_DOT --
    # verify the name against prettytoml.tokens before renaming.
    tokens.TYPE_OPT_DOT:
    tokens.Token(tokens.TYPE_OPT_DOT, u'.'),
}