Example #1
def create_string_token(text,
                        bare_string_allowed=False,
                        multiline_strings_allowed=True):
    """
    Creates and returns a single string token.

    Raises ValueError on non-string input.
    """

    if not isinstance(text, six.string_types):
        raise ValueError('Given value must be a string')

    if text == '':
        return tokens.Token(tokens.TYPE_STRING, '""')
    elif bare_string_allowed and _bare_string_regex.match(text):
        return tokens.Token(tokens.TYPE_BARE_STRING, text)
    elif multiline_strings_allowed and (text.count('\n') >= 2
                                        or len(text) > 80):
        # Use the multiline format when the text contains two or more
        # newlines or is longer than 80 characters
        return _create_multiline_string_token(text)
    else:
        return tokens.Token(
            tokens.TYPE_STRING,
            '"{}"'.format(_escape_single_line_quoted_string(text)))
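
For illustration, a minimal usage sketch; the module path and the expected outputs are assumptions read off the branches above:

from prettytoml import tokens
from prettytoml.tokens.py2toml import create_string_token  # assumed module path

# Bare strings are only emitted when explicitly allowed
assert create_string_token('fawzy', bare_string_allowed=True).type == tokens.TYPE_BARE_STRING

# Short text that needs quoting falls through to a single-line string
assert create_string_token('hello world').source_substring == '"hello world"'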
Example #2
def create_primitive_token(value, multiline_strings_allowed=True):
    """
    Creates and returns a single token for the given primitive atomic value.

    Raises NotPrimitiveError when the given value is not a primitive atomic value.
    """
    if value is None:
        return create_primitive_token('')
    elif isinstance(value, bool):
        return tokens.Token(tokens.TYPE_BOOLEAN,
                            u'true' if value else u'false')
    elif isinstance(value, int):
        return tokens.Token(tokens.TYPE_INTEGER, u'{}'.format(value))
    elif isinstance(value, float):
        return tokens.Token(tokens.TYPE_FLOAT, u'{}'.format(value))
    elif isinstance(value, (datetime.datetime, datetime.date, datetime.time)):
        s = value.isoformat()
        if s.endswith('+00:00'):
            s = s[:-6] + 'Z'
        return tokens.Token(tokens.TYPE_DATE, s)
    elif isinstance(value, six.string_types):
        return create_string_token(
            value, multiline_strings_allowed=multiline_strings_allowed)

    raise NotPrimitiveError("{} of type {}".format(value, type(value)))
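
A hedged sketch of what each branch above yields (same assumed module path as before):

from prettytoml.tokens.py2toml import create_primitive_token  # assumed module path

assert create_primitive_token(True).source_substring == u'true'
assert create_primitive_token(42).type == tokens.TYPE_INTEGER
assert create_primitive_token(None).source_substring == u'""'  # None serializes as an empty string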
Example #3
def _create_multiline_string_token(text):
    # Escape any literal triple-quote sequences so they cannot close the delimiters
    escaped = text.replace(u'"""', u'\\"\\"\\"')
    if len(escaped) > 50:
        return tokens.Token(
            tokens.TYPE_MULTILINE_STRING,
            u'"""\n{}\\\n"""'.format(_break_long_text(escaped)))
    else:
        return tokens.Token(tokens.TYPE_MULTILINE_STRING,
                            u'"""{}"""'.format(escaped))
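
Text at or under the 50-character threshold takes the single-line branch; a sketch of both cases, assuming _break_long_text wraps long text at some fixed width:

t = _create_multiline_string_token(u'Roses are red')
assert t.source_substring == u'"""Roses are red"""'
# Longer text is emitted as:  """\n<wrapped text>\\\n"""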
Example #4
def _next_token_candidates(source):
    """
    Returns a candidate token for every lexical spec whose pattern matches
    the given source text.
    """
    matches = []
    for token_spec in _LEXICAL_SPECS:
        match = token_spec.re.search(source)
        if match:
            matches.append(tokens.Token(token_spec.type, match.group(1)))
    return matches
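
Some caller (presumably _munch_a_token, seen in Example #9) still has to pick a single winner among these candidates. A purely hypothetical picker, shown only to illustrate the usual maximal-munch rule:

def _choose_candidate(candidates):
    # Hypothetical helper, not part of the library: prefer the longest
    # source match, the common maximal-munch tie-breaker in lexers
    if not candidates:
        return None
    return max(candidates, key=lambda token: len(token.source_substring))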
Example #5
def test_string():

    t0 = tokens.Token(tokens.TYPE_BARE_STRING, 'fawzy')
    assert toml2py.deserialize(t0) == 'fawzy'

    t1 = tokens.Token(
        tokens.TYPE_STRING,
        '"I\'m a string. \\"You can quote me\\". Name\\tJos\\u00E9\\nLocation\\tSF."'
    )
    assert toml2py.deserialize(
        t1
    ) == u'I\'m a string. "You can quote me". Name\tJos\xe9\nLocation\tSF.'

    t2 = tokens.Token(tokens.TYPE_MULTILINE_STRING,
                      '"""\nRoses are red\nViolets are blue"""')
    assert toml2py.deserialize(t2) == 'Roses are red\nViolets are blue'

    t3_str = '"""\nThe quick brown \\\n\n\n  fox jumps over \\\n    the lazy dog."""'
    t3 = tokens.Token(tokens.TYPE_MULTILINE_STRING, t3_str)
    assert toml2py.deserialize(
        t3) == 'The quick brown fox jumps over the lazy dog.'

    t4_str = '"""\\\n       The quick brown \\\n       fox jumps over \\\n       the lazy dog.\\\n       """'
    t4 = tokens.Token(tokens.TYPE_MULTILINE_STRING, t4_str)
    assert toml2py.deserialize(
        t4) == 'The quick brown fox jumps over the lazy dog.'

    t5 = tokens.Token(tokens.TYPE_LITERAL_STRING,
                      r"'C:\Users\nodejs\templates'")
    assert toml2py.deserialize(t5) == r'C:\Users\nodejs\templates'

    t6_str = "'''\nThe first newline is\ntrimmed in raw strings.\n   All other whitespace\n   is preserved.\n'''"
    t6 = tokens.Token(tokens.TYPE_MULTILINE_LITERAL_STRING, t6_str)
    assert toml2py.deserialize(t6) == 'The first newline is\ntrimmed in raw strings.\n   All' \
                                      ' other whitespace\n   is preserved.\n'
Example #6
def test_date():
    t0 = tokens.Token(tokens.TYPE_DATE, '1979-05-27T07:32:00Z')
    assert toml2py.deserialize(t0) == datetime(1979, 5, 27, 7, 32,
                                               tzinfo=pytz.utc)

    t1 = tokens.Token(tokens.TYPE_DATE, '1979-05-27T00:32:00-07:00')
    assert toml2py.deserialize(t1) == datetime(1979, 5, 27, 7, 32,
                                               tzinfo=pytz.utc)

    t2 = tokens.Token(tokens.TYPE_DATE, '1987-07-05T17:45:00')
    try:
        toml2py.deserialize(t2)
        assert False, 'Should detect malformed date'
    except DeserializationError:
        pass
Example #7
def create_primitive_token(value, multiline_strings_allowed=True):
    """
    Creates and returns a single token for the given primitive atomic value.

    Raises NotPrimitiveError when the given value is not a primitive atomic value.
    """
    if value is None:
        return create_primitive_token('')
    elif isinstance(value, bool):
        return tokens.Token(tokens.TYPE_BOOLEAN,
                            u'true' if value else u'false')
    elif isinstance(value, int):
        return tokens.Token(tokens.TYPE_INTEGER, u'{}'.format(value))
    elif isinstance(value, float):
        return tokens.Token(tokens.TYPE_FLOAT, u'{}'.format(value))
    elif isinstance(value, (datetime.datetime, datetime.date, datetime.time)):
        ts = timestamp(value) // 1000
        return tokens.Token(tokens.TYPE_DATE,
                            strict_rfc3339.timestamp_to_rfc3339_utcoffset(ts))
    elif isinstance(value, six.string_types):
        return create_string_token(
            value, multiline_strings_allowed=multiline_strings_allowed)

    raise NotPrimitiveError("{} of type {}".format(value, type(value)))
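
Unlike Example #2, this variant formats dates through strict_rfc3339; timestamp() is presumably a helper returning epoch milliseconds, which explains the // 1000. The strict_rfc3339 call itself works like this:

import strict_rfc3339

# 296638320 is 1979-05-27T07:32:00Z expressed in Unix seconds
assert strict_rfc3339.timestamp_to_rfc3339_utcoffset(296638320) == '1979-05-27T07:32:00Z'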
Example #8
def create_operator_element(operator):
    """
    Creates a PunctuationElement instance containing an operator token of the specified type. The operator
    should be a TOML source str.
    """
    operator_type_map = {
        ',': tokens.TYPE_OP_COMMA,
        '=': tokens.TYPE_OP_ASSIGNMENT,
        '[': tokens.TYPE_OP_SQUARE_LEFT_BRACKET,
        ']': tokens.TYPE_OP_SQUARE_RIGHT_BRACKET,
        '[[': tokens.TYPE_OP_DOUBLE_SQUARE_LEFT_BRACKET,
        ']]': tokens.TYPE_OP_DOUBLE_SQUARE_RIGHT_BRACKET,
        '{': tokens.TYPE_OP_CURLY_LEFT_BRACKET,
        '}': tokens.TYPE_OP_CURLY_RIGHT_BRACKET,
    }

    ts = (tokens.Token(operator_type_map[operator], operator), )
    return PunctuationElement(ts)
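
Usage sketch; that PunctuationElement exposes its token tuple as .tokens is my assumption:

element = create_operator_element('[[')
assert element.tokens[0].type == tokens.TYPE_OP_DOUBLE_SQUARE_LEFT_BRACKET  # .tokens assumed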
Example #9
def tokenize(source, is_top_level=False):
    """
    Tokenizes the input TOML source into a stream of tokens.

    If is_top_level is set to True, makes sure that the input source ends with
    a newline character before it is tokenized.

    Raises a LexerError when it fails to recognize another token while not at
    the end of the source.
    """

    # Normalize Windows-style newlines to UNIX newlines
    source = source.replace('\r\n', '\n')

    if is_top_level and source and source[-1] != '\n':
        source += '\n'

    next_row = 1
    next_col = 1
    next_index = 0

    while next_index < len(source):

        new_token = _munch_a_token(source[next_index:])

        if not new_token:
            raise LexerError(
                "failed to read the next token at ({}, {}): {}".format(
                    next_row, next_col, source[next_index:]))

        # Set the col and row on the new token
        new_token = tokens.Token(new_token.type, new_token.source_substring,
                                 next_col, next_row)

        # Advance the index, row and col count
        next_index += len(new_token.source_substring)
        for c in new_token.source_substring:
            if c == '\n':
                next_row += 1
                next_col = 1
            else:
                next_col += 1

        yield new_token
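
A small driving example; the exact token types emitted depend on _LEXICAL_SPECS, and the .row/.col attribute names are assumed from the Token constructor call above:

source = 'title = "TOML"'
for token in tokenize(source, is_top_level=True):
    print(token.type, repr(token.source_substring), token.row, token.col)
# Roughly: a key, whitespace, '=', whitespace, a string value, and finally
# the newline appended because is_top_level=True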
Example #10
def create_whitespace_element(length=1, char=' '):
    """
    Creates and returns a WhitespaceElement made up of the given whitespace
    character repeated length times (a single space by default).
    """
    ts = (tokens.Token(tokens.TYPE_WHITESPACE, char), ) * length
    return WhitespaceElement(ts)
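
For example (again assuming elements expose their token tuple as .tokens):

element = create_whitespace_element(length=4)
assert len(element.tokens) == 4  # four single-space tokens
assert create_whitespace_element(2, '\t').tokens[0].source_substring == '\t'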
Example #11
def create_newline_element():
    """
    Creates and returns a single NewlineElement.
    """
    ts = (tokens.Token(tokens.TYPE_NEWLINE, '\n'), )
    return NewlineElement(ts)
Example #12
def test_token_date(dt, content):
    token = create_primitive_token(dt)
    assert token == tokens.Token(tokens.TYPE_DATE, content)
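
The (dt, content) signature suggests a parametrized test; a hypothetical fixture list consistent with the isoformat()-based date branch of Example #2:

import datetime
import pytest
import pytz

@pytest.mark.parametrize('dt, content', [
    (datetime.datetime(1979, 5, 27, 7, 32, tzinfo=pytz.utc), '1979-05-27T07:32:00Z'),
    (datetime.date(1979, 5, 27), '1979-05-27'),
])
def test_token_date(dt, content):
    token = create_primitive_token(dt)
    assert token == tokens.Token(tokens.TYPE_DATE, content)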
Example #13
def test_integer():
    t1 = tokens.Token(tokens.TYPE_INTEGER, '42')
    t2 = tokens.Token(tokens.TYPE_INTEGER, '1_001_2')

    assert toml2py.deserialize(t1) == 42
    assert toml2py.deserialize(t2) == 10012
Example #14
def test_float():
    tokens_and_values = (('4.2', 4.2), ('12e2', 12e2), ('1_000e2', 1e5),
                         ('314.1e-2', 3.141))
    for token_string, value in tokens_and_values:
        token = tokens.Token(tokens.TYPE_FLOAT, token_string)
        assert toml2py.deserialize(token) == value
Example #15
import datetime
import six
from prettytoml import tokens
import re
from prettytoml.errors import TOMLError
from prettytoml.tokens import Token
from prettytoml.util import chunkate_string


class NotPrimitiveError(TOMLError):
    pass


_operator_tokens_by_type = {
    tokens.TYPE_OP_SQUARE_LEFT_BRACKET:
        tokens.Token(tokens.TYPE_OP_SQUARE_LEFT_BRACKET, u'['),
    tokens.TYPE_OP_SQUARE_RIGHT_BRACKET:
        tokens.Token(tokens.TYPE_OP_SQUARE_RIGHT_BRACKET, u']'),
    tokens.TYPE_OP_DOUBLE_SQUARE_LEFT_BRACKET:
        tokens.Token(tokens.TYPE_OP_DOUBLE_SQUARE_LEFT_BRACKET, u'[['),
    tokens.TYPE_OP_DOUBLE_SQUARE_RIGHT_BRACKET:
        tokens.Token(tokens.TYPE_OP_DOUBLE_SQUARE_RIGHT_BRACKET, u']]'),
    tokens.TYPE_OP_COMMA:
        tokens.Token(tokens.TYPE_OP_COMMA, u','),
    tokens.TYPE_NEWLINE:
        tokens.Token(tokens.TYPE_NEWLINE, u'\n'),
    tokens.TYPE_OPT_DOT:
        tokens.Token(tokens.TYPE_OPT_DOT, u'.'),
}
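
The table makes it cheap to re-emit canonical punctuation, for example:

comma = _operator_tokens_by_type[tokens.TYPE_OP_COMMA]
assert comma.source_substring == u','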