def parse_statement(source: Source) -> StatementType:
    """Parse a single statement from ``source``.

    The statement kind is chosen by probing ``source.seek``: ``#`` selects a
    line comment, ``[`` selects a table header, and anything else is handed
    to the key/value entry parser.
    """
    for marker, parser in (('#', parse_line_comment), ('[', parse_table)):
        if source.seek(marker):
            return parser(source)
    # Neither marker was found: treat the input as ``key = value``.
    return parse_kv_entry(source)
def _parse_literal_string(source: Source, string_regex=LITERAL_STRING, separator='\'') -> str:
    """Parse a string delimited by ``separator`` with no escape handling.

    :param source: input to consume from.
    :param string_regex: pattern the text between the separators must match.
    :param separator: opening and closing delimiter.
    :return: the text consumed between the two separators.
    :raises DoesNotMatch: if ``string_regex`` does not match after the
        opening separator.
    """
    source.expect(separator)
    if not source.consume_regex(string_regex):
        raise DoesNotMatch()
    # Capture the body before the closing separator is consumed.
    body = source.last_consumed
    source.expect(separator)
    return body
def parse_keyword(source: Source) -> str:
    """Parse a key: a basic string, a literal string, or a bare keyword.

    :raises DoesNotMatch: if the input is not quoted and ``KEYWORD_REGEX``
        does not match.
    """
    # Quoted keys are delegated to the corresponding string parser.
    if source.seek('"'):
        return _parse_basic_string(source)
    if source.seek('\''):
        return _parse_literal_string(source)

    if source.consume_regex(KEYWORD_REGEX):
        return source.last_consumed
    raise DoesNotMatch()
def parse_value(source: Source) -> ValueType:
    """Parse one TOML value from ``source``.

    Alternatives are tried in a fixed order: boolean, the four string forms,
    number, array, inline table.

    :raises DoesNotMatch: if none of the alternatives applies.
    """
    if source.consume_regex(re.compile(r'(?P<res>(true)|(false))')):
        return source.last_consumed == 'true'

    # Triple-quote openers are probed before their single-quote counterparts;
    # presumably seek() is a prefix test, in which case the order matters.
    string_parsers = (
        ('"""', _parse_multiline_string),
        ('"', _parse_basic_string),
        ('\'\'\'', _parse_multiline_literal_string),
        ('\'', _parse_literal_string),
    )
    for opener, parser in string_parsers:
        if source.seek(opener):
            return parser(source)

    # NOTE: datetime values are not handled here; that branch was disabled
    # (commented out) in the original code.
    if source.consume_regex(NUMBER_REGEX):
        return _parse_number(source.last_consumed)

    for opener, parser in (('[', _parse_array), ('{', _parse_inline_table)):
        if source.seek(opener):
            return parser(source)

    # Only the first 100 characters of the remaining text are reported.
    raise DoesNotMatch('Cannot find valid TOML value in string {text}'
                       .format(text=source._text[:100]))
def _parse_table_name(source: Source) -> Tuple[str, ...]:
    """Parse a dotted table name into a tuple of its key parts.

    Whitespace matched by ``WHITESPACE_REGEX`` is skipped on both sides of
    every part.
    """
    def next_part() -> str:
        # One key, with optional whitespace skipped before and after it.
        source.consume_regex(WHITESPACE_REGEX)
        part = parse_keyword(source)
        source.consume_regex(WHITESPACE_REGEX)
        return part

    parts = [next_part()]
    while source.consume('.'):
        parts.append(next_part())
    return tuple(parts)
def assertInvalidToml(self, text: str):
    """Assert that ``text`` cannot be parsed as a complete keyword.

    The check passes when ``parse_keyword`` raises ``InvalidTomlError`` or
    when it succeeds but leaves unparsed text behind.
    """
    source = Source(text)
    with self.assertRaises(InvalidTomlError):
        parse_keyword(source)
        # A partial parse is treated as a failure too.
        if len(source._text) != 0:
            raise InvalidTomlError
def __setitem__(self, key, value):
    """Store ``value`` under ``key``; ``Table`` values use a dotted path.

    Non-``Table`` values are passed to the parent class unchanged. For a
    ``Table``, ``key`` is parsed as a table name, missing intermediate
    tables are created, and ``value`` is attached under the last part.

    :raises ValueError: if ``key`` is not fully consumed as a table name.
    :raises NotSupported: if the parsed name is already registered in
        ``self.statement_nodes``.
    """
    if not isinstance(value, Table):
        super().__setitem__(key, value)
        return

    name_source = Source(key)
    path = _parse_table_name(name_source)
    if len(name_source._text) > 0:
        raise ValueError('Invalid table name {}'.format(key))
    if path in self.statement_nodes:
        raise NotSupported('Table override is not supported.')

    *parents, leaf = path
    owner = self
    for part in parents:
        if part not in owner:
            # Write to .nodes directly: owner[part] = Table() would come
            # back into this method.
            owner.nodes[part] = Table(parent=owner,
                                      parent_table_ref_key=part)
        owner = owner[part]

    owner.nodes[leaf] = value
    value.parent = owner
    value.parent_table_ref_key = leaf
    # NOTE(review): a fresh empty Table, not ``value``, is recorded here;
    # confirm this is intended.
    self.statement_nodes[path] = Table()
def _parse_regex(s: Source, regex: Pattern) -> str:
    """Consume ``regex`` from ``s`` and return the consumed text.

    :raises DoesNotMatch: if the pattern does not match; the message holds
        the first 100 characters of the remaining input.
    """
    if s.consume_regex(regex):
        return s.last_consumed
    message = 'Cannot match {text}... to regex {regex}'.format(
        text=s._text[:100],
        regex=regex
    )
    raise DoesNotMatch(message)
def parse_line_comment(source: Source) -> Comment:
    """Parse a ``#`` comment and return it wrapped in ``Comment``.

    After the comment body, a newline or the end of input is required.
    """
    line_end = re.compile(r'(?P<res>\n|\Z)')

    source.expect('#')
    source.consume_regex(COMMENT_REGEX)
    # Read the body now; the terminator check below consumes more input.
    body = source.last_consumed
    source.expect_regex(line_end)
    return Comment(body)
def parse_kv_entry(source: Source) -> KVEntry:
    """Parse ``key = value`` and return it as a ``KVEntry``.

    Whitespace matched by ``WHITESPACE_REGEX`` is allowed on either side of
    the ``=`` sign.
    """
    name = parse_keyword(source)
    source.consume_regex(WHITESPACE_REGEX)
    source.expect('=')
    source.consume_regex(WHITESPACE_REGEX)
    return KVEntry(name, parse_value(source))
def parse_statements(text: str) -> List[Any]:
    """Parse ``text`` into a list of statements.

    Whitespace matched by ``ENCLOSING_WHITESPACE_CHARS`` is skipped before
    the first statement and after every statement.

    :param text: the document to parse.
    :return: the parsed statements in source order; empty when ``text`` is
        empty or contains only such whitespace.
    :raises: whatever ``parse_statement`` or ``Source.expect_eof`` raise on
        invalid input.
    """
    source = Source(text)
    statements = []  # type: List[Any]

    # Strip leading whitespace BEFORE the emptiness test. Otherwise a
    # whitespace-only document enters the loop and parse_statement is called
    # on an exhausted source, which fails instead of yielding no statements.
    source.consume_regex(ENCLOSING_WHITESPACE_CHARS)
    while len(source._text) > 0:
        statements.append(parse_statement(source))
        # Skip the gap up to the next statement (or the end of input).
        source.consume_regex(ENCLOSING_WHITESPACE_CHARS)

    source.expect_eof()
    return statements
def test_expect_eof_true(self):
    """``expect_eof`` accepts a source built from the empty string."""
    source = Source('')
    # Must return normally, i.e. not raise InvalidTomlError.
    source.expect_eof()
def _parse_array(source: Source) -> List[ValueType]:
    """Parse a ``[...]`` array of values that all share one type.

    Items are separated by commas; whitespace matched by
    ``ENCLOSING_WHITESPACE_CHARS`` is allowed around items and before the
    closing bracket.

    :param source: input positioned at the opening ``[``.
    :return: the parsed items in order.
    :raises MixedTypesArray: if an item's type differs from the type of the
        first item.
    """
    array_type = None  # type: type
    items = []  # type: List[ValueType]

    source.expect('[')
    try:
        while True:
            source.consume_regex(ENCLOSING_WHITESPACE_CHARS)
            parsed_val = parse_value(source)

            if array_type is None:
                array_type = type(parsed_val)
            elif type(parsed_val) is not array_type:
                # Exact type comparison on purpose: bool is a subclass of
                # int, so isinstance() accepted [1, true] while rejecting
                # [true, 1].
                raise MixedTypesArray

            items.append(parsed_val)
            source.consume_regex(ENCLOSING_WHITESPACE_CHARS)
            source.expect(',')
    except (ExpectationError, DoesNotMatch):
        # Deliberate control flow: a missing value or comma ends the item
        # list. The closing bracket is validated below.
        pass

    source.consume_regex(ENCLOSING_WHITESPACE_CHARS)
    source.expect(']')
    return items
def test_expect_any_text_chunk_false(self):
    """``expect`` raises ``ExpectationError`` for text the source lacks."""
    source = Source('World')
    with self.assertRaises(ExpectationError):
        source.expect('Hello')
def test_expect_string_regex_true(self):
    """``expect_regex(BASIC_STRING)`` does not raise for ``"Hello"``."""
    source = Source('"Hello"')
    source.expect_regex(BASIC_STRING)
def test_expect_eof_false(self):
    """``expect_eof`` raises ``ExpectationError`` while text remains."""
    source = Source('121')
    with self.assertRaises(ExpectationError):
        source.expect_eof()
def test_expect_any_text_chunk_true(self):
    """``expect`` does not raise when given the source's own text."""
    chunk = 'Hello'
    source = Source(chunk)
    # Must return normally.
    source.expect(chunk)
def parse_table(source: Source) -> ParsedTable:
    """Parse a ``[name]`` table header into a ``ParsedTable``.

    The name between the brackets is parsed by ``_parse_table_name``.
    """
    source.expect('[')
    name_parts = _parse_table_name(source)
    source.expect(']')
    return ParsedTable(name_parts)
def assertInvalidToml(self, original_text: str):
    """Assert that ``parse_value`` raises ``InvalidTomlError`` on the text."""
    source = Source(original_text)
    with self.assertRaises(InvalidTomlError):
        parse_value(source)
def assertKeywordParsedCorrectly(self, text: str, expected: str):
    """Assert ``parse_keyword`` returns ``expected`` and uses all of ``text``."""
    source = Source(text)
    self.assertEqual(parse_keyword(source), expected)
    # Nothing may be left unparsed.
    self.assertEqual(len(source._text), 0)
def assertStatementParsedCorrectly(self, original_text: str, expected: Any):
    """Assert ``parse_statement`` returns ``expected`` and uses all input."""
    source = Source(original_text)
    self.assertEqual(parse_statement(source), expected)
    # Nothing may be left unparsed.
    self.assertEqual(len(source._text), 0)
def assertValueParsedCorrectly(self, original_text: str, expected: ValueType):
    """Assert ``parse_value`` returns ``expected`` and uses all input."""
    source = Source(original_text)
    self.assertEqual(parse_value(source), expected)
    # Nothing may be left unparsed.
    self.assertEqual(len(source._text), 0)
def _parse_inline_table(source: Source) -> InlineTable:
    """Parse a ``{key = value, ...}`` inline table.

    :return: an ``InlineTable`` filled with the parsed entries; empty for
        ``{}``.
    :raises InvalidTomlError: if the last thing consumed while parsing the
        first entry was ``}``, which is taken to mean a nested inline table.
    """
    table = InlineTable()
    source.expect('{')

    # Empty table: the closing brace follows immediately.
    if source.consume('}'):
        return table

    source.consume_regex(WHITESPACE_REGEX)
    first = parse_kv_entry(source)
    # NOTE(review): this check runs for the first entry only; later entries
    # are not checked. Confirm whether that is intended.
    if source.last_consumed == '}':
        raise InvalidTomlError('Cannot have nested inline tables.')
    table[first.key] = first.val

    while source.consume(','):
        source.consume_regex(WHITESPACE_REGEX)
        entry = parse_kv_entry(source)
        table[entry.key] = entry.val

    source.consume_regex(WHITESPACE_REGEX)
    source.expect('}')
    return table
def _parse_string(source: Source, string_regex=BASIC_STRING, separator='"') -> str:
    """Parse a ``separator``-delimited string and resolve backslash escapes.

    The body is read as runs matching ``string_regex``, each optionally
    followed by a backslash escape.

    :param source: input to consume from.
    :param string_regex: pattern for a run of text between escapes.
    :param separator: opening and closing delimiter.
    :return: the string content with escapes resolved.
    :raises DoesNotMatch: if ``string_regex`` fails to match where a run of
        text is expected.
    """
    source.expect(separator)

    pieces = []
    while True:
        if not source.consume_regex(string_regex):
            raise DoesNotMatch('Invalid string starting from {text}'
                               .format(text=source._text[:100]))
        pieces.append(source.last_consumed)

        # No backslash after the run means the string body is finished.
        if not source.consume('\\'):
            break

        # Backslash followed by newline characters contributes nothing.
        if source.consume_regex(NEWLINE_ESCAPE_CHARS_REGEX):
            continue

        # Unicode escape: the consumed text is read as a hex code point.
        if source.consume_regex(SHORT_UNICODE_REGEX) or source.consume_regex(LONG_UNICODE_REGEX):
            pieces.append(chr(int(source.last_consumed, 16)))
            continue

        # Anything else must be a known escape character.
        source.expect_regex(ESCAPE_CHARS_REGEX)
        pieces.append(ESCAPES_MAPPING[source.last_consumed])

    source.expect(separator)
    return ''.join(pieces)