def parse_section(
        ctx: CTX,
        mark: str,
        location: Location,
        content: Content,
        before_mark_indentation: int) -> int:
    """Parse a section introduced by a heading mark.

    Args:
        ctx: Parsing context; supplies the parent section, the composer
            and the section stack.
        mark: Mark that was just read from ``content``.
        location: Location where ``mark`` started.
        content: Content being consumed.
        before_mark_indentation: Indentation measured before the mark;
            used to capture the section body.

    Returns:
        ``PASS`` when ``mark`` is not a heading mark, ``EXIT`` when the
        enclosing section has the same or a deeper level (the content is
        rewound to ``location`` first), and ``CONSUMED`` once the section
        heading and content were captured.
    """
    if mark not in heading_block_marks:
        return PASS

    parent_section = ctx.get_parent_section()
    section_level = section_levels[mark]

    # A heading at the same or an outer level ends the current section:
    # rewind so the mark is read again by whoever handles EXIT.
    if parent_section is not None and parent_section.level >= section_level:
        content.go_back(location)
        return EXIT

    # TODO is this ok?
    skip_void(content)

    after_mark_indentation = content.column

    section = Section(location, section_level)
    ctx.composer.add(section)

    ctx.section_stack.append(section)
    try:
        section.heading = capture_component(ctx, after_mark_indentation, True)
        section.content = capture_component(ctx, before_mark_indentation, True)
    finally:
        # Always undo the push, even when capturing fails, so the stack
        # never keeps a section whose parsing was aborted.
        ctx.section_stack.pop()

    return CONSUMED
def parse_inline_style(
        ctx: CTX,
        mark: str,
        location: Location,
        content: Content,
        indentation: int) -> int:
    """Parse inline text wrapped by a pair of style marks.

    The begin mark selects both the expected end mark and the style name
    stored in the resulting ``StyledText``.

    Returns:
        ``PASS`` when ``mark`` is not one of the style begin marks,
        otherwise ``CONSUMED`` after adding the styled text to the composer.

    Raises:
        StxError: When the matching end mark does not follow the contents.
    """
    known_styles = (
        (strong_begin_mark, strong_end_mark, 'strong'),
        (emphasized_begin_mark, emphasized_end_mark, 'emphasized'),
        (code_begin_mark, code_end_mark, 'code'),
        (deleted_begin_mark, deleted_end_mark, 'deleted'),
        (d_quote_begin_mark, d_quote_end_mark, 'double-quote'),
        (s_quote_begin_mark, s_quote_end_mark, 'single-quote'),
    )

    # First matching begin mark wins, in the order listed above.
    for begin_mark, end_mark, style in known_styles:
        if mark == begin_mark:
            break
    else:
        return PASS

    with ctx.using_stop_mark(end_mark):
        # It uses the original indentation
        # so the paragraph can be continued.
        start = content.get_location()
        contents = parse_inline(ctx, start, content, indentation)

    closed = content.pull(end_mark)
    if not closed:
        raise StxError(f'Expected mark: {end_mark}')

    styled = StyledText(location, contents, style)
    ctx.composer.add(styled)

    return CONSUMED
def skip_void(content: Content):
    """Advance ``content`` past any run of ``WHITESPACE_CHARS``.

    Stops at the first character that is not in ``WHITESPACE_CHARS`` or
    when ``peek`` returns ``None``.
    """
    current = content.peek()
    while current is not None and current in WHITESPACE_CHARS:
        content.move_next()
        current = content.peek()
def parse_attribute(ctx: CTX, mark: str, content: Content) -> int:
    """Parse an attribute line and push it onto the composer.

    Returns:
        ``PASS`` when ``mark`` is not the attribute mark, otherwise
        ``CONSUMED`` after the entry was read up to the end of the line.
    """
    if mark == attribute_special_mark:
        attribute = parse_entry(content)

        # Nothing else may follow the entry on the same line.
        content.expect_end_of_line()

        ctx.composer.push_attribute(attribute.name, attribute.value)
        return CONSUMED

    return PASS
def parse_container(
        ctx: CTX,
        mark: str,
        location: Location,
        content: Content,
        indentation_before_mark: int) -> int:
    """Parse a container area, optionally wrapped in a function call.

    After the begin mark an optional entry may appear on the same line.
    The body is captured with the container end mark as stop mark. When an
    entry was given, the body becomes the argument of a block-level
    ``FunctionCall``; otherwise the body itself is added to the composer.

    Returns:
        ``PASS`` when ``mark`` is not the container begin mark, otherwise
        ``CONSUMED``.
    """
    if mark != container_area_begin_mark:
        return PASS

    content.read_spaces()
    header_entry = try_parse_entry(content)
    content.expect_end_of_line()

    with ctx.using_stop_mark(container_area_end_mark):
        body = capture_component(ctx, indentation_before_mark)

    # NOTE(review): the result of this pull is not checked, so a missing
    # end mark is not reported here.
    content.pull(container_area_end_mark)
    content.expect_end_of_line()

    if header_entry is None:
        result = body
    else:
        result = FunctionCall(
            location,
            inline=False,
            key=header_entry.name,
            options=header_entry.value,
            argument=body,
        )

    ctx.composer.add(result)

    return CONSUMED
def parse_inline_link(
        ctx: CTX,
        mark: str,
        location: Location,
        content: Content,
        indentation: int) -> int:
    """Parse a link: bracketed inline text plus an optional reference.

    Returns:
        ``PASS`` when ``mark`` is not the link text begin mark, otherwise
        ``CONSUMED`` after adding a ``LinkText`` to the composer. The
        reference is ``None`` when no reference part follows the text.

    Raises:
        StxError: When the link text or the reference is not terminated
            by its end mark.
    """
    if mark != link_text_begin_mark:
        return PASS

    with ctx.using_stop_mark(link_text_end_mark):
        # It uses the original indentation
        # so the paragraph can be continued.
        text_start = content.get_location()
        contents = parse_inline(ctx, text_start, content, indentation)

    if not content.pull(link_text_end_mark):
        raise StxError(f'Expected mark: {link_text_end_mark}')

    reference = None

    if content.pull(link_ref_begin_mark):
        # Copy characters verbatim until the reference end mark is pulled.
        chars = []
        while not content.pull(link_ref_end_mark):
            char = content.peek()
            if char is None:
                raise StxError(
                    f'Expected {link_ref_end_mark}', content.get_location())
            chars.append(char)
            content.move_next()
        reference = ''.join(chars)

    ctx.composer.add(LinkText(location, contents, reference))

    return CONSUMED
def try_parse_item(content: Content, allow_entry_separator=True) -> Optional[Value]:
    """Try to read one value: an empty marker, a group, a token or an entry.

    Alternatives are attempted in that order.

    Returns:
        The parsed value, or ``None`` when none of the alternatives match.
    """
    if content.peek() == EMPTY_CHAR:
        content.move_next()
        return Empty()

    parsed_group = try_parse_group(content)
    if parsed_group is None:
        return try_parse_token_or_entry(content, allow_entry_separator)

    return parsed_group
def parse_entry(content: Content) -> Entry:
    """Read an entry from ``content``, failing when there is none.

    Raises:
        StxError: When ``try_parse_entry`` returns ``None``; the error
            carries the location where reading started.
    """
    start = content.get_location()
    parsed = try_parse_entry(content)

    if parsed is not None:
        return parsed

    raise StxError('Expected token or entry', start)
def parse_inline_function(
        ctx: CTX,
        mark: str,
        location: Location,
        content: Content) -> int:
    """Parse an inline function call that has no argument.

    The call is an entry placed between the function begin and end marks;
    whitespace around the entry is skipped.

    Returns:
        ``PASS`` when ``mark`` is not the function begin mark, otherwise
        ``CONSUMED`` after adding an inline ``FunctionCall`` to the composer.

    Raises:
        StxError: When the function end mark does not follow the entry.
    """
    if mark != function_begin_mark:
        return PASS

    skip_void(content)

    # The call is located at the entry, not at the begin mark.
    entry_location = content.get_location()
    entry = parse_entry(content)

    skip_void(content)

    if not content.pull(function_end_mark):
        raise StxError(f'Expected mark: {function_end_mark}')

    call = FunctionCall(
        entry_location,
        inline=True,
        key=entry.name,
        options=entry.value)
    ctx.composer.add(call)

    return CONSUMED
def parse_inline(
        ctx: CTX,
        location: Location,
        content: Content,
        indentation: int) -> List[Component]:
    """Parse inline components until the content halts or a stop occurs.

    Each iteration reads an inline mark and offers it to the inline
    parsers in a fixed order; the first one that does not answer with a
    falsy signal decides how the loop continues.

    Returns:
        Whatever ``ctx.composer.pop()`` yields for the frame pushed at the
        beginning of this call.

    Raises:
        StxError: Wrapping any ``StxError`` raised while parsing, tagged
            with the location of the most recently read mark.
        AssertionError: When a parser returns an unknown signal.
    """
    ctx.composer.push()

    try:
        while not content.halted():
            if content.test(ctx.stop_mark):
                break

            # Rebinding the parameter keeps the error location current.
            location = content.get_location()
            mark = content.read_mark(inline_marks)

            signal = (
                parse_inline_function(ctx, mark, location, content)
                or parse_inline_container(ctx, mark, location, content, indentation)
                or parse_inline_style(ctx, mark, location, content, indentation)
                or parse_inline_link(ctx, mark, location, content, indentation)
                or parse_inline_token(ctx, mark, location, content, indentation)
                or parse_inline_text(ctx, mark, location, content, indentation)
            )

            if signal == PASS:
                # No parser accepted the mark.
                raise StxError(f'Not implemented mark: {mark}')
            if signal == EXIT:
                break
            if signal != CONSUMED:
                raise AssertionError(f'Illegal signal: {signal}')
    except StxError as cause:
        raise StxError('Error parsing inline content.', location) from cause

    return ctx.composer.pop()
def parse_inline_container(
        ctx: CTX,
        mark: str,
        location: Location,
        content: Content,
        indentation: int) -> int:
    """Parse an inline container followed by a mandatory function call.

    The container contents become the argument (wrapped in a
    ``Composite``) of an inline ``FunctionCall`` described by the entry
    between the function begin and end marks.

    Returns:
        ``PASS`` when ``mark`` is not the container begin mark, otherwise
        ``CONSUMED``.

    Raises:
        StxError: When the container end mark, the function begin mark or
            the function end mark is missing.
    """
    if mark != container_begin_mark:
        return PASS

    with ctx.using_stop_mark(container_end_mark):
        # It uses the original indentation
        # so the paragraph can be continued.
        inner_start = content.get_location()
        contents = parse_inline(ctx, inner_start, content, indentation)

    # The container must be closed and immediately followed by a function.
    for required_mark in (container_end_mark, function_begin_mark):
        if not content.pull(required_mark):
            raise StxError(f'Expected mark: {required_mark}')

    skip_void(content)

    entry_location = content.get_location()
    entry = parse_entry(content)

    skip_void(content)

    if not content.pull(function_end_mark):
        raise StxError(f'Expected mark: {function_end_mark}')

    call = FunctionCall(
        entry_location,
        inline=True,
        key=entry.name,
        options=entry.value,
        argument=Composite(location, contents))
    ctx.composer.add(call)

    return CONSUMED
def parse_literal(
        ctx: CTX,
        mark: str,
        location: Location,
        content: Content,
        indentation_before_mark: int) -> int:
    """Parse a literal area whose lines are kept verbatim.

    An optional entry may follow the opening mark on the same line. Lines
    are read until one equals the mark once trailing whitespace is
    stripped. A line starting with the escape char has that char removed
    and must then start with the mark or with the escape char.

    Returns:
        ``PASS`` when ``mark`` is not the literal area mark, otherwise
        ``CONSUMED`` after adding either a ``Literal`` or a block-level
        ``FunctionCall`` whose argument is the ``Literal``.

    Raises:
        StxError: When the lines run out before the closing mark, or when
            an escaped line escapes something else.
    """
    if mark != literal_area_mark:
        return PASS

    content.read_spaces()

    entry_location = content.get_location()
    entry = try_parse_entry(content)

    content.expect_end_of_line()

    collected = []

    while True:
        line = content.read_line(indentation_before_mark)

        if line is None:
            raise StxError(f'Expected: {mark}', content.get_location())

        if line.startswith(escape_char):
            line = line[1:]  # remove escape char

            if not (line.startswith(mark) or line.startswith(escape_char)):
                raise StxError(
                    f'Invalid escaped sequence, expected: '
                    f'{see(mark)} or {see(escape_char)}.')
        elif line.rstrip() == mark:
            # Closing mark: it is not part of the literal text.
            break

        collected.append(line)

    literal = Literal(location, ''.join(collected))

    if entry is None:
        component = literal
    else:
        component = FunctionCall(
            entry_location,
            inline=False,
            key=entry.name,
            options=entry.value,
            argument=literal,
        )

    ctx.composer.add(component)

    return CONSUMED
def parse_values(content: Content) -> List[Value]:
    """Read a list of items separated by ``GROUP_SEPARATOR_CHAR``.

    The list may be empty, but once a separator was consumed another item
    is mandatory. Whitespace around separators is skipped; whitespace
    after the last item is not consumed.

    Returns:
        The parsed items, in order of appearance.

    Raises:
        StxError: When a separator is not followed by an item. This used
            to be a bare ``Exception``; the sibling parsers in this file
            report syntax problems as ``StxError`` with a location.
    """
    items = []
    can_be_none = True

    while True:
        loc0 = content.get_location()

        value = try_parse_item(content)

        if value is None:
            if can_be_none:
                content.go_back(loc0)
                break
            raise StxError('Expected to read a group item', loc0)

        items.append(value)

        # Remember the position right after the item so trailing
        # whitespace can be given back when no separator follows.
        loc0 = content.get_location()

        skip_void(content)

        if content.peek() == GROUP_SEPARATOR_CHAR:
            content.move_next()
            skip_void(content)
            can_be_none = False
        else:
            content.go_back(loc0)
            break

    return items
def try_parse_group(content: Content):
    """Try to read a group delimited by the group begin and end chars.

    Returns:
        A ``Group`` holding the parsed items, or ``None`` when the next
        character is not ``GROUP_BEGIN_CHAR`` (nothing is consumed then).

    Raises:
        StxError: When the items are not followed by ``GROUP_END_CHAR``.
    """
    if content.peek() != GROUP_BEGIN_CHAR:
        return None

    content.move_next()
    skip_void(content)

    members = parse_values(content)

    skip_void(content)

    if content.peek() != GROUP_END_CHAR:
        raise StxError('Expected group end char', content.get_location())

    content.move_next()

    return Group(members)
def try_parse_token_or_entry(
        content: Content,
        allow_entry_separator=True) -> Union[Token, Entry, None]:
    """Try to read a token, or an entry whose name is that token.

    After the text, an entry is produced when either the entry separator
    follows (only if ``allow_entry_separator`` is true) or a group
    follows. Otherwise the text alone becomes a ``Token`` and the
    whitespace skipped while looking ahead is given back.

    Returns:
        ``None`` when no text could be read, otherwise a ``Token`` or an
        ``Entry``.

    Raises:
        StxError: When the entry separator is not followed by a value.
    """
    name = try_parse_text(content)

    if name is None:
        return None

    after_text = content.get_location()

    skip_void(content)

    if content.peek() == ENTRY_SEPARATOR_CHAR and allow_entry_separator:
        content.move_next()
        skip_void(content)

        # The value of an entry cannot itself use the entry separator.
        value = try_parse_item(content, allow_entry_separator=False)

        if value is None:
            raise StxError('Expected an entry value', content.get_location())

        return Entry(name, value)

    trailing_group = try_parse_group(content)

    if trailing_group is None:
        # go before skipping void
        content.go_back(after_text)
        return Token(name)

    return Entry(name, trailing_group)
def parse_table(
        ctx: CTX,
        mark: str,
        location: Location,
        content: Content) -> int:
    """Parse table cells started by a header-row, normal-row or cell mark.

    The cells go to the last component of the composer when it is a
    ``Table``; otherwise a new table is created and added. A cell mark
    reuses the table's last row when there is one; the row marks always
    start a new row. Further cells are read for as long as a cell mark
    follows.

    Returns:
        ``PASS`` when ``mark`` is none of the table marks, otherwise
        ``CONSUMED``.
    """
    table_marks = (header_row_block_mark, normal_row_block_mark, cell_block_mark)
    if mark not in table_marks:
        return PASS

    is_header = mark == header_row_block_mark
    # Only a bare cell mark continues the previous row.
    continues_row = not is_header and mark != normal_row_block_mark

    table = ctx.composer.get_last_component()

    if not isinstance(table, Table):
        table = Table(location)
        ctx.composer.add(table)

    row = None
    if continues_row:
        row = table.get_last_row()

    if row is None:
        row = TableRow(location, is_header)
        table.rows.append(row)

    # TODO is this ok?
    skip_void(content)

    base_indentation = content.column
    cell_indentation = base_indentation

    while True:
        with ctx.using_stop_mark(cell_block_mark):
            cell = capture_component(ctx, cell_indentation, True)

        row.cells.append(cell)

        if not ctx.reader.active():
            break

        # The reader's current content is re-fetched after each capture.
        content = ctx.reader.get_content()

        rewind_point = content.get_location()

        # Consume indentation when it is the beginning of the line
        if content.column == 0:
            if content.read_spaces(base_indentation) < base_indentation:
                content.go_back(rewind_point)
                break

        if content.peek() != cell_block_mark:
            break

        content.move_next()
        content.read_spaces()
        cell_indentation = content.column

    return CONSUMED
def load_next_content(self) -> Optional[Content]:
    """Load the content of the next pending file path.

    The first path is removed from ``self.file_paths`` and loaded through
    ``Content.from_file``.

    Returns:
        The loaded content, or ``None`` when no file paths are left.
    """
    if not self.file_paths:
        return None

    next_path = self.file_paths.pop(0)
    return Content.from_file(next_path)
def try_parse_text(content: Content) -> Optional[str]:
    """Try to read a bare token or a delimited (quoted) text.

    A bare token is a run of ``TOKEN_CHARS``. A delimited text is enclosed
    in ``TOKEN_DELIMITER_CHAR``; inside it ``TOKEN_ESCAPE_CHAR`` may
    escape only the escape char itself or the delimiter.

    Returns:
        The text read, or ``None`` when the content is exhausted or the
        next character starts neither form (nothing is consumed then).

    Raises:
        StxError: When a delimited text is not closed, or when an escape
            is incomplete or invalid. These used to be bare ``Exception``
            instances without a location.
    """
    c = content.peek()

    # Guard the end of input explicitly: a membership test with ``None``
    # fails with TypeError when TOKEN_CHARS is a string.
    if c is None:
        return None

    if c in TOKEN_CHARS:
        out = StringIO()

        while True:
            out.write(c)
            content.move_next()

            c = content.peek()

            if c is None or c not in TOKEN_CHARS:
                break

        return out.getvalue()

    if c == TOKEN_DELIMITER_CHAR:
        content.move_next()

        out = StringIO()

        while True:
            c = content.peek()

            if c is None:
                raise StxError(
                    f'Expected {TOKEN_DELIMITER_CHAR}',
                    content.get_location())
            elif c == TOKEN_DELIMITER_CHAR:
                # Closing delimiter: consume it and stop.
                content.move_next()
                break
            elif c == TOKEN_ESCAPE_CHAR:
                content.move_next()

                c = content.peek()

                if c is None:
                    raise StxError(
                        'Expected escaped char.', content.get_location())
                elif c in [TOKEN_ESCAPE_CHAR, TOKEN_DELIMITER_CHAR]:
                    out.write(c)
                    content.move_next()
                else:
                    # TODO add unicode support
                    raise StxError(
                        f'Invalid escaped char: {c}', content.get_location())
            else:
                content.move_next()
                out.write(c)

        return out.getvalue()

    return None
def parse_inline_text(
        ctx: CTX,
        mark: str,
        location: Location,
        content: Content,
        indentation: int) -> int:
    """Parse a run of plain text (the case where no inline mark was read).

    Characters are accumulated until an inline mark or the current stop
    mark is found, or until the text is completed by an empty line, by a
    line indented less than ``indentation``, or by a line starting with a
    non-inline mark. The escape char may precede any mark, the stop mark
    or the escape char itself.

    Returns:
        ``PASS`` when ``mark`` is not ``None``; ``EXIT`` when no text was
        read or when the text was completed; ``CONSUMED`` otherwise. A
        ``PlainText`` is added to the composer whenever text was read.

    Raises:
        StxError: When the escape char precedes something not escapable.
    """
    if mark is not None:
        return PASS

    buffer = StringIO()
    text_completed = False

    while content.peek() is not None:
        # Check if the text is broken by an inline or stop mark
        if content.test_any(inline_marks) or content.test(ctx.stop_mark):
            break

        char = content.peek()

        if char == '\n':
            buffer.write(char)
            content.move_next()

            # Check if the text is completed by an empty line
            if content.consume_empty_line():
                text_completed = True
                break

            line_start = content.get_location()

            # Check if the text is completed by indentation change
            # or by a non-inline mark at the beginning of the line
            if (content.read_spaces(indentation) < indentation
                    or content.test_any(not_inline_marks)):
                content.go_back(line_start)
                text_completed = True
                break
        elif char == escape_char:
            content.move_next()

            escaped = content.pull_any(all_marks)

            if escaped is None:
                if content.pull(ctx.stop_mark):
                    escaped = ctx.stop_mark
                elif content.pull(escape_char):
                    escaped = escape_char
                else:
                    raise StxError('invalid escaped char')

            buffer.write(escaped)
        else:
            buffer.write(char)
            content.move_next()

    text = buffer.getvalue()

    if text == '':
        return EXIT

    ctx.composer.add(PlainText(location, text))

    return EXIT if text_completed else CONSUMED
def parse(text: str) -> Any:
    """Parse ``text`` as a value and convert it with ``to_any()``.

    The text is wrapped in a ``Content`` built with ``'test'`` as its
    second argument (presumably a source name; confirm against ``Content``).
    """
    return parse_value(Content(text, 'test')).to_any()