def parse_sentence(self, stop_tokens=None):
    """Parse styled text up to a stop token and return a SentenceNode.

    ``self.parse_styled_text`` is called repeatedly until it returns
    ``None``. Every run of consecutive ``WordNode`` results is then
    collapsed into a single ``TextNode`` whose text is the concatenation
    of the word values; all other nodes are kept as they are.

    Args:
        stop_tokens: optional set of tokens that end the sentence.
            EOF and EOL tokens are always added to it.

    Returns:
        A ``SentenceNode`` whose content is the list of grouped nodes.
    """
    import itertools

    # EOF and EOL always end a sentence, whatever the caller asked for
    terminators = (stop_tokens or set()).union(
        {Token(TokenTypes.EOF), Token(TokenTypes.EOL)}
    )

    # Collect nodes until the styled text parser has nothing left to give
    parsed = []
    while True:
        node = self.parse_styled_text(terminators)
        if node is None:
            break
        parsed.append(node)

    def is_word(node):
        return node.__class__ == WordNode

    # Merge each run of adjacent words into one TextNode
    merged = []
    for words, run in itertools.groupby(parsed, is_word):
        if not words:
            merged.extend(run)
            continue
        merged.append(TextNode("".join(word.value for word in run)))

    return SentenceNode(content=merged)
def _parse_escaped_char(self):
    """Consume a backslash and the token after it, saving them as text.

    When the value of the token that follows the backslash is found in
    the string "{}" it is saved without the backslash; any other value
    is saved with the backslash put back in front of it.
    """
    self.get_token(TokenTypes.LITERAL, "\\")
    escaped = self.get_token().value

    # Escaped braces lose the backslash, everything else keeps it
    text = escaped if escaped in "{}" else f"\\{escaped}"

    self._save(TextNode(text))
def _parse_verbatim(self):
    """Consume a backtick-delimited span and save it, backticks included.

    Everything between the opening backtick and the next backtick (or
    EOF) is joined with ``self.collect_join``, asking it to preserve
    escaped stop tokens. The saved ``TextNode`` contains the collected
    text wrapped again in backticks.
    """
    self.get_token(TokenTypes.LITERAL, "`")

    # Stop at the closing backtick or at the end of the input
    stop_at = [Token(TokenTypes.LITERAL, "`"), Token(TokenTypes.EOF)]
    inner = self.collect_join(stop_at, preserve_escaped_stop_tokens=True)

    self.get_token(TokenTypes.LITERAL, "`")

    self._save(TextNode(f"`{inner}`"))
def _parse_curly(self):
    """Replace a ``{name}`` or ``{namespace.name}`` placeholder with its value.

    The text between the curly braces is looked up in ``self.variables``.
    A name that contains a dot is split on the FIRST dot only: the part
    before it is the namespace and the rest is the key inside that
    namespace, so a name with several dots produces a clean
    ``PreprocessError`` instead of an unpacking ``ValueError``.

    The value found is saved as a ``TextNode``.

    Raises:
        PreprocessError: if the variable or its namespace is not defined,
            or if the namespace cannot be indexed by name. The message
            always reports the full name as written between the braces.
    """
    self.get_token(TokenTypes.LITERAL, "{")
    variable_name = self.collect_join(
        [Token(TokenTypes.LITERAL, "}"), Token(TokenTypes.EOF)]
    )
    self.get_token(TokenTypes.LITERAL, "}")

    namespace, dot, name = variable_name.partition(".")

    # Keep the try body down to the lookups so that errors raised while
    # saving the node are not reported as an undefined variable
    try:
        if dot:
            variable_value = self.variables[namespace][name]
        else:
            variable_value = self.variables[variable_name]
    except (KeyError, TypeError) as exc:
        raise PreprocessError(
            f'Attribute "{variable_name}" has not been defined'
        ) from exc

    self._save(TextNode(variable_value))
def parse(self):
    """Run the parser and collapse all resulting nodes into one TextNode.

    After ``self._parse()`` the string form of each node value is
    concatenated, and ``self.nodes`` is replaced by a list holding a
    single ``TextNode`` with that text.
    """
    self._parse()

    joined = "".join(str(node.value) for node in self.nodes)

    self.nodes = [TextNode(joined)]
def _parse_pass(self):
    """Consume the next token and save its value unchanged as a TextNode."""
    token = self.get_token()
    self._save(TextNode(token.value))
def _parse_source_engine(self, content, secondary_content, kwargs):
    """Extract callouts and highlights from the lines of a source block.

    Source blocks have the form

        [source, language, attributes...]
        ----
        content
        ----

    and support the following attributes

        callouts=":"    The separator used by callouts
        highlight="@"   The special character to turn on highlight

    A callout is written at the end of a line, wrapped in the separator

        content:1:

    and a line is highlighted when the highlight marker is used as name

        content:@:

    Callout descriptions can be added to the block as secondary content
    with the syntax

        <name>: <description>

    Since Mau uses Pygments, the attribute language is one of the
    languages supported by that tool.

    Args:
        content: list of source lines. Lines that carry a callout or a
            highlight are rewritten IN PLACE without the marker.
        secondary_content: list of callout description lines.
        kwargs: block attributes; "callouts" and "highlight" are popped.

    Returns:
        A tuple ``(textlines, callouts, highlighted_lines)`` where
        ``textlines`` is one ``TextNode`` per source line, ``callouts``
        is ``{"markers": {linenum: name}, "contents": {name: text}}`` and
        ``highlighted_lines`` is the list of highlighted line numbers.
    """
    # Get the delimiter for callouts (":" by default)
    delimiter = kwargs.pop("callouts", ":")

    # Get the marker for highlighted lines ("@" by default)
    highlight_marker = kwargs.pop("highlight", "@")

    # A dictionary that contains callout markers in
    # the form {linenum:name}
    callout_markers = {}

    # A list of highlighted lines
    highlighted_lines = []

    for linenum, line in enumerate(content):
        # Only lines that end with the delimiter might contain a callout
        if not line.endswith(delimiter):
            continue

        # Remove the whole final delimiter, which can be
        # longer than a single character
        stripped = line[: -len(delimiter)]

        # It's a trap! There are no separators left
        if delimiter not in stripped:
            continue

        # Get the line and the callout, which is whatever
        # follows the last remaining delimiter
        code, callout_name = stripped.rsplit(delimiter, 1)
        content[linenum] = code

        # Check if we want to just highlight the line
        if callout_name == highlight_marker:
            highlighted_lines.append(linenum)
        else:
            callout_markers[linenum] = callout_name

    # The names are fixed at this point, collect them once
    # instead of scanning the dictionary for each description
    known_names = set(callout_markers.values())

    # A dictionary that contains the text for each
    # marker in the form {name:text}
    callout_contents = {}

    # If there was secondary content it should be formatted
    # with callout names followed by colon and the
    # callout text.
    for line in secondary_content:
        if ":" not in line:
            self.error(
                "Callout description should be written as 'name: text'. "
                f"Missing ':' in '{line}'"
            )

        # Split on the first colon only, the description
        # itself is allowed to contain colons
        name, text = line.split(":", 1)

        if name not in known_names:
            self.error(
                f"Callout {name} has not been created in the source code")

        callout_contents[name] = text.strip()

    # Put markers and contents together
    callouts = {"markers": callout_markers, "contents": callout_contents}

    # Source blocks must preserve the content literally
    textlines = [TextNode(line) for line in content]

    return textlines, callouts, highlighted_lines
def _parse_block(self):
    """Parse a delimited block and save it as a BlockNode.

    A block has the form

        [block_type]
        ----
        Content
        ----
        Optional secondary content

    Blocks are delimited by 4 consecutive identical characters.

    The block type is the first unnamed argument and can be replaced
    through ``self.block_aliases``. The attributes "classes",
    "condition", "preprocessor" and "engine" are removed from the keyword
    arguments and processed here. When a condition is given and is not
    satisfied the method returns without saving any node.

    Raises:
        TokenError: if the delimiter is not made of 4 identical characters.
        EngineError: if the requested engine is not one of "raw", "mau",
            "source" or "default".
    """
    # Get the delimiter and check the length
    delimiter = self.get_token(TokenTypes.TEXT).value
    if len(delimiter) != 4 or len(set(delimiter)) != 1:
        raise TokenError

    self.get_token(TokenTypes.EOL)

    # Collect everything until the next delimiter
    content = self._collect_lines(
        [Token(TokenTypes.TEXT, delimiter), Token(TokenTypes.EOF)])

    self.force_token(TokenTypes.TEXT, delimiter)
    self.get_token(TokenTypes.EOL)

    # Get the optional secondary content
    secondary_content = self._collect_lines(
        [Token(TokenTypes.EOL), Token(TokenTypes.EOF)])

    # Consume the title
    title = self._pop_title()

    # The first unnamed argument is the block type
    blocktype = self.argsparser.pop()

    # If there is a block alias for blocktype replace it
    # otherwise use the blocktype we already have
    blocktype = self.block_aliases.get(blocktype, blocktype)

    # Assign names
    self.argsparser.set_names_and_defaults(
        self.block_names.get(blocktype, []),
        self.block_defaults.get(blocktype, {}),
    )

    # Consume the attributes
    args, kwargs = self.argsparser.get_arguments_and_reset()

    # Extract classes and convert them into a list
    classes = [name for name in kwargs.pop("classes", "").split(",") if name]

    # Extract condition if present and process it
    condition = kwargs.pop("condition", "")

    # Run this only if there is a condition on this block
    if condition:
        try:
            # The condition should be either test:variable:value or test:variable:
            test, variable, value = condition.split(":")
        except ValueError:
            # NOTE(review): the code below relies on self.error raising,
            # otherwise test, variable and value are unbound - confirm
            self.error(
                f"Condition {condition} is not in the form "
                '"test:variable:value" or "test:variable:"'
            )

        # If there is no value use True
        if not value:
            value = True

        # Check if the variable matches the value and apply the requested
        # test: "if" wants a match, any other test wants a mismatch
        matches = self.variables.get(variable) == value
        expected = test == "if"

        # If the condition is not satisfied return
        if matches != expected:
            return

    # Extract the preprocessor
    preprocessor = kwargs.pop("preprocessor", "none")

    # Extract the engine
    engine = kwargs.pop("engine", "default")

    # Create the node parameters according to the engine
    if engine in ["raw", "mau"]:
        # Engine "raw" doesn't process the content,
        # so we just pass it untouched in the form of
        # a TextNode per line. The same is true for "mau"
        # as the visitor will have to fire up an new parser
        # to process the content.
        content = [TextNode(line) for line in content]
        secondary_content = [TextNode(line) for line in secondary_content]
    elif engine == "source":
        # Engine "source" extracts the content (source code),
        # the callouts, and the highlights.
        # The default language is "text".
        content, callouts, highlights = self._parse_source_engine(
            content, secondary_content, kwargs)
        secondary_content = []

        kwargs["callouts"] = callouts
        kwargs["highlights"] = highlights
        kwargs.setdefault("language", "text")
    elif engine == "default":
        # This is the default engine and it parses
        # both content and secondary content using a new parser
        # but then merges the headers and footnotes of the
        # primary content into the current one.
        # NOTE(review): headers and footnotes found in the secondary
        # content are not merged - confirm this is intended
        pc = MainParser(variables=self.variables).analyse(
            "\n".join(content))
        ps = MainParser(variables=self.variables).analyse(
            "\n".join(secondary_content))

        content = pc.nodes
        secondary_content = ps.nodes

        self.footnote_defs.extend(pc.footnote_defs)
        self.headers.extend(pc.headers)
    else:
        raise EngineError(f"Engine {engine} is not available")

    self._save(
        BlockNode(
            blocktype=blocktype,
            content=content,
            secondary_content=secondary_content,
            args=args,
            classes=classes,
            engine=engine,
            preprocessor=preprocessor,
            kwargs=kwargs,
            title=title,
        ))
def _parse_source_block(self, content, secondary_content, title, args, kwargs):
    """Parse the content of a source block and save it as a SourceNode.

    A line that ends with ``<delimiter>name<delimiter>`` carries a callout
    called ``name``; when the name is the highlight marker the line is
    highlighted instead. In both cases the marker is removed from the
    line. The secondary content provides the callout descriptions, one
    per line, in the form ``name: text``.

    Args:
        content: list of source lines. Lines that carry a callout or a
            highlight are rewritten IN PLACE without the marker.
        secondary_content: list of callout description lines.
        title: the title passed to the node.
        args: unnamed block arguments, merged into kwargs to find the
            language.
        kwargs: block attributes. "callouts" (delimiter, default ":") and
            "highlight" (marker, default "@") are popped, as is
            "language" (default "text").

    Raises:
        ParseError: if a description line contains no colon or names a
            callout that is not present in the source code.
    """
    delimiter = kwargs.pop("callouts", ":")
    highlight_marker = kwargs.pop("highlight", "@")

    # A dictionary that contains callout markers in
    # the form {linenum:name}
    callout_markers = {}

    # A list of highlighted lines
    highlighted_lines = []

    for linenum, line in enumerate(content):
        if not line.endswith(delimiter):
            continue

        # Remove the whole final delimiter, which can be
        # longer than a single character
        stripped = line[: -len(delimiter)]

        # It's a trap! There are no separators left
        if delimiter not in stripped:
            continue

        # The callout name is whatever follows the last remaining delimiter
        code, callout_name = stripped.rsplit(delimiter, 1)
        content[linenum] = code

        if callout_name == highlight_marker:
            highlighted_lines.append(linenum)
        else:
            callout_markers[linenum] = callout_name

    # The names are fixed at this point, collect them once
    # instead of scanning the dictionary for each description
    known_names = set(callout_markers.values())

    # A dictionary that contains the text for each
    # marker in the form {name:text}
    callout_contents = {}

    for line in secondary_content:
        if ":" not in line:
            raise ParseError(
                "Callout description should be written as 'name: text'. "
                f"Missing ':' in '{line}'"
            )

        # Split on the first colon only, the description
        # itself is allowed to contain colons
        name, text = line.split(":", 1)

        if name not in known_names:
            raise ParseError(
                f"Callout {name} has not been created in the source code")

        callout_contents[name] = text.strip()

    # Put markers and contents together
    callouts = {"markers": callout_markers, "contents": callout_contents}

    # Source blocks must preserve the content literally
    textlines = [TextNode(line) for line in content]

    _, kwargs = merge_args(args, kwargs, ["language"])
    language = kwargs.pop("language", "text")

    self._save(
        SourceNode(
            language,
            callouts=callouts,
            highlights=highlighted_lines,
            delimiter=delimiter,
            code=textlines,
            title=title,
            kwargs=kwargs,
        ))
def _parse_raw_block(self, content, args, kwargs):
    """Save the block content untouched as a RawNode.

    Each line of ``content`` becomes a ``TextNode``. The parameters
    ``args`` and ``kwargs`` are accepted but not used.
    """
    raw_lines = list(map(TextNode, content))
    self._save(RawNode(content=raw_lines))
def _parse_source_block(self, content, secondary_content, title, args, kwargs):
    """Parse the content of a source block and save it as a SourceNode.

    A line that ends with ``<delimiter>name<delimiter>`` carries a callout
    called ``name``, which is removed from the line. The secondary content
    provides the callout descriptions, one per line, in the form
    ``name: text``; each description is attached to the line that carries
    the callout with that name. When the same name is used on several
    lines the description goes to the last one.

    Args:
        content: list of source lines. Lines that carry a callout are
            rewritten IN PLACE without the marker.
        secondary_content: list of callout description lines.
        title: the title passed to the node.
        args: unnamed block arguments, merged into kwargs to find the
            language.
        kwargs: block attributes. "callouts" (delimiter, default ":") and
            "language" (default "text") are popped.

    Raises:
        ParseError: if a description line contains no colon or names a
            callout that is not present in the source code.
    """
    delimiter = kwargs.pop("callouts", ":")

    # This removes callouts from the source code
    # and maps callout names to line numbers
    # {callout_name:linenum}
    callout_name_to_linenum = {}
    for linenum, line in enumerate(content):
        if not line.endswith(delimiter):
            continue

        # Remove the whole final delimiter, which can be
        # longer than a single character
        stripped = line[: -len(delimiter)]

        # It's a trap! There are no separators left
        if delimiter not in stripped:
            continue

        # The callout name is whatever follows the last remaining delimiter
        code, callout_name = stripped.rsplit(delimiter, 1)
        content[linenum] = code
        callout_name_to_linenum[callout_name] = linenum

    # This reads the callout text and connects
    # it with the text line {linenum:(callout_name,text)}
    callouts = {}
    for line in secondary_content:
        if ":" not in line:
            raise ParseError(
                "Callout description should be written as 'name: text'. "
                f"Missing ':' in '{line}'"
            )

        # Prefer the documented "name: text" separator and split on its
        # first occurrence only, so that the text can contain ": " too.
        # Fall back to a bare colon for lines written as "name:text".
        callout_name, separator, text = line.partition(": ")
        if not separator:
            callout_name, _, text = line.partition(":")

        try:
            linenum = callout_name_to_linenum[callout_name]
        except KeyError as exc:
            raise ParseError(
                f"Callout {callout_name} can't be found") from exc

        callouts[linenum] = (callout_name, text)

    # Source blocks must preserve the content literally
    textlines = [TextNode(line) for line in content]

    _, kwargs = merge_args(args, kwargs, ["language"])
    language = kwargs.pop("language", "text")

    self._save(
        SourceNode(
            language,
            callouts=callouts,
            delimiter=delimiter,
            code=textlines,
            title=title,
            kwargs=kwargs,
        ))