def target(state: StateBlock, startLine: int, endLine: int, silent: bool):
    """Block rule matching a single-line target of the form ``(label)=``.

    The line (ignoring surrounding whitespace) must start with ``(``, end with
    ``)=`` and carry a non-empty label between them.  On success a
    ``myst_target`` token holding the label is pushed and ``state.line`` is
    moved to the following line.

    :param state: block parser state for the document being tokenized
    :param startLine: index of the line to test
    :param endLine: exclusive upper line bound (unused by this rule)
    :param silent: validation mode; report a match without emitting a token
    :return: True when the line is a target, False otherwise
    """
    # a line indented 4+ columns past the block indent is an indented code block
    if state.sCount[startLine] - state.blkIndent >= 4:
        return False

    line_start = state.bMarks[startLine] + state.tShift[startLine]
    line_end = state.eMarks[startLine]
    stripped = state.src[line_start:line_end].strip()

    # drop the leading "(" and the trailing ")=" to obtain the label
    label = stripped[1:-2]
    well_formed = stripped.startswith("(") and stripped.endswith(")=")
    if not well_formed or not label:
        return False

    if silent:
        return True

    state.line = startLine + 1

    token = state.push("myst_target", "", 0)
    token.attrSet("class", "myst-target")
    token.content = label
    token.map = [startLine, state.line]

    return True
def amsmath_block(state: StateBlock, startLine: int, endLine: int, silent: bool):
    """Block rule that captures an amsmath environment starting on ``startLine``.

    ``match_environment`` (defined elsewhere in this module) is applied to the
    source from the first non-blank character of the line onwards; it is used
    here as returning either a falsy value or a
    ``(environment, numbered, end_offset)`` tuple whose offset is relative to
    the text it was given.

    The rule only succeeds when the end of the match falls on a line inside
    ``[startLine, endLine)``.  Otherwise the environment extends past the
    region this rule is allowed to consume (e.g. past the end of the enclosing
    container), and claiming it would leave ``state.line`` un-advanced.

    :param state: block parser state
    :param startLine: index of the line to test
    :param endLine: exclusive upper line bound for this rule
    :param silent: validation mode; no token is emitted
    :return: True when an environment was matched and consumed
    """
    # if it's indented more than 3 spaces, it should be a code block
    if state.sCount[startLine] - state.blkIndent >= 4:
        return False

    begin = state.bMarks[startLine] + state.tShift[startLine]

    outcome = match_environment(state.src[begin:])
    if not outcome:
        return False
    environment, numbered, endpos = outcome
    # convert the offset from "relative to begin" to an absolute source position
    endpos += begin

    # locate the line that contains the end of the environment
    for line in range(startLine, endLine):
        if state.bMarks[line] <= endpos <= state.eMarks[line]:
            break
    else:
        # the environment does not end within the allowed line range
        return False

    state.line = line + 1

    if not silent:
        token = state.push("amsmath", "math", 0)
        token.block = True
        token.content = state.src[begin:endpos]
        token.meta = {"environment": environment, "numbered": numbered}
        token.map = [startLine, line]

    return True
def line_comment(state: StateBlock, startLine: int, endLine: int, silent: bool):
    """Block rule matching a single comment line that starts with ``%``.

    On success a ``myst_line_comment`` token is pushed whose content is the
    rest of the line (surrounding whitespace stripped), and ``state.line`` is
    moved to the following line.

    :param state: block parser state
    :param startLine: index of the line to test
    :param endLine: exclusive upper line bound (unused by this rule)
    :param silent: validation mode; report a match without emitting a token
    :return: True when the line is a comment line, False otherwise
    """
    # if it's indented more than 3 spaces, it should be a code block
    if state.sCount[startLine] - state.blkIndent >= 4:
        return False

    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    # Test the marker on `state.src` directly: `startswith` with an offset never
    # raises, even when `pos` sits at the very end of the source.
    if pos >= maximum or not state.src.startswith("%", pos):
        return False

    if silent:
        return True

    state.line = startLine + 1

    token = state.push("myst_line_comment", "", 0)
    token.attrSet("class", "myst-line-comment")
    token.content = state.src[pos + 1:maximum].strip()
    token.map = [startLine, state.line]
    token.markup = "%"

    return True
def front_matter(state: StateBlock, start_line: int, end_line: int, silent: bool):
    """Block rule for a ``key: value`` header at the very top of a document.

    Starting at line 0, every line up to the first empty line must contain a
    ``:`` separator.  Each such line is stored in a dict keyed by the
    lower-cased text before the first ``:``; the value is the remaining text,
    kept verbatim (including any leading space).  The header must hold at
    least one pair and be terminated by an empty line, otherwise the rule
    does not match.

    On success a ``pelican_frontmatter`` token is pushed with the raw header
    text as ``content`` and the parsed dict as ``meta``; ``state.line`` is set
    to the terminating empty line.

    :param state: block parser state
    :param start_line: index of the line to test; only 0 can match
    :param end_line: exclusive upper line bound
    :param silent: validation mode; report a match without emitting a token
    :return: True when a header was recognised, False otherwise
    """
    # front matter is only recognised on the first line of the document
    if start_line != 0:
        return False

    meta = {}
    next_line = start_line
    while True:
        if next_line >= end_line:
            # ran out of lines before reaching the terminating empty line
            return False

        start = state.bMarks[next_line]
        maximum = state.eMarks[next_line]
        if start == maximum:
            # empty line is terminator
            break

        key, separator, value = state.src[start:maximum].partition(":")
        if not separator:
            # no key/value separator on this line: not a header
            return False
        meta[key.lower()] = value
        next_line += 1

    if not meta:
        # an empty header would consume nothing and leave state.line unchanged
        return False

    if silent:
        return True

    old_parent = state.parentType
    old_line_max = state.lineMax
    state.parentType = "container"
    # this will prevent lazy continuations from ever going past our end marker
    state.lineMax = next_line

    token = state.push("pelican_frontmatter", "", 0)
    token.content = state.src[state.bMarks[start_line]:state.eMarks[next_line]]
    token.block = True
    token.meta = meta

    state.parentType = old_parent
    state.lineMax = old_line_max
    state.line = next_line
    token.map = [start_line, state.line]

    return True
def block_break(state: StateBlock, startLine: int, endLine: int, silent: bool):
    """Block rule matching a break line made of at least three ``+`` markers.

    The markers may be interleaved with whitespace.  Whatever follows the
    marker run on the same line becomes the token content.  On success a
    ``myst_block_break`` token is pushed and ``state.line`` is moved to the
    following line.

    :param state: block parser state
    :param startLine: index of the line to test
    :param endLine: exclusive upper line bound (unused by this rule)
    :param silent: validation mode; report a match without emitting a token
    :return: True when the line is a block break, False otherwise
    """
    plus_code = 0x2B  # character code of "+"

    cursor = state.bMarks[startLine] + state.tShift[startLine]
    line_end = state.eMarks[startLine]

    # a line indented 4+ columns past the block indent is an indented code block
    if state.sCount[startLine] - state.blkIndent >= 4:
        return False

    if state.srcCharCode[cursor] != plus_code:
        return False
    cursor += 1

    # count the remaining markers; whitespace between them is skipped
    marker_total = 1
    while cursor < line_end:
        code = state.srcCharCode[cursor]
        if code == plus_code:
            marker_total += 1
        elif not isSpace(code):
            break
        cursor += 1

    if marker_total < 3:
        return False

    if silent:
        return True

    state.line = startLine + 1

    token = state.push("myst_block_break", "hr", 0)
    token.attrSet("class", "myst-block")
    token.content = state.src[cursor:line_end].strip()
    token.map = [startLine, state.line]
    token.markup = chr(plus_code) * marker_total

    return True
def _substitution_block(state: StateBlock, startLine: int, endLine: int, silent: bool):
    """Block rule matching a line that consists of one delimited substitution.

    The stripped line must be at least five characters long, begin with two
    ``start_delimiter`` characters and end with two ``end_delimiter``
    characters (both names come from the enclosing scope).  Lines holding
    several substitutions are rejected here.  On success a
    ``substitution_block`` token carrying the inner text is pushed.

    :param state: block parser state
    :param startLine: index of the line to test
    :param endLine: exclusive upper line bound (unused by this rule)
    :param silent: validation mode; report a match without emitting a token
    :return: True when the line is a substitution block, False otherwise
    """
    line_start = state.bMarks[startLine] + state.tShift[startLine]
    line_end = state.eMarks[startLine]

    # a line indented 4+ columns past the block indent is an indented code block
    if state.sCount[startLine] - state.blkIndent >= 4:
        return False

    stripped = state.src[line_start:line_end].strip()

    # too short to hold both delimiter pairs plus at least one character
    if len(stripped) < 5:
        return False

    opens = stripped[0] == start_delimiter and stripped[1] == start_delimiter
    closes = stripped[-1] == end_delimiter and stripped[-2] == end_delimiter
    if not (opens and closes):
        return False

    inner = stripped[2:-2].strip()

    # special case if multiple on same line, e.g. {{a}}{{b}}
    if (end_delimiter * 2) in inner:
        return False

    # note: the line pointer is advanced even in validation mode
    state.line = startLine + 1

    if silent:
        return True

    token = state.push("substitution_block", "div", 0)
    token.block = True
    token.content = inner
    token.attrSet("class", "substitution")
    token.attrSet("text", inner)
    token.markup = f"{start_delimiter}{end_delimiter}"
    token.map = [startLine, state.line]

    return True
def line_comment(state: StateBlock, startLine: int, endLine: int, silent: bool):
    """Block rule matching a run of consecutive comment lines starting with ``%``.

    The first line must not be indented as a code block.  Every directly
    following line whose first non-blank character is ``%`` is appended to the
    same comment.  A single ``myst_line_comment`` token is pushed whose content
    is the text after each ``%`` (trailing whitespace removed), joined by
    newlines; ``state.line`` is set to the first line that is not part of the
    comment.

    :param state: block parser state
    :param startLine: index of the line to test
    :param endLine: exclusive upper line bound
    :param silent: validation mode; report a match without emitting a token
    :return: True when the line starts a comment, False otherwise
    """
    # if it's indented more than 3 spaces, it should be a code block
    if state.sCount[startLine] - state.blkIndent >= 4:
        return False

    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    # `startswith` with an offset is used instead of indexing because a
    # whitespace-only final line puts `pos` at len(src), where `src[pos]` raises.
    if not state.src.startswith("%", pos):
        return False

    if silent:
        return True

    token = state.push("myst_line_comment", "", 0)
    token.attrSet("class", "myst-line-comment")
    token.markup = "%"

    comment_lines = [state.src[pos + 1:maximum].rstrip()]

    # search end of block while collecting the continuation lines
    nextLine = startLine + 1
    while nextLine < endLine:
        pos = state.bMarks[nextLine] + state.tShift[nextLine]
        maximum = state.eMarks[nextLine]
        if not state.src.startswith("%", pos):
            break
        comment_lines.append(state.src[pos + 1:maximum].rstrip())
        nextLine += 1

    token.content = "\n".join(comment_lines)
    state.line = nextLine
    token.map = [startLine, nextLine]

    return True
def target(state: StateBlock, startLine: int, endLine: int, silent: bool):
    """Block rule matching a target line recognised by ``TARGET_PATTERN``.

    ``TARGET_PATTERN`` is a compiled pattern defined elsewhere in this module;
    its first capture group is stored as the token content.  On success a
    ``myst_target`` token is pushed and ``state.line`` is moved to the
    following line.

    :param state: block parser state
    :param startLine: index of the line to test
    :param endLine: exclusive upper line bound (unused by this rule)
    :param silent: validation mode; report a match without emitting a token
    :return: True when the line is a target, False otherwise
    """
    line_start = state.bMarks[startLine] + state.tShift[startLine]
    line_end = state.eMarks[startLine]

    # a line indented 4+ columns past the block indent is an indented code block
    indent_excess = state.sCount[startLine] - state.blkIndent
    if indent_excess >= 4:
        return False

    found = TARGET_PATTERN.match(state.src[line_start:line_end])
    if not found:
        return False

    if silent:
        return True

    state.line = startLine + 1

    token = state.push("myst_target", "", 0)
    token.attrSet("class", "myst-target")
    token.content = found.group(1)
    token.map = [startLine, state.line]

    return True
def container_func(state: StateBlock, startLine: int, endLine: int, silent: bool):
    """Block rule for a fenced container delimited by a repeated marker string.

    ``marker_char``, ``marker_str``, ``marker_len``, ``min_markers``,
    ``validate``, ``name`` and ``charCodeAt`` are taken from the enclosing
    scope (not visible in this block).  The opening line must start with at
    least ``min_markers`` repetitions of ``marker_str``; the rest of the line
    is passed to ``validate`` together with the marker run.  The body is
    tokenized as nested block content and wrapped in
    ``container_<name>_open`` / ``container_<name>_close`` tokens.

    The container ends at a closing marker line that is at least as long as the
    opening one, indented fewer than 4 columns and followed only by spaces.
    Without such a line it extends to ``endLine`` or to the first non-empty
    line indented less than the current block indent.

    :param state: block parser state
    :param startLine: index of the line to test
    :param endLine: exclusive upper line bound
    :param silent: validation mode; report a match without emitting tokens
    :return: True when a container was opened on ``startLine``
    """
    src = state.src
    src_len = len(src)

    def scan_marker(begin: int, limit: int) -> int:
        """Return the position where the repeated marker string stops matching."""
        cursor = begin + 1
        # `limit` is inclusive, so on the last line of a document without a
        # trailing newline it equals len(src); stop there instead of indexing
        # past the end of the source.
        while cursor <= limit and cursor < src_len:
            if marker_str[(cursor - begin) % marker_len] != src[cursor]:
                break
            cursor += 1
        return cursor

    # despite its name, this flag is True when an explicit closing marker line
    # was found (that line is then consumed as part of the container)
    auto_closed = False
    start = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    # Check out the first character quickly,
    # this should filter out most of non-containers
    if marker_char != charCodeAt(state.src, start):
        return False

    # Check out the rest of the marker string
    pos = scan_marker(start, maximum)

    marker_count = (pos - start) // marker_len
    if marker_count < min_markers:
        return False

    # drop a trailing partial repetition of the marker string
    pos -= (pos - start) % marker_len

    markup = src[start:pos]
    params = src[pos:maximum]

    if not validate(params, markup):
        return False

    # Since start is found, we can report success here in validation mode
    if silent:
        return True

    # Search for the end of the block
    nextLine = startLine

    while True:
        nextLine += 1
        if nextLine >= endLine:
            # unclosed block should be autoclosed by end of document.
            # also block seems to be autoclosed by end of parent
            break

        start = state.bMarks[nextLine] + state.tShift[nextLine]
        maximum = state.eMarks[nextLine]

        if start < maximum and state.sCount[nextLine] < state.blkIndent:
            # non-empty line with negative indent should stop the list:
            # - ```
            #  test
            break

        if marker_char != charCodeAt(state.src, start):
            continue

        if state.sCount[nextLine] - state.blkIndent >= 4:
            # closing fence should be indented less than 4 spaces
            continue

        pos = scan_marker(start, maximum)

        # closing code fence must be at least as long as the opening one
        if (pos - start) // marker_len < marker_count:
            continue

        # make sure tail has spaces only
        pos -= (pos - start) % marker_len
        pos = state.skipSpaces(pos)

        if pos < maximum:
            continue

        # found!
        auto_closed = True
        break

    old_parent = state.parentType
    old_line_max = state.lineMax
    state.parentType = "container"

    # this will prevent lazy continuations from ever going past our end marker
    state.lineMax = nextLine

    token = state.push(f"container_{name}_open", "div", 1)
    token.markup = markup
    token.block = True
    token.info = params
    token.map = [startLine, nextLine]

    state.md.block.tokenize(state, startLine + 1, nextLine)

    token = state.push(f"container_{name}_close", "div", -1)
    token.markup = state.src[start:pos]
    token.block = True

    state.parentType = old_parent
    state.lineMax = old_line_max
    state.line = nextLine + (1 if auto_closed else 0)

    return True
def _math_block_dollar(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule for display math opened by ``$$``.

    The block is closed by a line whose stripped text ends with ``$$`` or,
    when ``allow_labels`` is truthy, by a line whose reversed text matches
    ``DOLLAR_EQNO_REV`` (group 1, reversed back, is the equation label).  The
    closing marker may be on the opening line itself.  A block without a
    closing marker is not matched.  ``allow_labels``, ``DOLLAR_EQNO_REV`` and
    ``label_normalizer`` come from the enclosing scope.

    On success a ``math_block`` (or ``math_block_label``) token is pushed with
    the text between the markers as content.  In validation mode nothing is
    emitted and ``state`` is left untouched.

    :param state: block parser state
    :param startLine: index of the line to test
    :param endLine: exclusive upper line bound
    :param silent: validation mode; report a match without emitting a token
    :return: True when a closed math block starts on ``startLine``
    """
    # TODO internal backslash escaping

    def find_closing(line_text: str, line_end: int):
        """Return ``(content_end, label)`` if ``line_text`` closes the block, else None."""
        stripped = line_text.strip()
        if stripped.endswith("$$"):
            # step back over the marker and any trailing whitespace
            return line_end - 2 - (len(line_text) - len(stripped)), None
        if allow_labels:
            # reverse the line and match
            eqno_match = DOLLAR_EQNO_REV.match(line_text[::-1])
            if eqno_match:
                return line_end - eqno_match.end(), eqno_match.group(1)[::-1]
        return None

    startPos = state.bMarks[startLine] + state.tShift[startLine]
    end = state.eMarks[startLine]

    # if it's indented more than 3 spaces, it should be a code block
    if state.sCount[startLine] - state.blkIndent >= 4:
        return False

    if startPos + 2 > end:
        return False

    if not state.src.startswith("$$", startPos):
        return False

    nextLine = startLine
    closing = None

    # search for end of block on same line
    lineText = state.src[startPos:end]
    if len(lineText.strip()) > 3:
        closing = find_closing(lineText, end)

    # search for end of block on subsequent line
    if closing is None:
        while True:
            nextLine += 1
            if nextLine >= endLine:
                break

            start = state.bMarks[nextLine] + state.tShift[nextLine]
            end = state.eMarks[nextLine]

            # too short to hold a closing marker
            if end - start < 2:
                continue

            closing = find_closing(state.src[start:end], end)
            if closing is not None:
                break

    if closing is None:
        return False

    # validation mode: report the match without advancing or emitting anything
    if silent:
        return True

    end, label = closing

    state.line = nextLine + 1

    token = state.push("math_block_label" if label else "math_block", "math", 0)
    token.block = True
    token.content = state.src[startPos + 2:end]
    token.markup = "$$"
    token.map = [startLine, state.line]
    if label:
        token.info = label if label_normalizer is None else label_normalizer(label)

    return True
def _rule(state: StateBlock, startLine: int, endLine: int, silent: bool):
    """Block rule for a code fence delimited by three or more ``:`` characters.

    The text after the opening marker run becomes the token ``info`` (passed
    through ``stripEscape``, defined elsewhere).  The fence is closed by a line
    indented fewer than 4 columns that consists of at least as many ``:`` as
    the opening run followed only by spaces.  Without such a line the block
    extends to ``endLine`` or to the first non-empty line indented less than
    the current block indent.  A single ``colon_fence`` token holding the raw
    inner lines is pushed.

    :param state: block parser state
    :param startLine: index of the line to test
    :param endLine: exclusive upper line bound
    :param silent: validation mode; report a match without emitting a token
    :return: True when a fence opens on ``startLine``
    """
    haveEndMarker = False
    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    # if it's indented more than 3 spaces, it should be a code block
    if state.sCount[startLine] - state.blkIndent >= 4:
        return False

    if pos + 3 > maximum:
        return False

    marker = state.srcCharCode[pos]

    # /* : */
    if marker != 0x3A:
        return False

    # scan marker length
    mem = pos
    pos = state.skipChars(pos, marker)

    marker_length = pos - mem

    if marker_length < 3:
        return False

    markup = state.src[mem:pos]
    params = state.src[pos:maximum]

    # Since start is found, we can report success here in validation mode
    if silent:
        return True

    # search end of block
    nextLine = startLine

    while True:
        nextLine += 1
        if nextLine >= endLine:
            # unclosed block should be autoclosed by end of document.
            # also block seems to be autoclosed by end of parent
            break

        pos = mem = state.bMarks[nextLine] + state.tShift[nextLine]
        maximum = state.eMarks[nextLine]

        if pos < maximum and state.sCount[nextLine] < state.blkIndent:
            # non-empty line with negative indent should stop the list:
            # - ```
            #  test
            break

        # A blank line cannot close the fence.  Checking `pos >= maximum` first
        # also avoids indexing past the end of the source on a whitespace-only
        # final line without a trailing newline.
        if pos >= maximum or state.srcCharCode[pos] != marker:
            continue

        if state.sCount[nextLine] - state.blkIndent >= 4:
            # closing fence should be indented less than 4 spaces
            continue

        pos = state.skipChars(pos, marker)

        # closing code fence must be at least as long as the opening one
        if pos - mem < marker_length:
            continue

        # make sure tail has spaces only
        pos = state.skipSpaces(pos)

        if pos < maximum:
            continue

        haveEndMarker = True
        # found!
        break

    # If a fence has heading spaces, they should be removed from its inner block
    fence_indent = state.sCount[startLine]

    state.line = nextLine + (1 if haveEndMarker else 0)

    token = state.push("colon_fence", "code", 0)
    token.info = stripEscape(params)
    token.content = state.getLines(startLine + 1, nextLine, fence_indent, True)
    token.markup = markup
    token.map = [startLine, state.line]

    return True
def frontMatter(state: StateBlock, startLine: int, endLine: int, silent: bool):
    """Block rule for a front-matter section fenced by a repeated marker string.

    ``marker_char``, ``marker_str``, ``marker_len`` and ``min_markers`` are
    taken from the enclosing scope (not visible in this block).  The rule only
    matches on line 0, and only when the very first character of the source is
    the marker character.  The section must be closed, either by a marker line
    at least as long as the opening one or by a ``...`` line; an unclosed
    section is rejected.  A hidden ``front_matter`` token is pushed whose
    content is the text between the opening line and the closing line.

    :param state: block parser state
    :param startLine: index of the line to test; only 0 can match
    :param endLine: exclusive upper line bound
    :param silent: validation mode; report a match without emitting a token
    :return: True when a front-matter section was recognised
    """
    # despite its name, this flag is True when an explicit closing marker line
    # was found (that line is then consumed as part of the section)
    auto_closed = False
    start = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]
    src_len = len(state.src)

    # Check out the first character of the first line quickly,
    # this should filter out non-front matter
    if startLine != 0 or marker_char != state.srcCharCode[0]:
        return False

    # Check out the rest of the marker string.
    # `maximum` is an inclusive bound here, so the scan is also limited by the
    # source length: a marker line that ends the document has no character at
    # `maximum` to compare against.
    pos = start + 1
    # used when the scan runs off the end of the source without a mismatch
    start_content = maximum + 1
    while pos <= maximum and pos < src_len:
        if marker_str[(pos - start) % marker_len] != state.src[pos]:
            start_content = pos + 1
            break
        pos += 1

    marker_count = (pos - start) // marker_len

    if marker_count < min_markers:
        return False

    pos -= (pos - start) % marker_len

    # Since start is found, we can report success here in validation mode
    if silent:
        return True

    # Search for the end of the block
    nextLine = startLine

    while True:
        nextLine += 1
        if nextLine >= endLine:
            # an unclosed section is not treated as front matter
            return False

        # `start`/`maximum` still describe the previously scanned line here, so
        # a "..." terminator is detected one iteration after it was read
        if state.src[start:maximum] == "...":
            break

        start = state.bMarks[nextLine] + state.tShift[nextLine]
        maximum = state.eMarks[nextLine]

        if start < maximum and state.sCount[nextLine] < state.blkIndent:
            # non-empty line with negative indent should stop the list:
            # - ```
            #  test
            break

        # a whitespace-only final line puts `start` at len(src); it cannot be a
        # closing marker and must not be indexed
        if start >= src_len or marker_char != state.srcCharCode[start]:
            continue

        if state.sCount[nextLine] - state.blkIndent >= 4:
            # closing fence should be indented less than 4 spaces
            continue

        pos = start + 1
        while pos < maximum:
            if marker_str[(pos - start) % marker_len] != state.src[pos]:
                break
            pos += 1

        # closing code fence must be at least as long as the opening one
        if (pos - start) // marker_len < marker_count:
            continue

        # make sure tail has spaces only
        pos -= (pos - start) % marker_len
        pos = state.skipSpaces(pos)

        if pos < maximum:
            continue

        # found!
        auto_closed = True
        break

    old_parent = state.parentType
    old_line_max = state.lineMax
    state.parentType = "container"

    # this will prevent lazy continuations from ever going past our end marker
    state.lineMax = nextLine

    token = state.push("front_matter", "", 0)
    token.hidden = True
    token.markup = marker_str * min_markers
    token.content = state.src[state.bMarks[startLine + 1]:state.eMarks[nextLine - 1]]
    token.block = True
    token.meta = state.src[start_content:start - 1]

    state.parentType = old_parent
    state.lineMax = old_line_max
    state.line = nextLine + (1 if auto_closed else 0)
    token.map = [startLine, state.line]

    return True
def deflist(state: StateBlock, startLine: int, endLine: int, silent: bool):
    """Block rule that parses a definition list starting at ``startLine``.

    ``startLine`` is taken as the term (``dt``) line.  The next line, or the
    one after a single empty line, must carry a definition marker as reported
    by ``skipMarker`` (defined elsewhere; a negative result means "no marker").
    Each definition body is tokenized as nested block content.  The emitted
    token stream is ``dl_open``, then for each term ``dt_open``/``inline``/
    ``dt_close`` followed by one or more ``dd_open`` ... ``dd_close`` groups,
    and finally ``dl_close``.

    :param state: block parser state
    :param startLine: index of the candidate term line
    :param endLine: exclusive upper line bound
    :param silent: validation mode (see the quirk noted below)
    :return: True when a definition list was parsed (or, in validation mode,
        when ``startLine`` starts a definition inside an open list)
    """
    if silent:
        # quirk: validation mode validates a dd block only, not a whole deflist
        if state.ddIndent < 0:
            return False
        return skipMarker(state, startLine) >= 0

    nextLine = startLine + 1
    if nextLine >= endLine:
        return False

    # a single empty line between the term and its first definition is allowed
    if state.isEmpty(nextLine):
        nextLine += 1
        if nextLine >= endLine:
            return False

    if state.sCount[nextLine] < state.blkIndent:
        return False
    contentStart = skipMarker(state, nextLine)
    if contentStart < 0:
        return False

    # Start list
    listTokIdx = len(state.tokens)
    tight = True

    token = state.push("dl_open", "dl", 1)
    # listLines aliases token.map so the end line can be filled in once known
    token.map = listLines = [startLine, 0]

    # Iterate list items
    dtLine = startLine
    ddLine = nextLine

    # One definition list can contain multiple DTs,
    # and one DT can be followed by multiple DDs.
    #
    # Thus, there are two loops here, and a flag is
    # needed to break out of the outer one from inside the inner one
    #
    break_outer = False

    while True:
        prevEmptyEnd = False

        token = state.push("dt_open", "dt", 1)
        token.map = [dtLine, dtLine]

        token = state.push("inline", "", 0)
        token.map = [dtLine, dtLine]
        token.content = state.getLines(dtLine, dtLine + 1, state.blkIndent, False).strip()
        token.children = []

        token = state.push("dt_close", "dt", -1)

        while True:
            token = state.push("dd_open", "dd", 1)
            # itemLines aliases token.map so the end line can be filled in later
            token.map = itemLines = [nextLine, 0]

            pos = contentStart
            maximum = state.eMarks[ddLine]
            offset = (state.sCount[ddLine] + contentStart
                      - (state.bMarks[ddLine] + state.tShift[ddLine]))

            # skip whitespace after the marker while tracking the resulting
            # column; a tab advances to the next multiple of 4
            while pos < maximum:
                ch = state.srcCharCode[pos]

                if isSpace(ch):
                    if ch == 0x09:
                        offset += 4 - offset % 4
                    else:
                        offset += 1
                else:
                    break

                pos += 1

            contentStart = pos

            # save the parser state that is overridden for the nested parse
            oldTight = state.tight
            oldDDIndent = state.ddIndent
            oldIndent = state.blkIndent
            oldTShift = state.tShift[ddLine]
            oldSCount = state.sCount[ddLine]
            oldParentType = state.parentType
            state.blkIndent = state.ddIndent = state.sCount[ddLine] + 2
            # make the definition line appear to start at its content
            state.tShift[ddLine] = contentStart - state.bMarks[ddLine]
            state.sCount[ddLine] = offset
            state.tight = True
            state.parentType = "deflist"

            state.md.block.tokenize(state, ddLine, endLine, True)

            # If any of list item is tight, mark list as tight
            if not state.tight or prevEmptyEnd:
                tight = False

            # Item become loose if finish with empty line,
            # but we should filter last element, because it means list finish
            prevEmptyEnd = (state.line - ddLine) > 1 and state.isEmpty(state.line - 1)

            # restore the parser state saved above
            state.tShift[ddLine] = oldTShift
            state.sCount[ddLine] = oldSCount
            state.tight = oldTight
            state.parentType = oldParentType
            state.blkIndent = oldIndent
            state.ddIndent = oldDDIndent

            token = state.push("dd_close", "dd", -1)

            itemLines[1] = nextLine = state.line

            if nextLine >= endLine:
                break_outer = True
                break

            if state.sCount[nextLine] < state.blkIndent:
                break_outer = True
                break

            # no further definition marker: this term has no more definitions
            contentStart = skipMarker(state, nextLine)
            if contentStart < 0:
                break

            ddLine = nextLine

            # go to the next loop iteration:
            # insert DD tag and repeat checking

        if break_outer:
            break_outer = False
            break

        if nextLine >= endLine:
            break
        dtLine = nextLine

        if state.isEmpty(dtLine):
            break
        if state.sCount[dtLine] < state.blkIndent:
            break

        ddLine = dtLine + 1
        if ddLine >= endLine:
            break
        if state.isEmpty(ddLine):
            ddLine += 1
        if ddLine >= endLine:
            break

        if state.sCount[ddLine] < state.blkIndent:
            break
        contentStart = skipMarker(state, ddLine)
        if contentStart < 0:
            break

        # go to the next loop iteration:
        # insert DT and DD tags and repeat checking

    # Finalise list
    token = state.push("dl_close", "dl", -1)

    listLines[1] = nextLine

    state.line = nextLine

    # mark paragraphs tight if needed
    if tight:
        markTightParagraphs(state, listTokIdx)

    return True
def _fieldlist_rule(state: StateBlock, startLine: int, endLine: int, silent: bool):
    """Block rule that parses a field list starting at ``startLine``.

    A field item starts with a name marker recognised by ``parseNameMarker``
    (defined elsewhere; used here as returning ``(position_after_name,
    name_text)`` with a negative position when the line has no marker).  The
    text after the marker is tokenized as nested block content.  The emitted
    token stream is ``field_list_open``, then for each item
    ``fieldlist_name_open``/``inline``/``fieldlist_name_close`` and
    ``fieldlist_body_open`` ... ``fieldlist_body_close``, and finally
    ``field_list_close``.

    :param state: block parser state
    :param startLine: index of the line to test
    :param endLine: exclusive upper line bound
    :param silent: validation mode; report a match without emitting tokens
    :return: True when a field list starts on ``startLine``
    """
    # adapted from markdown_it/rules_block/list.py::list_block

    # if it's indented more than 3 spaces, it should be a code block
    if state.sCount[startLine] - state.blkIndent >= 4:
        return False

    posAfterName, name_text = parseNameMarker(state, startLine)
    if posAfterName < 0:
        return False

    # For validation mode we can terminate immediately
    if silent:
        return True

    # start field list
    token = state.push("field_list_open", "dl", 1)
    token.attrSet("class", "field-list")
    # listLines aliases token.map so the end line can be filled in once known
    token.map = listLines = [startLine, 0]

    # iterate list items
    nextLine = startLine

    with set_parent_type(state, "fieldlist"):

        while nextLine < endLine:

            # create name tokens
            token = state.push("fieldlist_name_open", "dt", 1)
            token.map = [startLine, startLine]
            token = state.push("inline", "", 0)
            token.map = [startLine, startLine]
            token.content = name_text
            token.children = []
            token = state.push("fieldlist_name_close", "dt", -1)

            # set indent positions
            pos = posAfterName
            maximum = state.eMarks[nextLine]
            offset = (
                state.sCount[nextLine]
                + posAfterName
                - (state.bMarks[startLine] + state.tShift[startLine])
            )

            # find indent to start of body on first line
            while pos < maximum:
                ch = state.srcCharCode[pos]

                if ch == 0x09:  # \t
                    offset += 4 - (offset + state.bsCount[nextLine]) % 4
                elif ch == 0x20:  # \s
                    offset += 1
                else:
                    break

                pos += 1

            contentStart = pos

            # set indent for body text
            if contentStart >= maximum:
                # no body on first line, so use constant indentation
                # TODO adapt to indentation of subsequent lines?
                indent = 2
            else:
                indent = offset

            # Run subparser on the field body
            token = state.push("fieldlist_body_open", "dd", 1)
            # itemLines aliases token.map so the end line can be filled in later
            token.map = itemLines = [startLine, 0]

            # change current state, then restore it after parser subcall
            oldTShift = state.tShift[startLine]
            oldSCount = state.sCount[startLine]
            oldBlkIndent = state.blkIndent

            # make the first line appear to start at the field body
            state.tShift[startLine] = contentStart - state.bMarks[startLine]
            state.sCount[startLine] = offset
            state.blkIndent = indent

            state.md.block.tokenize(state, startLine, endLine)

            state.blkIndent = oldBlkIndent
            state.tShift[startLine] = oldTShift
            state.sCount[startLine] = oldSCount

            token = state.push("fieldlist_body_close", "dd", -1)

            # the nested parse stopped at state.line; continue from there
            nextLine = startLine = state.line
            itemLines[1] = nextLine

            if nextLine >= endLine:
                break

            # NOTE(review): this value is overwritten before it is read on the
            # next iteration, so the assignment appears to have no effect
            contentStart = state.bMarks[startLine]

            # Try to check if list is terminated or continued.
            if state.sCount[nextLine] < state.blkIndent:
                break

            # if it's indented more than 3 spaces, it should be a code block
            if state.sCount[startLine] - state.blkIndent >= 4:
                break

            # get next field item
            posAfterName, name_text = parseNameMarker(state, startLine)
            if posAfterName < 0:
                break

        # Finalize list
        token = state.push("field_list_close", "dl", -1)
        listLines[1] = nextLine
        state.line = nextLine

    return True