def sub(pattern, repl, string, count=0, flags=0):
    """Replace the leftmost non-overlapping matches of pattern in string.

    repl is either a string, in which case the backslash escapes it contains
    are processed, or a callable that receives the match object and must
    return the replacement string.

    The work is delegated to _jsre when _jsre._is_valid(pattern) is true,
    otherwise to the object returned by _pyre().
    """
    engine = _jsre if _jsre._is_valid(pattern) else _pyre()
    return engine.sub(pattern, repl, string, count, flags)
def mark(src):
    """Convert Markdown source into HTML.

    A first pass goes through the lines and inserts the opening and closing
    tags of blockquotes, unordered lists and ordered lists. A second pass
    splits the lines into sections: indented or fenced code blocks become
    CodeBlock instances, the other lines are accumulated in Marked
    instances. In the same pass the content of <script> blocks is collected,
    lines starting with # are turned into <Hn> headers and link reference
    definitions (lines matching ref_pattern) are stored in the module-level
    dictionary refs, which is reset at each call.

    Return a tuple (html, scripts): html is the concatenation of the HTML
    returned by the to_html() method of each section, scripts is the list of
    script sources found in the text, followed by those returned by the
    sections.
    """
    global refs
    refs = {}

    # split source in sections
    # sections can be :
    # - a block-level HTML element (markdown syntax will not be processed)
    # - a script
    # - a span-level HTML tag (markdown syntax will be processed)
    # - a code block

    # normalise line feeds
    src = src.replace("\r\n", "\n")

    # a line followed by a line of = or - is a header of level 1 or 2
    src = re.sub(r"(.*?)\n=+\n", "\n# \\1\n", src)
    src = re.sub(r"(.*?)\n-+\n", "\n## \\1\n", src)

    lines = src.split("\n") + [""]

    # first pass : blockquotes and lists. bq and ul are the current nesting
    # levels, ol is 1 inside an ordered list
    i = bq = 0
    ul = ol = 0
    while i < len(lines):
        # enclose lines starting by > in a blockquote
        if lines[i].startswith(">"):
            nb = 1
            while nb < len(lines[i]) and lines[i][nb] == ">":
                nb += 1
            lines[i] = lines[i][nb:]
            if nb > bq:
                lines.insert(i, "<blockquote>" * (nb - bq))
                i += 1
                bq = nb
            elif nb < bq:
                lines.insert(i, "</blockquote>" * (bq - nb))
                i += 1
                bq = nb
        elif bq > 0:
            lines.insert(i, "</blockquote>" * bq)
            i += 1
            bq = 0

        # unordered lists
        stripped = lines[i].lstrip()
        if (
            stripped
            and stripped[0] in "-+*"
            and len(stripped) > 1
            and stripped[1] == " "
            and (i == 0 or ul or not lines[i - 1].strip())
        ):
            # line indentation indicates nesting level
            nb = 1 + len(lines[i]) - len(stripped)
            lines[i] = "<li>" + lines[i][nb:]
            if nb > ul:
                lines.insert(i, "<ul>" * (nb - ul))
                i += 1
            elif nb < ul:
                lines.insert(i, "</ul>" * (ul - nb))
                i += 1
            ul = nb
        elif ul and not lines[i].strip():
            # an empty line ends the list unless the next line is indented
            # or is another list item
            if (
                i < len(lines) - 1
                and lines[i + 1].strip()
                and not lines[i + 1].startswith(" ")
            ):
                nline = lines[i + 1].lstrip()
                if nline[0] in "-+*" and len(nline) > 1 and nline[1] == " ":
                    pass
                else:
                    lines.insert(i, "</ul>" * ul)
                    i += 1
                    ul = 0

        # ordered lists
        mo = re.search(r"^(\d+\.)", lines[i])
        if mo:
            if not ol:
                lines.insert(i, "<ol>")
                i += 1
            lines[i] = "<li>" + lines[i][len(mo.group(1)):]
            ol = 1
        elif (
            ol
            and not lines[i].strip()
            and i < len(lines) - 1
            and not lines[i + 1].startswith(" ")
            and not re.search(r"^(\d+\.)", lines[i + 1])
        ):
            lines.insert(i, "</ol>")
            i += 1
            ol = 0

        i += 1

    # close what is still open at the end of the text
    if ul:
        lines.append("</ul>" * ul)
    if ol:
        lines.append("</ol>" * ol)
    if bq:
        lines.append("</blockquote>" * bq)

    # second pass : group the lines in sections
    sections = []
    scripts = []
    section = Marked()

    i = 0
    while i < len(lines):
        line = lines[i]
        if line.strip() and line.startswith("    "):
            # code block : lines indented by 4 spaces. The test uses the
            # same 4-character prefix as the one removed by line[4:]
            if isinstance(section, Marked) and section.line:
                sections.append(section)
            section = CodeBlock(line[4:])
            j = i + 1
            while j < len(lines) and lines[j].startswith("    "):
                section.lines.append(lines[j][4:])
                j += 1
            sections.append(section)
            section = Marked()
            i = j
            continue

        elif line.strip() and line.startswith("```"):
            # fenced code blocks à la Github Flavoured Markdown
            if isinstance(section, Marked) and section.line:
                sections.append(section)
            section = CodeBlock(line)
            j = i + 1
            while j < len(lines) and not lines[j].startswith("```"):
                section.lines.append(lines[j])
                j += 1
            sections.append(section)
            section = Marked()
            # skip the closing fence
            i = j + 1
            continue

        elif line.lower().startswith("<script"):
            if isinstance(section, Marked) and section.line:
                sections.append(section)
                section = Marked()
            j = i + 1
            while j < len(lines):
                if lines[j].lower().startswith("</script>"):
                    scripts.append("\n".join(lines[i + 1:j]))
                    # blank the lines of the script, tags included
                    for k in range(i, j + 1):
                        lines[k] = ""
                    break
                j += 1
            i = j
            continue

        # atx header
        elif line.startswith("#"):
            # count the leading #, at most 6 since HTML has no <H7>
            level = 1
            while level < len(line) and line[level] == "#" and level < 6:
                level += 1
            # the title starts after the character that follows the #s
            if not line[level + 1:].strip():
                if level == 1:
                    # nothing but a single # : ignore the line
                    i += 1
                    continue
                else:
                    # only #s : the last one is used as the title
                    lines[i] = "<H%s>%s</H%s>\n" % (level - 1, "#", level - 1)
            else:
                lines[i] = "<H%s>%s</H%s>\n" % (level, line[level + 1:], level)
            # i is not incremented : the line now holds the header tag and
            # is added to the current section at the next iteration

        else:
            mo = re.search(ref_pattern, line)
            if mo is not None:
                # link reference definition : store it, keys are lowercased
                if isinstance(section, Marked) and section.line:
                    sections.append(section)
                    section = Marked()
                key = mo.group(1)
                value = URL(mo.group(2))
                refs[key.lower()] = value
            else:
                if not line.strip():
                    line = "<p></p>"
                if section.line:
                    section.line += "\n"
                section.line += line

            i += 1

    if isinstance(section, Marked) and section.line:
        sections.append(section)

    res = ""
    for section in sections:
        mk, _scripts = section.to_html()
        res += mk
        scripts += _scripts

    return res, scripts
def apply_markdown(src):
    """Apply the span-level Markdown markup found in src.

    Steps, in this order :
    - inline links [text](url) and reference links [text][id] are replaced
      by <a> tags; the url of a reference link is read in the module-level
      dictionary refs, with the id (or the link text if the id is empty or
      unknown) converted to lowercase
    - HTML tags are replaced by a random placeholder so that the _ and *
      they contain are not taken for markup; a < that does not open a tag
      is replaced by &lt;
    - the content of inline code is escaped by s_escape
    - escaped characters are replaced by HTML entities, then the markup for
      strong, emphasis and inline code is replaced by tags
    - the HTML tags are restored and the result is enclosed in <p></p>

    Return a tuple (html, scripts); scripts is always an empty list here.

    Raise KeyError if a reference link uses a reference that is not in refs.
    """
    scripts = []

    i = 0
    while i < len(src):
        if src[i] == "[":
            start_a = i + 1
            # search the closing ], ignoring those escaped by a backslash
            while True:
                end_a = src.find("]", i)
                if end_a == -1:
                    break
                if src[end_a - 1] == "\\":
                    i = end_a + 1
                else:
                    break
            if end_a > -1 and "\n" not in src[start_a:end_a]:
                link = src[start_a:end_a]
                rest = src[end_a + 1:].lstrip()
                tag = None
                if rest and rest[0] == "(":
                    # inline link
                    j = 0
                    while True:
                        end_href = rest.find(")", j)
                        if end_href == -1:
                            break
                        if rest[end_href - 1] == "\\":
                            j = end_href + 1
                        else:
                            break
                    if end_href > -1 and "\n" not in rest[:end_href]:
                        tag = '<a href="' + rest[1:end_href] + '">' + link + "</a>"
                        tail = rest[end_href + 1:]
                elif rest and rest[0] == "[":
                    # reference link
                    j = 0
                    while True:
                        end_key = rest.find("]", j)
                        if end_key == -1:
                            break
                        if rest[end_key - 1] == "\\":
                            j = end_key + 1
                        else:
                            break
                    if end_key > -1 and "\n" not in rest[:end_key]:
                        # use the id between the second brackets; the link
                        # text is the reference for [text][] and remains a
                        # fallback when the id is not defined
                        ref_id = rest[1:end_key]
                        key = ref_id if ref_id.lower() in refs else link
                        if key.lower() not in refs:
                            raise KeyError("unknown reference %s" % (ref_id or link))
                        url = refs[key.lower()]
                        tag = '<a href="' + url + '">' + link + "</a>"
                        tail = rest[end_key + 1:]
                if tag is not None:
                    src = src[:start_a - 1] + tag + tail
                    # resume on the character that follows the tag, so that
                    # a link right after this one is not missed
                    i = start_a - 1 + len(tag)
                    continue
        i += 1

    # before applying the markup with _ and *, isolate HTML tags because
    # they can contain these characters
    # We replace them temporarily by a random string
    rstr = "".join(random.choice(letters) for _ in range(16))

    i = 0
    tags = []
    while i < len(src):
        if src[i] == "<":
            # quote is the quote character of the attribute value being
            # read, None outside of a quoted value
            quote = None
            j = i + 1
            while j < len(src):
                char = src[j]
                if quote is not None:
                    if char == quote and src[j - 1] != "\\":
                        quote = None
                elif char == '"' or char == "'":
                    quote = char
                elif char == ">":
                    tags.append(src[i:j + 1])
                    src = src[:i] + rstr + src[j + 1:]
                    # the final i += 1 moves to the character that follows
                    # the placeholder
                    i += len(rstr) - 1
                    break
                elif char == "\n":
                    # if a sign < is not followed by > in the same line, it
                    # is the sign "lesser than"
                    src = src[:i] + "&lt;" + src[i + 1:]
                    i += 3
                    break
                j += 1
            else:
                # same thing if the end of the text is reached
                src = src[:i] + "&lt;" + src[i + 1:]
                i += 3
        elif src[i] == "`" and (i == 0 or src[i - 1] != "\\"):
            # ignore the content of inline code
            j = i + 1
            while j < len(src):
                if src[j] == "`" and src[j - 1] != "\\":
                    break
                j += 1
            i = j
        i += 1

    # escape "<", ">", "&" and "_" in inline code
    code_pattern = r"\`(.*?)\`"
    src = re.sub(code_pattern, s_escape, src)

    # replace escaped ` _ * by HTML entities so that they are not taken for
    # markup by the substitutions below
    # NOTE(review): the first pattern matches 2 backslashes followed by a
    # backtick - confirm that this is intended
    src = src.replace(r"\\`", "&#96;")
    src = src.replace(r"\_", "&#95;")
    src = src.replace(r"\*", "&#42;")

    # emphasis
    strong_patterns = [("STRONG", r"\*\*(.*?)\*\*"), ("B", r"__(.*?)__")]
    for tag, strong_pattern in strong_patterns:
        src = re.sub(strong_pattern, r"<%s>\1</%s>" % (tag, tag), src)

    em_patterns = [("EM", r"\*(.*?)\*"), ("I", r"\_(.*?)\_")]
    for tag, em_pattern in em_patterns:
        src = re.sub(em_pattern, r"<%s>\1</%s>" % (tag, tag), src)

    # inline code
    code_pattern = r"\`(.*?)\`"
    src = re.sub(code_pattern, r"<code>\1</code>", src)

    # restore tags, from the last one to the first one
    while True:
        pos = src.rfind(rstr)
        if pos == -1:
            break
        repl = tags.pop()
        src = src[:pos] + repl + src[pos + len(rstr):]

    src = "<p>" + src + "</p>"

    return src, scripts
def mark(src):
    """Convert Markdown source into HTML.

    A first pass goes through the lines and inserts the opening and closing
    tags of blockquotes, unordered lists and ordered lists. A second pass
    splits the lines into sections: indented or fenced code blocks become
    CodeBlock instances, the other lines are accumulated in Marked
    instances. In the same pass the content of <script> blocks is collected,
    lines starting with # are turned into <Hn> headers and link reference
    definitions (lines matching ref_pattern) are stored in the module-level
    dictionary refs, which is reset at each call.

    Return a tuple (html, scripts): html is the concatenation of the HTML
    returned by the to_html() method of each section, scripts is the list of
    script sources found in the text, followed by those returned by the
    sections.
    """
    global refs
    refs = {}

    # split source in sections
    # sections can be :
    # - a block-level HTML element (markdown syntax will not be processed)
    # - a script
    # - a span-level HTML tag (markdown syntax will be processed)
    # - a code block

    # normalise line feeds
    src = src.replace('\r\n', '\n')

    # a line followed by a line of = or - is a header of level 1 or 2
    src = re.sub(r'(.*?)\n=+\n', '\n# \\1\n', src)
    src = re.sub(r'(.*?)\n-+\n', '\n## \\1\n', src)

    lines = src.split('\n') + ['']

    # first pass : blockquotes and lists. bq and ul are the current nesting
    # levels, ol is 1 inside an ordered list
    i = bq = 0
    ul = ol = 0
    while i < len(lines):
        # enclose lines starting by > in a blockquote
        if lines[i].startswith('>'):
            nb = 1
            while nb < len(lines[i]) and lines[i][nb] == '>':
                nb += 1
            lines[i] = lines[i][nb:]
            if nb > bq:
                lines.insert(i, '<blockquote>' * (nb - bq))
                i += 1
                bq = nb
            elif nb < bq:
                lines.insert(i, '</blockquote>' * (bq - nb))
                i += 1
                bq = nb
        elif bq > 0:
            lines.insert(i, '</blockquote>' * bq)
            i += 1
            bq = 0

        # unordered lists
        stripped = lines[i].lstrip()
        if (stripped and stripped[0] in '-+*'
                and len(stripped) > 1
                and stripped[1] == ' '
                and (i == 0 or ul or not lines[i - 1].strip())):
            # line indentation indicates nesting level
            nb = 1 + len(lines[i]) - len(stripped)
            lines[i] = '<li>' + lines[i][nb:]
            if nb > ul:
                lines.insert(i, '<ul>' * (nb - ul))
                i += 1
            elif nb < ul:
                lines.insert(i, '</ul>' * (ul - nb))
                i += 1
            ul = nb
        elif ul and not lines[i].strip():
            # an empty line ends the list unless the next line is indented
            # or is another list item
            if (i < len(lines) - 1 and lines[i + 1].strip()
                    and not lines[i + 1].startswith(' ')):
                nline = lines[i + 1].lstrip()
                if nline[0] in '-+*' and len(nline) > 1 and nline[1] == ' ':
                    pass
                else:
                    lines.insert(i, '</ul>' * ul)
                    i += 1
                    ul = 0

        # ordered lists
        mo = re.search(r'^(\d+\.)', lines[i])
        if mo:
            if not ol:
                lines.insert(i, '<ol>')
                i += 1
            lines[i] = '<li>' + lines[i][len(mo.group(1)):]
            ol = 1
        elif (ol and not lines[i].strip() and i < len(lines) - 1
                and not lines[i + 1].startswith(' ')
                and not re.search(r'^(\d+\.)', lines[i + 1])):
            lines.insert(i, '</ol>')
            i += 1
            ol = 0

        i += 1

    # close what is still open at the end of the text
    if ul:
        lines.append('</ul>' * ul)
    if ol:
        lines.append('</ol>' * ol)
    if bq:
        lines.append('</blockquote>' * bq)

    # second pass : group the lines in sections
    sections = []
    scripts = []
    section = Marked()

    i = 0
    while i < len(lines):
        line = lines[i]
        if line.strip() and line.startswith('    '):
            # code block : lines indented by 4 spaces. The test uses the
            # same 4-character prefix as the one removed by line[4:]
            if isinstance(section, Marked) and section.line:
                sections.append(section)
            section = CodeBlock(line[4:])
            j = i + 1
            while j < len(lines) and lines[j].startswith('    '):
                section.lines.append(lines[j][4:])
                j += 1
            sections.append(section)
            section = Marked()
            i = j
            continue

        elif line.strip() and line.startswith("```"):
            # fenced code blocks à la Github Flavoured Markdown
            if isinstance(section, Marked) and section.line:
                sections.append(section)
            section = CodeBlock(line)
            j = i + 1
            while j < len(lines) and not lines[j].startswith("```"):
                section.lines.append(lines[j])
                j += 1
            sections.append(section)
            section = Marked()
            # skip the closing fence
            i = j + 1
            continue

        elif line.lower().startswith('<script'):
            if isinstance(section, Marked) and section.line:
                sections.append(section)
                section = Marked()
            j = i + 1
            while j < len(lines):
                if lines[j].lower().startswith('</script>'):
                    scripts.append('\n'.join(lines[i + 1:j]))
                    # blank the lines of the script, tags included
                    for k in range(i, j + 1):
                        lines[k] = ''
                    break
                j += 1
            i = j
            continue

        # atx header
        elif line.startswith('#'):
            # count the leading #, at most 6 since HTML has no <H7>
            level = 1
            while level < len(line) and line[level] == '#' and level < 6:
                level += 1
            # the title starts after the character that follows the #s
            if not line[level + 1:].strip():
                if level == 1:
                    # nothing but a single # : ignore the line
                    i += 1
                    continue
                else:
                    # only #s : the last one is used as the title
                    lines[i] = '<H%s>%s</H%s>\n' % (level - 1, '#', level - 1)
            else:
                lines[i] = '<H%s>%s</H%s>\n' % (level, line[level + 1:], level)
            # i is not incremented : the line now holds the header tag and
            # is added to the current section at the next iteration

        else:
            mo = re.search(ref_pattern, line)
            if mo is not None:
                # link reference definition : store it, keys are lowercased
                if isinstance(section, Marked) and section.line:
                    sections.append(section)
                    section = Marked()
                key = mo.group(1)
                value = URL(mo.group(2))
                refs[key.lower()] = value
            else:
                if not line.strip():
                    line = '<p></p>'
                if section.line:
                    section.line += '\n'
                section.line += line

            i += 1

    if isinstance(section, Marked) and section.line:
        sections.append(section)

    res = ''
    for section in sections:
        mk, _scripts = section.to_html()
        res += mk
        scripts += _scripts

    return res, scripts
def apply_markdown(src):
    """Apply the span-level Markdown markup found in src.

    Steps, in this order :
    - inline links [text](url) and reference links [text][id] are replaced
      by <a> tags; the url of a reference link is read in the module-level
      dictionary refs, with the id (or the link text if the id is empty or
      unknown) converted to lowercase
    - HTML tags are replaced by a random placeholder so that the _ and *
      they contain are not taken for markup; a < that does not open a tag
      is replaced by &lt;
    - the content of inline code is escaped by s_escape
    - escaped characters are replaced by HTML entities, then the markup for
      strong, emphasis and inline code is replaced by tags
    - the HTML tags are restored and the result is enclosed in <p></p>

    Return a tuple (html, scripts); scripts is always an empty list here.

    Raise KeyError if a reference link uses a reference that is not in refs.
    """
    scripts = []

    i = 0
    while i < len(src):
        if src[i] == '[':
            start_a = i + 1
            # search the closing ], ignoring those escaped by a backslash
            while True:
                end_a = src.find(']', i)
                if end_a == -1:
                    break
                if src[end_a - 1] == '\\':
                    i = end_a + 1
                else:
                    break
            if end_a > -1 and '\n' not in src[start_a:end_a]:
                link = src[start_a:end_a]
                rest = src[end_a + 1:].lstrip()
                tag = None
                if rest and rest[0] == '(':
                    # inline link
                    j = 0
                    while True:
                        end_href = rest.find(')', j)
                        if end_href == -1:
                            break
                        if rest[end_href - 1] == '\\':
                            j = end_href + 1
                        else:
                            break
                    if end_href > -1 and '\n' not in rest[:end_href]:
                        tag = ('<a href="' + rest[1:end_href] + '">' + link
                               + '</a>')
                        tail = rest[end_href + 1:]
                elif rest and rest[0] == '[':
                    # reference link
                    j = 0
                    while True:
                        end_key = rest.find(']', j)
                        if end_key == -1:
                            break
                        if rest[end_key - 1] == '\\':
                            j = end_key + 1
                        else:
                            break
                    if end_key > -1 and '\n' not in rest[:end_key]:
                        # use the id between the second brackets; the link
                        # text is the reference for [text][] and remains a
                        # fallback when the id is not defined
                        ref_id = rest[1:end_key]
                        key = ref_id if ref_id.lower() in refs else link
                        if key.lower() not in refs:
                            raise KeyError('unknown reference %s'
                                           % (ref_id or link))
                        url = refs[key.lower()]
                        tag = '<a href="' + url + '">' + link + '</a>'
                        tail = rest[end_key + 1:]
                if tag is not None:
                    src = src[:start_a - 1] + tag + tail
                    # resume on the character that follows the tag, so that
                    # a link right after this one is not missed
                    i = start_a - 1 + len(tag)
                    continue
        i += 1

    # before applying the markup with _ and *, isolate HTML tags because
    # they can contain these characters
    # We replace them temporarily by a random string
    rstr = ''.join(random.choice(letters) for _ in range(16))

    i = 0
    tags = []
    while i < len(src):
        if src[i] == '<':
            # quote is the quote character of the attribute value being
            # read, None outside of a quoted value
            quote = None
            j = i + 1
            while j < len(src):
                char = src[j]
                if quote is not None:
                    if char == quote and src[j - 1] != '\\':
                        quote = None
                elif char == '"' or char == "'":
                    quote = char
                elif char == '>':
                    tags.append(src[i:j + 1])
                    src = src[:i] + rstr + src[j + 1:]
                    # the final i += 1 moves to the character that follows
                    # the placeholder
                    i += len(rstr) - 1
                    break
                elif char == '\n':
                    # if a sign < is not followed by > in the same line, it
                    # is the sign "lesser than"
                    src = src[:i] + '&lt;' + src[i + 1:]
                    i += 3
                    break
                j += 1
            else:
                # same thing if the end of the text is reached
                src = src[:i] + '&lt;' + src[i + 1:]
                i += 3
        elif src[i] == '`' and (i == 0 or src[i - 1] != '\\'):
            # ignore the content of inline code
            j = i + 1
            while j < len(src):
                if src[j] == '`' and src[j - 1] != '\\':
                    break
                j += 1
            i = j
        i += 1

    # escape "<", ">", "&" and "_" in inline code
    code_pattern = r'\`(.*?)\`'
    src = re.sub(code_pattern, s_escape, src)

    # replace escaped ` _ * by HTML entities so that they are not taken for
    # markup by the substitutions below
    # NOTE(review): the first pattern matches 2 backslashes followed by a
    # backtick - confirm that this is intended
    src = src.replace(r'\\`', '&#96;')
    src = src.replace(r'\_', '&#95;')
    src = src.replace(r'\*', '&#42;')

    # emphasis
    strong_patterns = [('STRONG', r'\*\*(.*?)\*\*'), ('B', r'__(.*?)__')]
    for tag, strong_pattern in strong_patterns:
        src = re.sub(strong_pattern, r'<%s>\1</%s>' % (tag, tag), src)

    em_patterns = [('EM', r'\*(.*?)\*'), ('I', r'\_(.*?)\_')]
    for tag, em_pattern in em_patterns:
        src = re.sub(em_pattern, r'<%s>\1</%s>' % (tag, tag), src)

    # inline code
    code_pattern = r'\`(.*?)\`'
    src = re.sub(code_pattern, r'<code>\1</code>', src)

    # restore tags, from the last one to the first one
    while True:
        pos = src.rfind(rstr)
        if pos == -1:
            break
        repl = tags.pop()
        src = src[:pos] + repl + src[pos + len(rstr):]

    src = '<p>' + src + '</p>'

    return src, scripts
def apply_markdown(src):
    """Apply the span-level Markdown markup found in src.

    Steps, in this order :
    - inline links [text](url) and reference links [text][id] are replaced
      by <a> tags; the url of a reference link is read in the module-level
      dictionary refs, with the id (or the link text if the id is empty or
      unknown) converted to lowercase
    - HTML tags are replaced by a random placeholder so that the _ and *
      they contain are not taken for markup; a < that does not open a tag
      is replaced by &lt; and an escaped backtick by &#96;
    - the content of inline code is escaped by s_escape
    - escaped characters are replaced by HTML entities, then the markup for
      strong, emphasis and inline code is replaced by tags
    - the HTML tags are restored and the result is enclosed in <p></p>

    Return a tuple (html, scripts); scripts is always an empty list here.

    Raise KeyError if a reference link uses a reference that is not in refs.
    """
    scripts = []

    i = 0
    while i < len(src):
        if src[i] == '[':
            start_a = i + 1
            # search the closing ], ignoring those escaped by a backslash
            while True:
                end_a = src.find(']', i)
                if end_a == -1:
                    break
                if src[end_a - 1] == '\\':
                    i = end_a + 1
                else:
                    break
            if end_a > -1 and '\n' not in src[start_a:end_a]:
                link = src[start_a:end_a]
                rest = src[end_a + 1:].lstrip()
                tag = None
                if rest and rest[0] == '(':
                    # inline link
                    j = 0
                    while True:
                        end_href = rest.find(')', j)
                        if end_href == -1:
                            break
                        if rest[end_href - 1] == '\\':
                            j = end_href + 1
                        else:
                            break
                    if end_href > -1 and '\n' not in rest[:end_href]:
                        tag = ('<a href="' + rest[1:end_href] + '">' + link
                               + '</a>')
                        tail = rest[end_href + 1:]
                elif rest and rest[0] == '[':
                    # reference link
                    j = 0
                    while True:
                        end_key = rest.find(']', j)
                        if end_key == -1:
                            break
                        if rest[end_key - 1] == '\\':
                            j = end_key + 1
                        else:
                            break
                    if end_key > -1 and '\n' not in rest[:end_key]:
                        # use the id between the second brackets; the link
                        # text is the reference for [text][] and remains a
                        # fallback when the id is not defined
                        ref_id = rest[1:end_key]
                        key = ref_id if ref_id.lower() in refs else link
                        if key.lower() not in refs:
                            raise KeyError('unknown reference %s'
                                           % (ref_id or link))
                        url = refs[key.lower()]
                        tag = '<a href="' + url + '">' + link + '</a>'
                        tail = rest[end_key + 1:]
                if tag is not None:
                    src = src[:start_a - 1] + tag + tail
                    # resume on the character that follows the tag, so that
                    # a link right after this one is not missed
                    i = start_a - 1 + len(tag)
                    continue
        i += 1

    # before applying the markup with _ and *, isolate HTML tags because
    # they can contain these characters
    # We replace them temporarily by a random string. The string starts
    # and ends with a whitespace so that in the new source, the text before
    # the random string has a word end. E.g. <h2>_italic_</h2> is replaced
    # by something like " agFyf_italic_ agFyf" so that the markdown for
    # _italic_ can be applied.
    rstr = ' ' + ''.join(random.choice(letters) for _ in range(16)) + ' '

    i = 0
    tags = []
    while i < len(src):
        if src[i] == '<':
            # quote is the quote character of the attribute value being
            # read, None outside of a quoted value
            quote = None
            j = i + 1
            while j < len(src):
                char = src[j]
                if quote is not None:
                    if char == quote and src[j - 1] != '\\':
                        quote = None
                elif char == '"' or char == "'":
                    quote = char
                elif char == '>':
                    tags.append(src[i:j + 1])
                    src = src[:i] + rstr + src[j + 1:]
                    # the final i += 1 moves to the character that follows
                    # the placeholder
                    i += len(rstr) - 1
                    break
                elif char == '\n':
                    # if a sign < is not followed by > in the same line, it
                    # is the sign "lesser than"
                    src = src[:i] + '&lt;' + src[i + 1:]
                    i += 3
                    break
                j += 1
            else:
                # same thing if the end of the text is reached
                src = src[:i] + '&lt;' + src[i + 1:]
                i += 3
        elif src[i] == '`':
            if i == 0 or src[i - 1] != '\\':
                # ignore the content of inline code
                j = i + 1
                while j < len(src):
                    if src[j] == '`' and src[j - 1] != '\\':
                        break
                    j += 1
                i = j
            else:
                # replace the escaped ` by an entity, otherwise it would
                # be taken for an inline code delimiter below
                src = src[:i - 1] + '&#96;' + src[i + 1:]
                # the entity ends at i + 3
                i += 3
        i += 1

    # escape "<", ">", "&" and "_" in inline code
    code_pattern = r'\`(.*?)\`'
    src = re.sub(code_pattern, s_escape, src)

    # replace escaped ` _ * by HTML entities so that they are not taken for
    # markup by the substitutions below
    # NOTE(review): the first pattern matches 2 backslashes followed by a
    # backtick - confirm that this is intended
    src = src.replace(r'\\`', '&#96;')
    src = src.replace(r'\_', '&#95;')
    src = src.replace(r'\*', '&#42;')

    # emphasis
    strong_patterns = [('STRONG', r'\*\*(.+?)\*\*'), ('B', r'__(.+?)__')]
    for tag, strong_pattern in strong_patterns:
        src = re.sub(strong_pattern, r'<%s>\1</%s>' % (tag, tag), src)

    # EM for *xxx*
    src = re.sub(r'\*(.+?)\*', r'<%s>\1</%s>' % ('EM', 'EM'), src)

    # I for _xxx_ where the _ are at the beginning or end of a word
    # An underscore inside a word is ignored.
    src = re.sub(r'\b_(.*?)_\b', r'<I>\1</I>', src, flags=re.M)

    # inline code
    code_pattern = r'\`(.*?)\`'
    src = re.sub(code_pattern, r'<code>\1</code>', src)

    # restore tags, from the last one to the first one
    while True:
        pos = src.rfind(rstr)
        if pos == -1:
            break
        repl = tags.pop()
        src = src[:pos] + repl + src[pos + len(rstr):]

    src = '<p>' + src + '</p>'

    return src, scripts
def mark(src):
    """Convert Markdown source into HTML.

    A first pass goes through the lines and inserts the opening and closing
    tags of blockquotes, unordered lists and ordered lists. A second pass
    splits the lines into sections: indented or fenced code blocks become
    CodeBlock instances, the other lines are accumulated in Marked
    instances. In the same pass the content of <script> blocks is collected,
    lines starting with # are turned into <Hn> headers and link reference
    definitions (lines matching ref_pattern) are stored in the module-level
    dictionary refs, which is reset at each call.

    Return a tuple (html, scripts): html is the concatenation of the HTML
    returned by the to_html() method of each section, scripts is the list of
    script sources found in the text, followed by those returned by the
    sections.
    """
    global refs
    refs = {}

    # split source in sections
    # sections can be :
    # - a block-level HTML element (markdown syntax will not be processed)
    # - a script
    # - a span-level HTML tag (markdown syntax will be processed)
    # - a code block

    # normalise line feeds
    src = src.replace('\r\n', '\n')

    # a line followed by a line of = or - is a header of level 1 or 2
    src = re.sub(r'(.*?)\n=+\n', '\n# \\1\n', src)
    src = re.sub(r'(.*?)\n-+\n', '\n## \\1\n', src)

    lines = src.split('\n') + ['']

    # first pass : blockquotes and lists. bq and ul are the current nesting
    # levels, ol is 1 inside an ordered list
    i = bq = 0
    ul = ol = 0
    while i < len(lines):
        # enclose lines starting by > in a blockquote
        if lines[i].startswith('>'):
            nb = 1
            while nb < len(lines[i]) and lines[i][nb] == '>':
                nb += 1
            lines[i] = lines[i][nb:]
            if nb > bq:
                lines.insert(i, '<blockquote>' * (nb - bq))
                i += 1
                bq = nb
            elif nb < bq:
                lines.insert(i, '</blockquote>' * (bq - nb))
                i += 1
                bq = nb
        elif bq > 0:
            lines.insert(i, '</blockquote>' * bq)
            i += 1
            bq = 0

        # unordered lists
        stripped = lines[i].lstrip()
        if (stripped and stripped[0] in '-+*'
                and len(stripped) > 1
                and stripped[1] == ' '
                and (i == 0 or ul or not lines[i - 1].strip())):
            # line indentation indicates nesting level
            nb = 1 + len(lines[i]) - len(stripped)
            lines[i] = '<li>' + lines[i][nb:]
            if nb > ul:
                lines.insert(i, '<ul>' * (nb - ul))
                i += 1
            elif nb < ul:
                lines.insert(i, '</ul>' * (ul - nb))
                i += 1
            ul = nb
        elif ul and not lines[i].strip():
            # an empty line ends the list unless the next line is indented
            # or is another list item
            if (i < len(lines) - 1 and lines[i + 1].strip()
                    and not lines[i + 1].startswith(' ')):
                nline = lines[i + 1].lstrip()
                if nline[0] in '-+*' and len(nline) > 1 and nline[1] == ' ':
                    pass
                else:
                    lines.insert(i, '</ul>' * ul)
                    i += 1
                    ul = 0

        # ordered lists
        mo = re.search(r'^(\d+\.)', lines[i])
        if mo:
            if not ol:
                lines.insert(i, '<ol>')
                i += 1
            lines[i] = '<li>' + lines[i][len(mo.group(1)):]
            ol = 1
        elif (ol and not lines[i].strip() and i < len(lines) - 1
                and not lines[i + 1].startswith(' ')
                and not re.search(r'^(\d+\.)', lines[i + 1])):
            lines.insert(i, '</ol>')
            i += 1
            ol = 0

        i += 1

    # close what is still open at the end of the text
    if ul:
        lines.append('</ul>' * ul)
    if ol:
        lines.append('</ol>' * ol)
    if bq:
        lines.append('</blockquote>' * bq)

    # second pass : group the lines in sections
    sections = []
    scripts = []
    section = Marked()

    i = 0
    while i < len(lines):
        line = lines[i]
        if line.strip() and line.startswith('    '):
            # code block : lines indented by 4 spaces. The test uses the
            # same 4-character prefix as the one removed by line[4:]
            if isinstance(section, Marked) and section.line:
                sections.append(section)
            section = CodeBlock(line[4:])
            j = i + 1
            while j < len(lines) and lines[j].startswith('    '):
                section.lines.append(lines[j][4:])
                j += 1
            sections.append(section)
            section = Marked()
            i = j
            continue

        elif line.strip() and line.startswith("```"):
            # fenced code blocks à la Github Flavoured Markdown
            if isinstance(section, Marked) and section.line:
                sections.append(section)
            section = CodeBlock(line)
            j = i + 1
            while j < len(lines) and not lines[j].startswith("```"):
                section.lines.append(lines[j])
                j += 1
            sections.append(section)
            section = Marked()
            # skip the closing fence
            i = j + 1
            continue

        elif line.lower().startswith('<script'):
            if isinstance(section, Marked) and section.line:
                sections.append(section)
                section = Marked()
            j = i + 1
            while j < len(lines):
                if lines[j].lower().startswith('</script>'):
                    scripts.append('\n'.join(lines[i + 1:j]))
                    # blank the lines of the script, tags included
                    for k in range(i, j + 1):
                        lines[k] = ''
                    break
                j += 1
            i = j
            continue

        # atx header
        elif line.startswith('#'):
            # count the leading #, at most 6 since HTML has no <H7>
            level = 1
            while level < len(line) and line[level] == '#' and level < 6:
                level += 1
            # the title starts after the character that follows the #s
            if not line[level + 1:].strip():
                if level == 1:
                    # nothing but a single # : ignore the line
                    i += 1
                    continue
                else:
                    # only #s : the last one is used as the title
                    lines[i] = '<H%s>%s</H%s>\n' % (level - 1, '#', level - 1)
            else:
                lines[i] = '<H%s>%s</H%s>\n' % (level, line[level + 1:], level)
            # i is not incremented : the line now holds the header tag and
            # is added to the current section at the next iteration

        else:
            mo = re.search(ref_pattern, line)
            if mo is not None:
                # link reference definition : store it, keys are lowercased
                if isinstance(section, Marked) and section.line:
                    sections.append(section)
                    section = Marked()
                key = mo.group(1)
                value = URL(mo.group(2))
                refs[key.lower()] = value
            else:
                if not line.strip():
                    line = '<p></p>'
                if section.line:
                    section.line += '\n'
                section.line += line

            i += 1

    if isinstance(section, Marked) and section.line:
        sections.append(section)

    res = ''
    for section in sections:
        mk, _scripts = section.to_html()
        res += mk
        scripts += _scripts

    return res, scripts
def apply_markdown(src):
    """Apply the span-level Markdown markup found in src.

    Steps, in this order :
    - inline links [text](url) and reference links [text][id] are replaced
      by <a> tags; the url of a reference link is read in the module-level
      dictionary refs, with the id (or the link text if the id is empty or
      unknown) converted to lowercase
    - HTML tags are replaced by a random placeholder so that the _ and *
      they contain are not taken for markup; a < that does not open a tag
      is replaced by &lt;
    - the content of inline code is escaped by s_escape
    - escaped characters are replaced by HTML entities, then the markup for
      strong, emphasis and inline code is replaced by tags
    - the HTML tags are restored and the result is enclosed in <p></p>

    Return a tuple (html, scripts); scripts is always an empty list here.

    Raise KeyError if a reference link uses a reference that is not in refs.
    """
    scripts = []

    i = 0
    while i < len(src):
        if src[i] == '[':
            start_a = i + 1
            # search the closing ], ignoring those escaped by a backslash
            while True:
                end_a = src.find(']', i)
                if end_a == -1:
                    break
                if src[end_a - 1] == '\\':
                    i = end_a + 1
                else:
                    break
            if end_a > -1 and '\n' not in src[start_a:end_a]:
                link = src[start_a:end_a]
                rest = src[end_a + 1:].lstrip()
                tag = None
                if rest and rest[0] == '(':
                    # inline link
                    j = 0
                    while True:
                        end_href = rest.find(')', j)
                        if end_href == -1:
                            break
                        if rest[end_href - 1] == '\\':
                            j = end_href + 1
                        else:
                            break
                    if end_href > -1 and '\n' not in rest[:end_href]:
                        tag = ('<a href="' + rest[1:end_href] + '">' + link
                               + '</a>')
                        tail = rest[end_href + 1:]
                elif rest and rest[0] == '[':
                    # reference link
                    j = 0
                    while True:
                        end_key = rest.find(']', j)
                        if end_key == -1:
                            break
                        if rest[end_key - 1] == '\\':
                            j = end_key + 1
                        else:
                            break
                    if end_key > -1 and '\n' not in rest[:end_key]:
                        # use the id between the second brackets; the link
                        # text is the reference for [text][] and remains a
                        # fallback when the id is not defined
                        ref_id = rest[1:end_key]
                        key = ref_id if ref_id.lower() in refs else link
                        if key.lower() not in refs:
                            raise KeyError('unknown reference %s'
                                           % (ref_id or link))
                        url = refs[key.lower()]
                        tag = '<a href="' + url + '">' + link + '</a>'
                        tail = rest[end_key + 1:]
                if tag is not None:
                    src = src[:start_a - 1] + tag + tail
                    # resume on the character that follows the tag, so that
                    # a link right after this one is not missed
                    i = start_a - 1 + len(tag)
                    continue
        i += 1

    # before applying the markup with _ and *, isolate HTML tags because
    # they can contain these characters
    # We replace them temporarily by a random string. The string starts
    # and ends with a whitespace so that in the new source, the text before
    # the random string has a word end. E.g. <h2>_italic_</h2> is replaced
    # by something like " agFyf_italic_ agFyf" so that the markdown for
    # _italic_ can be applied.
    rstr = ' ' + ''.join(random.choice(letters) for _ in range(16)) + ' '

    i = 0
    tags = []
    while i < len(src):
        if src[i] == '<':
            # quote is the quote character of the attribute value being
            # read, None outside of a quoted value
            quote = None
            j = i + 1
            while j < len(src):
                char = src[j]
                if quote is not None:
                    if char == quote and src[j - 1] != '\\':
                        quote = None
                elif char == '"' or char == "'":
                    quote = char
                elif char == '>':
                    tags.append(src[i:j + 1])
                    src = src[:i] + rstr + src[j + 1:]
                    # the final i += 1 moves to the character that follows
                    # the placeholder
                    i += len(rstr) - 1
                    break
                elif char == '\n':
                    # if a sign < is not followed by > in the same line, it
                    # is the sign "lesser than"
                    src = src[:i] + '&lt;' + src[i + 1:]
                    i += 3
                    break
                j += 1
            else:
                # same thing if the end of the text is reached
                src = src[:i] + '&lt;' + src[i + 1:]
                i += 3
        elif src[i] == '`' and (i == 0 or src[i - 1] != '\\'):
            # ignore the content of inline code
            j = i + 1
            while j < len(src):
                if src[j] == '`' and src[j - 1] != '\\':
                    break
                j += 1
            i = j
        i += 1

    # escape "<", ">", "&" and "_" in inline code
    code_pattern = r'\`(.*?)\`'
    src = re.sub(code_pattern, s_escape, src)

    # replace escaped ` _ * by HTML entities so that they are not taken for
    # markup by the substitutions below
    # NOTE(review): the first pattern matches 2 backslashes followed by a
    # backtick - confirm that this is intended
    src = src.replace(r'\\`', '&#96;')
    src = src.replace(r'\_', '&#95;')
    src = src.replace(r'\*', '&#42;')

    # emphasis
    strong_patterns = [('STRONG', r'\*\*(.+?)\*\*'), ('B', r'__(.+?)__')]
    for tag, strong_pattern in strong_patterns:
        src = re.sub(strong_pattern, r'<%s>\1</%s>' % (tag, tag), src)

    # EM for *xxx*
    src = re.sub(r'\*(.+?)\*', r'<%s>\1</%s>' % ('EM', 'EM'), src)

    # I for _xxx_ where the _ are at the beginning or end of a word
    # An underscore inside a word is ignored.
    src = re.sub(r'\b_(.*?)_\b', r'<I>\1</I>', src, flags=re.M)

    # inline code
    code_pattern = r'\`(.*?)\`'
    src = re.sub(code_pattern, r'<code>\1</code>', src)

    # restore tags, from the last one to the first one
    while True:
        pos = src.rfind(rstr)
        if pos == -1:
            break
        repl = tags.pop()
        src = src[:pos] + repl + src[pos + len(rstr):]

    src = '<p>' + src + '</p>'

    return src, scripts
def apply_markdown(src):
    """Apply the span-level Markdown markup found in src.

    Steps, in this order :
    - inline links [text](url) and reference links [text][id] are replaced
      by <a> tags; the url of a reference link is read in the module-level
      dictionary refs, with the id (or the link text if the id is empty or
      unknown) converted to lowercase
    - HTML tags are replaced by a random placeholder so that the _ and *
      they contain are not taken for markup; a < that does not open a tag
      is replaced by &lt;
    - the content of inline code is escaped by s_escape
    - escaped characters are replaced by HTML entities, then the markup for
      strong, emphasis and inline code is replaced by tags
    - the HTML tags are restored and the result is enclosed in <p></p>

    Return a tuple (html, scripts); scripts is always an empty list here.

    Raise KeyError if a reference link uses a reference that is not in refs.
    """
    scripts = []

    i = 0
    while i < len(src):
        if src[i] == '[':
            start_a = i + 1
            # search the closing ], ignoring those escaped by a backslash
            while True:
                end_a = src.find(']', i)
                if end_a == -1:
                    break
                if src[end_a - 1] == '\\':
                    i = end_a + 1
                else:
                    break
            if end_a > -1 and '\n' not in src[start_a:end_a]:
                link = src[start_a:end_a]
                rest = src[end_a + 1:].lstrip()
                tag = None
                if rest and rest[0] == '(':
                    # inline link
                    j = 0
                    while True:
                        end_href = rest.find(')', j)
                        if end_href == -1:
                            break
                        if rest[end_href - 1] == '\\':
                            j = end_href + 1
                        else:
                            break
                    if end_href > -1 and '\n' not in rest[:end_href]:
                        tag = '<a href="' + rest[1:end_href] + '">' + link + '</a>'
                        tail = rest[end_href + 1:]
                elif rest and rest[0] == '[':
                    # reference link
                    j = 0
                    while True:
                        end_key = rest.find(']', j)
                        if end_key == -1:
                            break
                        if rest[end_key - 1] == '\\':
                            j = end_key + 1
                        else:
                            break
                    if end_key > -1 and '\n' not in rest[:end_key]:
                        # use the id between the second brackets; the link
                        # text is the reference for [text][] and remains a
                        # fallback when the id is not defined
                        ref_id = rest[1:end_key]
                        key = ref_id if ref_id.lower() in refs else link
                        if key.lower() not in refs:
                            raise KeyError('unknown reference %s'
                                           % (ref_id or link))
                        url = refs[key.lower()]
                        tag = '<a href="' + url + '">' + link + '</a>'
                        tail = rest[end_key + 1:]
                if tag is not None:
                    src = src[:start_a - 1] + tag + tail
                    # resume on the character that follows the tag, so that
                    # a link right after this one is not missed
                    i = start_a - 1 + len(tag)
                    continue
        i += 1

    # before applying the markup with _ and *, isolate HTML tags because
    # they can contain these characters
    # We replace them temporarily by a random string
    rstr = ''.join(random.choice(letters) for _ in range(16))

    i = 0
    tags = []
    while i < len(src):
        if src[i] == '<':
            # quote is the quote character of the attribute value being
            # read, None outside of a quoted value
            quote = None
            j = i + 1
            while j < len(src):
                char = src[j]
                if quote is not None:
                    if char == quote and src[j - 1] != '\\':
                        quote = None
                elif char == '"' or char == "'":
                    quote = char
                elif char == '>':
                    tags.append(src[i:j + 1])
                    src = src[:i] + rstr + src[j + 1:]
                    # the final i += 1 moves to the character that follows
                    # the placeholder
                    i += len(rstr) - 1
                    break
                elif char == '\n':
                    # if a sign < is not followed by > in the same line, it
                    # is the sign "lesser than"
                    src = src[:i] + '&lt;' + src[i + 1:]
                    i += 3
                    break
                j += 1
            else:
                # same thing if the end of the text is reached
                src = src[:i] + '&lt;' + src[i + 1:]
                i += 3
        elif src[i] == '`' and (i == 0 or src[i - 1] != '\\'):
            # ignore the content of inline code
            j = i + 1
            while j < len(src):
                if src[j] == '`' and src[j - 1] != '\\':
                    break
                j += 1
            i = j
        i += 1

    # escape "<", ">", "&" and "_" in inline code
    code_pattern = r'\`(.*?)\`'
    src = re.sub(code_pattern, s_escape, src)

    # replace escaped ` _ * by HTML entities so that they are not taken for
    # markup by the substitutions below
    # NOTE(review): the first pattern matches 2 backslashes followed by a
    # backtick - confirm that this is intended
    src = src.replace(r'\\`', '&#96;')
    src = src.replace(r'\_', '&#95;')
    src = src.replace(r'\*', '&#42;')

    # emphasis
    strong_patterns = [('STRONG', r'\*\*(.*?)\*\*'), ('B', r'__(.*?)__')]
    for tag, strong_pattern in strong_patterns:
        src = re.sub(strong_pattern, r'<%s>\1</%s>' % (tag, tag), src)

    em_patterns = [('EM', r'\*(.*?)\*'), ('I', r'\_(.*?)\_')]
    for tag, em_pattern in em_patterns:
        src = re.sub(em_pattern, r'<%s>\1</%s>' % (tag, tag), src)

    # inline code
    code_pattern = r'\`(.*?)\`'
    src = re.sub(code_pattern, r'<code>\1</code>', src)

    # restore tags, from the last one to the first one
    while True:
        pos = src.rfind(rstr)
        if pos == -1:
            break
        repl = tags.pop()
        src = src[:pos] + repl + src[pos + len(rstr):]

    src = '<p>' + src + '</p>'

    return src, scripts