Example #1
0
def sub(pattern, repl, string, count=0, flags=0):
    """Replace the leftmost non-overlapping matches of pattern in string.

    repl is either a string, in which case backslash escapes in it are
    processed, or a callable that receives the match object and must
    return the replacement string.

    The call is delegated to `_jsre` when `_jsre._is_valid(pattern)`
    is true, and to the object returned by `_pyre()` otherwise; the
    result of the chosen backend's `sub` is returned unchanged.
    """
    # _pyre() is only called when _jsre rejects the pattern
    backend = _jsre if _jsre._is_valid(pattern) else _pyre()
    return backend.sub(pattern, repl, string, count, flags)
Example #2
0
def sub(pattern, repl, string, count=0, flags=0):
    """Substitute repl for the leftmost non-overlapping matches of pattern.

    repl may be a string (its backslash escapes are processed) or a
    callable taking the match object and returning the replacement
    string.  The substitution itself is performed by `_jsre.sub` when
    `_jsre._is_valid(pattern)` holds, else by `_pyre().sub`.
    """
    args = (pattern, repl, string, count, flags)
    if not _jsre._is_valid(pattern):
        # pattern rejected by _jsre: use the backend returned by _pyre()
        return _pyre().sub(*args)
    return _jsre.sub(*args)
Example #3
0
def mark(src):
    """Convert the Markdown text `src` to HTML.

    The conversion is done in two passes over the lines of `src`:

    1. block-level markers (blockquotes, unordered lists, ordered lists)
       are rewritten in place, and lines holding the opening / closing
       HTML tags are inserted where the nesting level changes;
    2. the resulting lines are grouped in sections: a `CodeBlock` for
       indented or fenced code, a `Marked` for the rest. Scripts are
       collected separately, headers are rewritten as <Hn> elements and
       lines matching `ref_pattern` are stored in the global `refs`.

    Parameters:
        src: the Markdown source, as a string.

    Returns:
        A tuple `(html, scripts)`: `html` is the concatenation of the
        first item returned by `to_html()` for every section, `scripts`
        is the list of script bodies found in `src`, extended with the
        second item returned by each `to_html()`.

    Side effects:
        The module-level name `refs` is rebound to a new dictionary,
        filled with the link references found in `src` (lowercased key,
        value built by `URL`).
    """
    global refs
    refs = {}
    # split source in sections
    # sections can be :
    # - a block-level HTML element (markdown syntax will not be processed)
    # - a script
    # - a span-level HTML tag (markdown syntax will be processed)
    # - a code block

    # normalise line feeds
    src = src.replace("\r\n", "\n")

    # a line underlined with "=" or "-" is rewritten as a "#" / "##" header
    src = re.sub(r"(.*?)\n=+\n", "\n# \\1\n", src)
    src = re.sub(r"(.*?)\n-+\n", "\n## \\1\n", src)

    lines = src.split("\n") + [""]

    # bq and ul hold the current blockquote / unordered list depth,
    # ol is 1 while an ordered list is open
    i = bq = 0
    ul = ol = 0

    # NOTE: lines are inserted in the list while it is scanned; each
    # insertion is followed by "i += 1" so that i keeps pointing at the
    # line being processed.
    while i < len(lines):

        # enclose lines starting by > in a blockquote
        if lines[i].startswith(">"):
            nb = 1
            while nb < len(lines[i]) and lines[i][nb] == ">":
                nb += 1
            lines[i] = lines[i][nb:]
            if nb > bq:
                lines.insert(i, "<blockquote>" * (nb - bq))
                i += 1
                bq = nb
            elif nb < bq:
                lines.insert(i, "</blockquote>" * (bq - nb))
                i += 1
                bq = nb
        elif bq > 0:
            # first line after a blockquote: close all the open levels
            lines.insert(i, "</blockquote>" * bq)
            i += 1
            bq = 0

        # unordered lists : a line starting with "- ", "+ " or "* ",
        # either inside a list or after a blank line / at the start
        if (
            lines[i].strip()
            and lines[i].lstrip()[0] in "-+*"
            and len(lines[i].lstrip()) > 1
            and lines[i].lstrip()[1] == " "
            and (i == 0 or ul or not lines[i - 1].strip())
        ):
            # line indentation indicates nesting level
            nb = 1 + len(lines[i]) - len(lines[i].lstrip())
            lines[i] = "<li>" + lines[i][nb:]
            if nb > ul:
                lines.insert(i, "<ul>" * (nb - ul))
                i += 1
            elif nb < ul:
                lines.insert(i, "</ul>" * (ul - nb))
                i += 1
            ul = nb
        elif ul and not lines[i].strip():
            # blank line inside a list : the list is closed only if the
            # next line is not empty, not indented and not a list item
            if (
                i < len(lines) - 1
                and lines[i + 1].strip()
                and not lines[i + 1].startswith(" ")
            ):
                nline = lines[i + 1].lstrip()
                if not (nline[0] in "-+*" and len(nline) > 1 and nline[1] == " "):
                    lines.insert(i, "</ul>" * ul)
                    i += 1
                    ul = 0

        # ordered lists : lines starting with digits followed by a dot
        mo = re.search(r"^(\d+\.)", lines[i])
        if mo:
            if not ol:
                lines.insert(i, "<ol>")
                i += 1
            lines[i] = "<li>" + lines[i][len(mo.group(1)):]
            ol = 1
        elif (
            ol
            and not lines[i].strip()
            and i < len(lines) - 1
            and not lines[i + 1].startswith(" ")
            and not re.search(r"^(\d+\.)", lines[i + 1])
        ):
            lines.insert(i, "</ol>")
            i += 1
            ol = 0

        i += 1

    # close the elements still open at the end of the source
    if ul:
        lines.append("</ul>" * ul)
    if ol:
        lines.append("</ol>" * ol)
    if bq:
        lines.append("</blockquote>" * bq)

    sections = []
    scripts = []
    section = Marked()

    i = 0
    while i < len(lines):
        line = lines[i]
        if line.strip() and line.startswith("    "):
            # code block : consecutive lines indented by 4 spaces
            if isinstance(section, Marked) and section.line:
                sections.append(section)
            section = CodeBlock(line[4:])
            j = i + 1
            while j < len(lines) and lines[j].startswith("    "):
                section.lines.append(lines[j][4:])
                j += 1
            sections.append(section)
            section = Marked()
            i = j
            continue

        elif line.strip() and line.startswith("```"):
            # fenced code blocks à la Github Flavoured Markdown
            if isinstance(section, Marked) and section.line:
                sections.append(section)
            section = CodeBlock(line)
            j = i + 1
            while j < len(lines) and not lines[j].startswith("```"):
                section.lines.append(lines[j])
                j += 1
            sections.append(section)
            section = Marked()
            # skip the closing fence
            i = j + 1
            continue

        elif line.lower().startswith("<script"):
            if isinstance(section, Marked) and section.line:
                sections.append(section)
                section = Marked()
            j = i + 1
            while j < len(lines):
                if lines[j].lower().startswith("</script>"):
                    # store the script body and blank the lines it used
                    scripts.append("\n".join(lines[i + 1:j]))
                    for k in range(i, j + 1):
                        lines[k] = ""
                    break
                j += 1
            # if no closing tag was found, j == len(lines) and the loop ends
            i = j
            continue

        # atext header
        elif line.startswith("#"):
            level = 1
            while level < len(line) and line[level] == "#" and level <= 6:
                level += 1
            if not line[level + 1:].strip():
                if level == 1:
                    i += 1
                    continue
                # nothing after the "#" signs : the last one is the text
                lines[i] = "<H%s>%s</H%s>\n" % (level - 1, "#", level - 1)
            else:
                # HTML only defines H1 to H6 : never emit <H7>
                tag_level = min(level, 6)
                lines[i] = "<H%s>%s</H%s>\n" % (
                    tag_level,
                    line[level + 1:],
                    tag_level,
                )
            # i is not incremented here : the rewritten line is handled
            # again at the next iteration, by the last branch below

        else:
            mo = re.search(ref_pattern, line)
            if mo is not None:
                # link reference : store it, it produces no output
                if isinstance(section, Marked) and section.line:
                    sections.append(section)
                    section = Marked()
                key = mo.groups()[0]
                value = URL(mo.groups()[1])
                refs[key.lower()] = value
            else:
                if not line.strip():
                    line = "<p></p>"
                if section.line:
                    section.line += "\n"
                section.line += line

            i += 1

    if isinstance(section, Marked) and section.line:
        sections.append(section)

    res = ""
    for section in sections:
        mk, _scripts = section.to_html()
        res += mk
        scripts += _scripts
    return res, scripts
Example #4
0
def apply_markdown(src):
    """Apply the inline Markdown markup found in `src`.

    Steps, in this order:

    1. inline links `[text](url)` and reference links `[text][id]` are
       replaced by <a> elements; for a reference link the lowercased id
       is looked up in the module-level `refs` mapping, and an empty id
       (`[text][]`) means that the link text is the id;
    2. HTML tags are temporarily replaced by a random string, because
       they may contain the characters `_` and `*`;
    3. the content of inline code is passed to `s_escape`, then
       emphasis (`**`, `__`, `*`, `_`) and inline code are converted;
    4. the HTML tags saved at step 2 are restored.

    Parameters:
        src: the text to convert.

    Returns:
        A tuple `(html, scripts)` where `html` is the converted text
        enclosed in <p>...</p> and `scripts` is an empty list.

    Raises:
        KeyError: if a reference link uses an id that is not in `refs`.
    """
    scripts = []

    i = 0
    while i < len(src):
        if src[i] == "[":
            start_a = i + 1
            # search the closing bracket, ignoring those escaped by "\"
            while True:
                end_a = src.find("]", i)
                if end_a == -1:
                    break
                if src[end_a - 1] == "\\":
                    i = end_a + 1
                else:
                    break
            if end_a > -1 and "\n" not in src[start_a:end_a]:
                link = src[start_a:end_a]
                rest = src[end_a + 1:].lstrip()
                if rest and rest[0] == "(":
                    # inline link : [text](href)
                    j = 0
                    while True:
                        end_href = rest.find(")", j)
                        if end_href == -1:
                            break
                        if rest[end_href - 1] == "\\":
                            j = end_href + 1
                        else:
                            break
                    if end_href > -1 and "\n" not in rest[:end_href]:
                        tag = '<a href="' + rest[1:end_href] + '">' + link + "</a>"
                        src = src[:start_a - 1] + tag + rest[end_href + 1:]
                        i = start_a + len(tag)
                elif rest and rest[0] == "[":
                    # reference link : [text][id]
                    j = 0
                    while True:
                        end_key = rest.find("]", j)
                        if end_key == -1:
                            break
                        if rest[end_key - 1] == "\\":
                            j = end_key + 1
                        else:
                            break
                    if end_key > -1 and "\n" not in rest[:end_key]:
                        # the id is read for each link; if it is empty,
                        # the link text is used instead
                        key = rest[1:end_key] or link
                        if key.lower() not in refs:
                            raise KeyError("unknown reference %s" % key)
                        url = refs[key.lower()]
                        tag = '<a href="' + url + '">' + link + "</a>"
                        src = src[:start_a - 1] + tag + rest[end_key + 1:]
                        i = start_a + len(tag)

        i += 1

    # before applying the markup with _ and *, isolate HTML tags because
    # they can contain these characters

    # We replace them temporarily by a random string
    rstr = "".join(random.choice(letters) for _ in range(16))

    i = 0
    # state is the quote character of the attribute value being read
    # inside a tag, or None; start is the index of its opening quote and
    # data the characters read since then
    state = None
    start = -1
    data = ""
    tags = []
    while i < len(src):
        if src[i] == "<":
            j = i + 1
            while j < len(src):
                if src[j] == '"' or src[j] == "'":
                    if state == src[j] and src[j - 1] != "\\":
                        # closing quote
                        state = None
                        j = start + len(data) + 1
                        data = ""
                    elif state is None:
                        # opening quote
                        state = src[j]
                        start = j
                    else:
                        data += src[j]
                elif src[j] == ">" and state is None:
                    # end of tag : save it and put the placeholder instead
                    tags.append(src[i:j + 1])
                    src = src[:i] + rstr + src[j + 1:]
                    i += len(rstr)
                    break
                elif state == '"' or state == "'":
                    data += src[j]
                elif src[j] == "\n":
                    # if a sign < is not followed by > in the same line, it
                    # is the sign "lesser than"
                    src = src[:i] + "&lt;" + src[i + 1:]
                    j = i + 4
                    break
                j += 1
        elif src[i] == "`" and i > 0 and src[i - 1] != "\\":
            # ignore the content of inline code
            j = i + 1
            while j < len(src):
                if src[j] == "`" and src[j - 1] != "\\":
                    break
                j += 1
            i = j
        i += 1

    # escape "<", ">", "&" and "_" in inline code
    code_pattern = r"\`(.*?)\`"
    src = re.sub(code_pattern, s_escape, src)

    # replace escaped ` _ * by HTML characters
    # NOTE(review): the first pattern is 2 backslashes followed by a
    # backquote, unlike the next two - confirm that this is intended
    src = src.replace(r"\\`", "&#96;")
    src = src.replace(r"\_", "&#95;")
    src = src.replace(r"\*", "&#42;")

    # emphasis : the double markers are handled before the single ones
    strong_patterns = [("STRONG", r"\*\*(.*?)\*\*"), ("B", r"__(.*?)__")]
    for tag, strong_pattern in strong_patterns:
        src = re.sub(strong_pattern, r"<%s>\1</%s>" % (tag, tag), src)

    em_patterns = [("EM", r"\*(.*?)\*"), ("I", r"\_(.*?)\_")]
    for tag, em_pattern in em_patterns:
        src = re.sub(em_pattern, r"<%s>\1</%s>" % (tag, tag), src)

    # inline code
    src = re.sub(code_pattern, r"<code>\1</code>", src)

    # restore tags, from the last placeholder to the first one
    while True:
        pos = src.rfind(rstr)
        if pos == -1:
            break
        repl = tags.pop()
        src = src[:pos] + repl + src[pos + len(rstr):]

    src = "<p>" + src + "</p>"

    return src, scripts
Example #5
0
def mark(src):
    """Convert the Markdown text `src` to HTML.

    The conversion is done in two passes over the lines of `src`:

    1. block-level markers (blockquotes, unordered lists, ordered lists)
       are rewritten in place, and lines holding the opening / closing
       HTML tags are inserted where the nesting level changes;
    2. the resulting lines are grouped in sections: a `CodeBlock` for
       indented or fenced code, a `Marked` for the rest. Scripts are
       collected separately, headers are rewritten as <Hn> elements and
       lines matching `ref_pattern` are stored in the global `refs`.

    Parameters:
        src: the Markdown source, as a string.

    Returns:
        A tuple `(html, scripts)`: `html` is the concatenation of the
        first item returned by `to_html()` for every section, `scripts`
        is the list of script bodies found in `src`, extended with the
        second item returned by each `to_html()`.

    Side effects:
        The module-level name `refs` is rebound to a new dictionary,
        filled with the link references found in `src` (lowercased key,
        value built by `URL`).
    """
    global refs
    refs = {}
    # split source in sections
    # sections can be :
    # - a block-level HTML element (markdown syntax will not be processed)
    # - a script
    # - a span-level HTML tag (markdown syntax will be processed)
    # - a code block

    # normalise line feeds
    src = src.replace('\r\n', '\n')

    # a line underlined with "=" or "-" is rewritten as a "#" / "##" header
    src = re.sub(r'(.*?)\n=+\n', '\n# \\1\n', src)
    src = re.sub(r'(.*?)\n-+\n', '\n## \\1\n', src)

    lines = src.split('\n') + ['']

    # bq and ul hold the current blockquote / unordered list depth,
    # ol is 1 while an ordered list is open
    i = bq = 0
    ul = ol = 0

    # NOTE: lines are inserted in the list while it is scanned; each
    # insertion is followed by "i += 1" so that i keeps pointing at the
    # line being processed.
    while i < len(lines):

        # enclose lines starting by > in a blockquote
        if lines[i].startswith('>'):
            nb = 1
            while nb < len(lines[i]) and lines[i][nb] == '>':
                nb += 1
            lines[i] = lines[i][nb:]
            if nb > bq:
                lines.insert(i, '<blockquote>' * (nb - bq))
                i += 1
                bq = nb
            elif nb < bq:
                lines.insert(i, '</blockquote>' * (bq - nb))
                i += 1
                bq = nb
        elif bq > 0:
            # first line after a blockquote: close all the open levels
            lines.insert(i, '</blockquote>' * bq)
            i += 1
            bq = 0

        # unordered lists : a line starting with "- ", "+ " or "* ",
        # either inside a list or after a blank line / at the start
        if (lines[i].strip() and lines[i].lstrip()[0] in '-+*'
                and len(lines[i].lstrip()) > 1
                and lines[i].lstrip()[1] == ' '
                and (i == 0 or ul or not lines[i - 1].strip())):
            # line indentation indicates nesting level
            nb = 1 + len(lines[i]) - len(lines[i].lstrip())
            lines[i] = '<li>' + lines[i][nb:]
            if nb > ul:
                lines.insert(i, '<ul>' * (nb - ul))
                i += 1
            elif nb < ul:
                lines.insert(i, '</ul>' * (ul - nb))
                i += 1
            ul = nb
        elif ul and not lines[i].strip():
            # blank line inside a list : the list is closed only if the
            # next line is not empty, not indented and not a list item
            if (i < len(lines) - 1 and lines[i + 1].strip()
                    and not lines[i + 1].startswith(' ')):
                nline = lines[i + 1].lstrip()
                if not (nline[0] in '-+*' and len(nline) > 1
                        and nline[1] == ' '):
                    lines.insert(i, '</ul>' * ul)
                    i += 1
                    ul = 0

        # ordered lists : lines starting with digits followed by a dot
        mo = re.search(r'^(\d+\.)', lines[i])
        if mo:
            if not ol:
                lines.insert(i, '<ol>')
                i += 1
            lines[i] = '<li>' + lines[i][len(mo.group(1)):]
            ol = 1
        elif (ol and not lines[i].strip() and i < len(lines) - 1
                and not lines[i + 1].startswith(' ')
                and not re.search(r'^(\d+\.)', lines[i + 1])):
            lines.insert(i, '</ol>')
            i += 1
            ol = 0

        i += 1

    # close the elements still open at the end of the source
    if ul:
        lines.append('</ul>' * ul)
    if ol:
        lines.append('</ol>' * ol)
    if bq:
        lines.append('</blockquote>' * bq)

    sections = []
    scripts = []
    section = Marked()

    i = 0
    while i < len(lines):
        line = lines[i]
        if line.strip() and line.startswith('    '):
            # code block : consecutive lines indented by 4 spaces
            if isinstance(section, Marked) and section.line:
                sections.append(section)
            section = CodeBlock(line[4:])
            j = i + 1
            while j < len(lines) and lines[j].startswith('    '):
                section.lines.append(lines[j][4:])
                j += 1
            sections.append(section)
            section = Marked()
            i = j
            continue

        elif line.strip() and line.startswith("```"):
            # fenced code blocks à la Github Flavoured Markdown
            if isinstance(section, Marked) and section.line:
                sections.append(section)
            section = CodeBlock(line)
            j = i + 1
            while j < len(lines) and not lines[j].startswith("```"):
                section.lines.append(lines[j])
                j += 1
            sections.append(section)
            section = Marked()
            # skip the closing fence
            i = j + 1
            continue

        elif line.lower().startswith('<script'):
            if isinstance(section, Marked) and section.line:
                sections.append(section)
                section = Marked()
            j = i + 1
            while j < len(lines):
                if lines[j].lower().startswith('</script>'):
                    # store the script body and blank the lines it used
                    scripts.append('\n'.join(lines[i + 1:j]))
                    for k in range(i, j + 1):
                        lines[k] = ''
                    break
                j += 1
            # if no closing tag was found, j == len(lines) and the loop ends
            i = j
            continue

        # atext header
        elif line.startswith('#'):
            level = 1
            while level < len(line) and line[level] == '#' and level <= 6:
                level += 1
            if not line[level + 1:].strip():
                if level == 1:
                    i += 1
                    continue
                # nothing after the "#" signs : the last one is the text
                lines[i] = '<H%s>%s</H%s>\n' % (level - 1, '#', level - 1)
            else:
                # HTML only defines H1 to H6 : never emit <H7>
                tag_level = min(level, 6)
                lines[i] = '<H%s>%s</H%s>\n' % (tag_level, line[level + 1:],
                                                 tag_level)
            # i is not incremented here : the rewritten line is handled
            # again at the next iteration, by the last branch below

        else:
            mo = re.search(ref_pattern, line)
            if mo is not None:
                # link reference : store it, it produces no output
                if isinstance(section, Marked) and section.line:
                    sections.append(section)
                    section = Marked()
                key = mo.groups()[0]
                value = URL(mo.groups()[1])
                refs[key.lower()] = value
            else:
                if not line.strip():
                    line = '<p></p>'
                if section.line:
                    section.line += '\n'
                section.line += line

            i += 1

    if isinstance(section, Marked) and section.line:
        sections.append(section)

    res = ''
    for section in sections:
        mk, _scripts = section.to_html()
        res += mk
        scripts += _scripts
    return res, scripts
Example #6
0
def apply_markdown(src):
    """Apply the inline Markdown markup found in `src`.

    Steps, in this order:

    1. inline links `[text](url)` and reference links `[text][id]` are
       replaced by <a> elements; for a reference link the lowercased id
       is looked up in the module-level `refs` mapping, and an empty id
       (`[text][]`) means that the link text is the id;
    2. HTML tags are temporarily replaced by a random string, because
       they may contain the characters `_` and `*`;
    3. the content of inline code is passed to `s_escape`, then
       emphasis (`**`, `__`, `*`, `_`) and inline code are converted;
    4. the HTML tags saved at step 2 are restored.

    Parameters:
        src: the text to convert.

    Returns:
        A tuple `(html, scripts)` where `html` is the converted text
        enclosed in <p>...</p> and `scripts` is an empty list.

    Raises:
        KeyError: if a reference link uses an id that is not in `refs`.
    """
    scripts = []

    i = 0
    while i < len(src):
        if src[i] == '[':
            start_a = i + 1
            # search the closing bracket, ignoring those escaped by "\"
            while True:
                end_a = src.find(']', i)
                if end_a == -1:
                    break
                if src[end_a - 1] == '\\':
                    i = end_a + 1
                else:
                    break
            if end_a > -1 and '\n' not in src[start_a:end_a]:
                link = src[start_a:end_a]
                rest = src[end_a + 1:].lstrip()
                if rest and rest[0] == '(':
                    # inline link : [text](href)
                    j = 0
                    while True:
                        end_href = rest.find(')', j)
                        if end_href == -1:
                            break
                        if rest[end_href - 1] == '\\':
                            j = end_href + 1
                        else:
                            break
                    if end_href > -1 and '\n' not in rest[:end_href]:
                        tag = ('<a href="' + rest[1:end_href] + '">' + link +
                               '</a>')
                        src = src[:start_a - 1] + tag + rest[end_href + 1:]
                        i = start_a + len(tag)
                elif rest and rest[0] == '[':
                    # reference link : [text][id]
                    j = 0
                    while True:
                        end_key = rest.find(']', j)
                        if end_key == -1:
                            break
                        if rest[end_key - 1] == '\\':
                            j = end_key + 1
                        else:
                            break
                    if end_key > -1 and '\n' not in rest[:end_key]:
                        # the id is read for each link; if it is empty,
                        # the link text is used instead
                        key = rest[1:end_key] or link
                        if key.lower() not in refs:
                            raise KeyError('unknown reference %s' % key)
                        url = refs[key.lower()]
                        tag = '<a href="' + url + '">' + link + '</a>'
                        src = src[:start_a - 1] + tag + rest[end_key + 1:]
                        i = start_a + len(tag)

        i += 1

    # before applying the markup with _ and *, isolate HTML tags because
    # they can contain these characters

    # We replace them temporarily by a random string
    rstr = ''.join(random.choice(letters) for _ in range(16))

    i = 0
    # state is the quote character of the attribute value being read
    # inside a tag, or None; start is the index of its opening quote and
    # data the characters read since then
    state = None
    start = -1
    data = ''
    tags = []
    while i < len(src):
        if src[i] == '<':
            j = i + 1
            while j < len(src):
                if src[j] == '"' or src[j] == "'":
                    if state == src[j] and src[j - 1] != '\\':
                        # closing quote
                        state = None
                        j = start + len(data) + 1
                        data = ''
                    elif state is None:
                        # opening quote
                        state = src[j]
                        start = j
                    else:
                        data += src[j]
                elif src[j] == '>' and state is None:
                    # end of tag : save it and put the placeholder instead
                    tags.append(src[i:j + 1])
                    src = src[:i] + rstr + src[j + 1:]
                    i += len(rstr)
                    break
                elif state == '"' or state == "'":
                    data += src[j]
                elif src[j] == '\n':
                    # if a sign < is not followed by > in the same line, it
                    # is the sign "lesser than"
                    src = src[:i] + '&lt;' + src[i + 1:]
                    j = i + 4
                    break
                j += 1
        elif src[i] == '`' and i > 0 and src[i - 1] != '\\':
            # ignore the content of inline code
            j = i + 1
            while j < len(src):
                if src[j] == '`' and src[j - 1] != '\\':
                    break
                j += 1
            i = j
        i += 1

    # escape "<", ">", "&" and "_" in inline code
    code_pattern = r'\`(.*?)\`'
    src = re.sub(code_pattern, s_escape, src)

    # replace escaped ` _ * by HTML characters
    # NOTE(review): the first pattern is 2 backslashes followed by a
    # backquote, unlike the next two - confirm that this is intended
    src = src.replace(r'\\`', '&#96;')
    src = src.replace(r'\_', '&#95;')
    src = src.replace(r'\*', '&#42;')

    # emphasis : the double markers are handled before the single ones
    strong_patterns = [('STRONG', r'\*\*(.*?)\*\*'), ('B', r'__(.*?)__')]
    for tag, strong_pattern in strong_patterns:
        src = re.sub(strong_pattern, r'<%s>\1</%s>' % (tag, tag), src)

    em_patterns = [('EM', r'\*(.*?)\*'), ('I', r'\_(.*?)\_')]
    for tag, em_pattern in em_patterns:
        src = re.sub(em_pattern, r'<%s>\1</%s>' % (tag, tag), src)

    # inline code
    src = re.sub(code_pattern, r'<code>\1</code>', src)

    # restore tags, from the last placeholder to the first one
    while True:
        pos = src.rfind(rstr)
        if pos == -1:
            break
        repl = tags.pop()
        src = src[:pos] + repl + src[pos + len(rstr):]

    src = '<p>' + src + '</p>'

    return src, scripts
Example #7
0
def apply_markdown(src):
    """Apply the inline Markdown markup found in `src`.

    Steps, in this order:

    1. inline links `[text](url)` and reference links `[text][id]` are
       replaced by <a> elements; for a reference link the lowercased id
       is looked up in the module-level `refs` mapping, and an empty id
       (`[text][]`) means that the link text is the id;
    2. HTML tags are temporarily replaced by a random string enclosed in
       whitespace, and escaped backquotes are replaced by `&#96;`;
    3. the content of inline code is passed to `s_escape`, then
       emphasis (`**`, `__`, `*`, `_`) and inline code are converted;
    4. the HTML tags saved at step 2 are restored.

    Parameters:
        src: the text to convert.

    Returns:
        A tuple `(html, scripts)` where `html` is the converted text
        enclosed in <p>...</p> and `scripts` is an empty list.

    Raises:
        KeyError: if a reference link uses an id that is not in `refs`.
    """
    scripts = []

    i = 0
    while i < len(src):
        if src[i] == '[':
            start_a = i + 1
            # search the closing bracket, ignoring those escaped by "\"
            while True:
                end_a = src.find(']', i)
                if end_a == -1:
                    break
                if src[end_a - 1] == '\\':
                    i = end_a + 1
                else:
                    break
            if end_a > -1 and '\n' not in src[start_a:end_a]:
                link = src[start_a:end_a]
                rest = src[end_a + 1:].lstrip()
                if rest and rest[0] == '(':
                    # inline link : [text](href)
                    j = 0
                    while True:
                        end_href = rest.find(')', j)
                        if end_href == -1:
                            break
                        if rest[end_href - 1] == '\\':
                            j = end_href + 1
                        else:
                            break
                    if end_href > -1 and '\n' not in rest[:end_href]:
                        tag = ('<a href="' + rest[1:end_href] + '">' + link +
                               '</a>')
                        src = src[:start_a - 1] + tag + rest[end_href + 1:]
                        i = start_a + len(tag)
                elif rest and rest[0] == '[':
                    # reference link : [text][id]
                    j = 0
                    while True:
                        end_key = rest.find(']', j)
                        if end_key == -1:
                            break
                        if rest[end_key - 1] == '\\':
                            j = end_key + 1
                        else:
                            break
                    if end_key > -1 and '\n' not in rest[:end_key]:
                        # the id is read for each link; if it is empty,
                        # the link text is used instead
                        key = rest[1:end_key] or link
                        if key.lower() not in refs:
                            raise KeyError('unknown reference %s' % key)
                        url = refs[key.lower()]
                        tag = '<a href="' + url + '">' + link + '</a>'
                        src = src[:start_a - 1] + tag + rest[end_key + 1:]
                        i = start_a + len(tag)

        i += 1

    # before applying the markup with _ and *, isolate HTML tags because
    # they can contain these characters

    # We replace them temporarily by a random string. The string starts
    # and ends with a whitespace so that in the new source, the text before
    # the random string has a word end. E.g. <h2>_italic_</h2> is replaced
    # by something like " agFyf_italic_ agFyf" so that the markdown for
    # _italic_ can be applied.
    rstr = ' ' + ''.join(random.choice(letters) for _ in range(16)) + ' '

    i = 0
    # state is the quote character of the attribute value being read
    # inside a tag, or None; start is the index of its opening quote and
    # data the characters read since then
    state = None
    start = -1
    data = ''
    tags = []
    while i < len(src):
        if src[i] == '<':
            j = i + 1
            while j < len(src):
                if src[j] == '"' or src[j] == "'":
                    if state == src[j] and src[j - 1] != '\\':
                        # closing quote
                        state = None
                        j = start + len(data) + 1
                        data = ''
                    elif state is None:
                        # opening quote
                        state = src[j]
                        start = j
                    else:
                        data += src[j]
                elif src[j] == '>' and state is None:
                    # end of tag : save it and put the placeholder instead
                    tags.append(src[i:j + 1])
                    src = src[:i] + rstr + src[j + 1:]
                    i += len(rstr)
                    break
                elif state == '"' or state == "'":
                    data += src[j]
                elif src[j] == '\n':
                    # if a sign < is not followed by > in the same line, it
                    # is the sign "lesser than"
                    src = src[:i] + '&lt;' + src[i + 1:]
                    j = i + 4
                    break
                j += 1
        elif src[i] == '`' and i > 0:
            if src[i - 1] != '\\':
                # ignore the content of inline code
                j = i + 1
                while j < len(src):
                    if src[j] == '`' and src[j - 1] != '\\':
                        break
                    j += 1
                i = j
            else:
                # replace escaped ` by &#96;
                src = src[:i - 1] + "&#96;" + src[i + 1:]
        i += 1

    # escape "<", ">", "&" and "_" in inline code
    code_pattern = r'\`(.*?)\`'
    src = re.sub(code_pattern, s_escape, src)

    # replace escaped ` _ * by HTML characters
    # NOTE(review): the first pattern is 2 backslashes followed by a
    # backquote, unlike the next two - confirm that this is intended
    src = src.replace(r'\\`', '&#96;')
    src = src.replace(r'\_', '&#95;')
    src = src.replace(r'\*', '&#42;')

    # emphasis : the double markers are handled before the single ones
    strong_patterns = [('STRONG', r'\*\*(.+?)\*\*'), ('B', r'__(.+?)__')]
    for tag, strong_pattern in strong_patterns:
        src = re.sub(strong_pattern, r'<%s>\1</%s>' % (tag, tag), src)

    # EM for *xxx*
    src = re.sub(r'\*(.+?)\*', r'<EM>\1</EM>', src)

    # I for _xxx_ where the _ are at the beginning or end of a word
    # An underscore inside a word is ignored.
    src = re.sub(r'\b_(.*?)_\b', r'<I>\1</I>', src, flags=re.M)

    # inline code
    src = re.sub(code_pattern, r'<code>\1</code>', src)

    # restore tags, from the last placeholder to the first one
    while True:
        pos = src.rfind(rstr)
        if pos == -1:
            break
        repl = tags.pop()
        src = src[:pos] + repl + src[pos + len(rstr):]

    src = '<p>' + src + '</p>'

    return src, scripts
Example #8
0
def mark(src):
    """Convert Markdown source into HTML.

    The text is processed in two passes. The first pass works line by
    line and rewrites blockquotes, unordered lists and ordered lists into
    the matching HTML tags. The second pass groups the lines into sections
    (``Marked`` text or ``CodeBlock``), collects the content of
    ``<script>`` elements, converts ``#`` headers and stores reference
    definitions in the module-level ``refs`` dictionary, which is reset at
    every call.

    Args:
        src: the Markdown source, as a string.

    Returns:
        A tuple ``(html, scripts)``: ``html`` is the concatenation of the
        HTML returned by ``to_html()`` for every section, ``scripts`` is
        the list of script bodies found in the source followed by those
        returned by the sections.
    """
    global refs
    refs = {}
    # split source in sections
    # sections can be :
    # - a block-level HTML element (markdown syntax will not be processed)
    # - a script
    # - a span-level HTML tag (markdown syntax will be processed)
    # - a code block

    # normalise line feeds
    src = src.replace('\r\n', '\n')

    # a line followed by a line of "=" becomes "# line", a line followed by
    # a line of "-" becomes "## line"
    src = re.sub(r'(.*?)\n=+\n', '\n# \\1\n', src)
    src = re.sub(r'(.*?)\n-+\n', '\n## \\1\n', src)

    lines = src.split('\n') + ['']

    # bq: current blockquote depth; ul: 1 + indentation of the current
    # unordered list item (0 outside a list); ol: 1 inside an ordered list
    i = bq = 0
    ul = ol = 0

    while i < len(lines):

        # enclose lines starting by > in a blockquote
        if lines[i].startswith('>'):
            # nb = number of leading ">", i.e. the requested depth
            nb = 1
            while nb < len(lines[i]) and lines[i][nb] == '>':
                nb += 1
            lines[i] = lines[i][nb:]
            if nb > bq:
                lines.insert(i, '<blockquote>' * (nb - bq))
                i += 1
                bq = nb
            elif nb < bq:
                lines.insert(i, '</blockquote>' * (bq - nb))
                i += 1
                bq = nb
        elif bq > 0:
            # first line without ">" closes every open blockquote
            lines.insert(i, '</blockquote>' * bq)
            i += 1
            bq = 0

        # unordered lists: "- ", "+ " or "* " at the start of the document,
        # inside a list, or after an empty line
        if (lines[i].strip() and lines[i].lstrip()[0] in '-+*'
                and len(lines[i].lstrip()) > 1
                and lines[i].lstrip()[1] == ' '
                and (i == 0 or ul or not lines[i - 1].strip())):
            # line indentation indicates nesting level
            nb = 1 + len(lines[i]) - len(lines[i].lstrip())
            lines[i] = '<li>' + lines[i][nb:]
            if nb > ul:
                lines.insert(i, '<ul>' * (nb - ul))
                i += 1
            elif nb < ul:
                lines.insert(i, '</ul>' * (ul - nb))
                i += 1
            ul = nb
        elif ul and not lines[i].strip():
            # empty line inside a list: the list ends unless the next line
            # is empty, indented, or another list item
            if (i < len(lines) - 1 and lines[i + 1].strip()
                    and not lines[i + 1].startswith(' ')):
                nline = lines[i + 1].lstrip()
                if nline[0] in '-+*' and len(nline) > 1 and nline[1] == ' ':
                    pass
                else:
                    lines.insert(i, '</ul>' * ul)
                    i += 1
                    ul = 0

        # ordered lists
        mo = re.search(r'^(\d+\.)', lines[i])
        if mo:
            if not ol:
                lines.insert(i, '<ol>')
                i += 1
            lines[i] = '<li>' + lines[i][len(mo.groups()[0]):]
            ol = 1
        elif (ol and not lines[i].strip() and i < len(lines) - 1
                and not lines[i + 1].startswith(' ')
                and not re.search(r'^(\d+\.)', lines[i + 1])):
            lines.insert(i, '</ol>')
            i += 1
            ol = 0

        i += 1

    # close whatever is still open at the end of the document
    if ul:
        lines.append('</ul>' * ul)
    if ol:
        lines.append('</ol>' * ol)
    if bq:
        lines.append('</blockquote>' * bq)

    sections = []
    scripts = []
    section = Marked()

    i = 0
    while i < len(lines):
        line = lines[i]
        if line.strip() and line.startswith('    '):
            # code block: consecutive lines indented by 4 spaces
            if isinstance(section, Marked) and section.line:
                sections.append(section)
            section = CodeBlock(line[4:])
            j = i + 1
            while j < len(lines) and lines[j].startswith('    '):
                section.lines.append(lines[j][4:])
                j += 1
            sections.append(section)
            section = Marked()
            i = j
            continue

        elif line.strip() and line.startswith("```"):
            # fenced code blocks à la Github Flavoured Markdown
            if isinstance(section, Marked) and section.line:
                sections.append(section)
            section = CodeBlock(line)
            j = i + 1
            while j < len(lines) and not lines[j].startswith("```"):
                section.lines.append(lines[j])
                j += 1
            sections.append(section)
            section = Marked()
            # skip the closing fence
            i = j + 1
            continue

        elif line.lower().startswith('<script'):
            if isinstance(section, Marked) and section.line:
                sections.append(section)
                section = Marked()
            # NOTE(review): if no later line starts with "</script>", j
            # reaches len(lines) and the loop ends, so the remaining lines
            # are never added to a section -- confirm this is intended
            j = i + 1
            while j < len(lines):
                if lines[j].lower().startswith('</script>'):
                    scripts.append('\n'.join(lines[i + 1:j]))
                    # blank the whole element so it is not rendered as text
                    for k in range(i, j + 1):
                        lines[k] = ''
                    break
                j += 1
            i = j
            continue

        # atext header
        elif line.startswith('#'):
            # count the leading "#" (the scan stops at 7 at most)
            level = 1
            while level < len(line) and line[level] == '#' and level <= 6:
                level += 1
            if not line[level + 1:].strip():
                if level == 1:
                    # a lone "#" is dropped
                    i += 1
                    continue
                else:
                    # only hashes: the last one is used as the title text
                    lines[i] = '<H%s>%s</H%s>\n' % (level - 1, '#', level - 1)
            else:
                # HTML only defines H1 to H6, so clamp the tag number
                tag_level = min(level, 6)
                lines[i] = '<H%s>%s</H%s>\n' % (
                    tag_level, line[level + 1:], tag_level)
            # i is not incremented: the rewritten line now starts with "<H"
            # and is handled as ordinary text on the next iteration

        else:
            mo = re.search(ref_pattern, line)
            if mo is not None:
                # reference definition: store it, do not render the line
                if isinstance(section, Marked) and section.line:
                    sections.append(section)
                    section = Marked()
                key = mo.groups()[0]
                value = URL(mo.groups()[1])
                refs[key.lower()] = value
            else:
                if not line.strip():
                    line = '<p></p>'
                if section.line:
                    section.line += '\n'
                section.line += line

            i += 1

    if isinstance(section, Marked) and section.line:
        sections.append(section)

    res = ''
    for section in sections:
        mk, _scripts = section.to_html()
        res += mk
        scripts += _scripts

    return res, scripts
Example #9
0
def apply_markdown(src):
    """Apply inline Markdown markup to a piece of text.

    Converts inline links ``[text](url)`` and reference links
    ``[text][key]`` (looked up in the module-level ``refs`` dictionary),
    then strong / emphasised text and inline code. HTML tags found in the
    text are replaced by a random placeholder while the emphasis markup is
    applied, and restored afterwards.

    Args:
        src: the text to convert.

    Returns:
        A tuple ``(html, scripts)``: ``html`` is the converted text wrapped
        in ``<p>...</p>``; ``scripts`` is always an empty list.

    Raises:
        KeyError: if a reference link uses a key that is not in ``refs``.
    """
    scripts = []

    i = 0
    while i < len(src):
        if src[i] == '[':
            start_a = i + 1
            # find the closing bracket, ignoring escaped ones
            while True:
                end_a = src.find(']', i)
                if end_a == -1:
                    break
                if src[end_a - 1] == '\\':
                    i = end_a + 1
                else:
                    break
            if end_a > -1 and src[start_a:end_a].find('\n') == -1:
                link = src[start_a:end_a]
                rest = src[end_a + 1:].lstrip()
                if rest and rest[0] == '(':
                    # inline link: [text](url)
                    j = 0
                    while True:
                        end_href = rest.find(')', j)
                        if end_href == -1:
                            break
                        if rest[end_href - 1] == '\\':
                            j = end_href + 1
                        else:
                            break
                    if end_href > -1 and rest[:end_href].find('\n') == -1:
                        tag = ('<a href="' + rest[1:end_href] + '">' + link
                            + '</a>')
                        src = src[:start_a - 1] + tag + rest[end_href + 1:]
                        # resume on the character right after the tag, so
                        # that a link which follows immediately is seen
                        i = start_a - 1 + len(tag)
                        continue
                elif rest and rest[0] == '[':
                    # reference link: [text][key]
                    j = 0
                    while True:
                        end_key = rest.find(']', j)
                        if end_key == -1:
                            break
                        if rest[end_key - 1] == '\\':
                            j = end_key + 1
                        else:
                            break
                    if end_key > -1 and rest[:end_key].find('\n') == -1:
                        # the key is the text between the second pair of
                        # brackets; when empty ([text][]) the link text
                        # itself is the key
                        key = rest[1:end_key] or link
                        if key.lower() not in refs:
                            raise KeyError('unknown reference %s' % key)
                        url = refs[key.lower()]
                        tag = '<a href="' + url + '">' + link + '</a>'
                        src = src[:start_a - 1] + tag + rest[end_key + 1:]
                        i = start_a - 1 + len(tag)
                        continue

        i += 1

    # before applying the markup with _ and *, isolate HTML tags because
    # they can contain these characters

    # We replace them temporarily by a random string. The string starts
    # and ends with a whitespace so that in the new source, the text before
    # the random string has a word end. E.g. <h2>_italic_</h2> is replaced
    # by something like " agFyf_italic_ agFyf" so that the markdown for
    # _italic_ can be applied.
    rstr = ' ' + ''.join(random.choice(letters) for _ in range(16)) + ' '

    i = 0
    state = None    # quote character of the attribute value being read
    start = -1      # index of the opening quote
    data = ''       # characters read since the opening quote
    tags = []       # original tags, in order of appearance
    while i < len(src):
        if src[i] == '<':
            j = i + 1
            while j < len(src):
                if src[j] == '"' or src[j] == "'":
                    if state == src[j] and src[j - 1] != '\\':
                        # closing quote
                        state = None
                        j = start + len(data) + 1
                        data = ''
                    elif state is None:
                        # opening quote
                        state = src[j]
                        start = j
                    else:
                        data += src[j]
                elif src[j] == '>' and state is None:
                    tags.append(src[i:j + 1])
                    src = src[:i] + rstr + src[j + 1:]
                    # stop on the last character of the placeholder: the
                    # increment at the end of the loop then lands on the
                    # character that follows it, which may be another tag
                    i += len(rstr) - 1
                    break
                elif state == '"' or state == "'":
                    data += src[j]
                elif src[j] == '\n':
                    # if a sign < is not followed by > in the same line, it
                    # is the sign "lesser than"
                    src = src[:i] + '&lt;' + src[i + 1:]
                    break
                j += 1
        elif src[i] == '`' and i > 0 and src[i - 1] != '\\':
            # ignore the content of inline code
            j = i + 1
            while j < len(src):
                if src[j] == '`' and src[j - 1] != '\\':
                    break
                j += 1
            i = j
        i += 1

    # escape "<", ">", "&" and "_" in inline code
    code_pattern = r'\`(.*?)\`'
    src = re.sub(code_pattern, s_escape, src)

    # replace escaped ` _ * by HTML characters
    # NOTE(review): r'\\`' is two backslashes followed by a backtick, so a
    # backtick escaped with a single backslash is not replaced here --
    # confirm the intent
    src = src.replace(r'\\`', '&#96;')
    src = src.replace(r'\_', '&#95;')
    src = src.replace(r'\*', '&#42;')

    # emphasis
    strong_patterns = [('STRONG', r'\*\*(.+?)\*\*'), ('B', r'__(.+?)__')]
    for tag, strong_pattern in strong_patterns:
        src = re.sub(strong_pattern, r'<%s>\1</%s>' % (tag, tag), src)

    # EM for *xxx*
    src = re.sub(r'\*(.+?)\*', r'<%s>\1</%s>' % ('EM', 'EM'), src)

    # I for _xxx_ where the _ are at the beginning or end of a word
    # An underscore inside a word is ignored.
    src = re.sub(r'\b_(.*?)_\b', r'<I>\1</I>', src,
        flags=re.M)

    # inline code
    code_pattern = r'\`(.*?)\`'
    src = re.sub(code_pattern, r'<code>\1</code>', src)

    # restore tags: the last placeholder matches the last tag stored
    while True:
        pos = src.rfind(rstr)
        if pos == -1:
            break
        repl = tags.pop()
        src = src[:pos] + repl + src[pos + len(rstr):]

    src = '<p>' + src + '</p>'

    return src, scripts
Example #10
0
def apply_markdown(src):
    """Apply inline Markdown markup to a piece of text.

    Converts inline links ``[text](url)`` and reference links
    ``[text][key]`` (looked up in the module-level ``refs`` dictionary),
    then strong / emphasised text and inline code. HTML tags found in the
    text are replaced by a random placeholder while the emphasis markup is
    applied, and restored afterwards.

    Args:
        src: the text to convert.

    Returns:
        A tuple ``(html, scripts)``: ``html`` is the converted text wrapped
        in ``<p>...</p>``; ``scripts`` is always an empty list.

    Raises:
        KeyError: if a reference link uses a key that is not in ``refs``.
    """
    scripts = []

    i = 0
    while i < len(src):
        if src[i] == '[':
            start_a = i + 1
            # find the closing bracket, ignoring escaped ones
            while True:
                end_a = src.find(']', i)
                if end_a == -1:
                    break
                if src[end_a - 1] == '\\':
                    i = end_a + 1
                else:
                    break
            if end_a > -1 and src[start_a:end_a].find('\n') == -1:
                link = src[start_a:end_a]
                rest = src[end_a + 1:].lstrip()
                if rest and rest[0] == '(':
                    # inline link: [text](url)
                    j = 0
                    while True:
                        end_href = rest.find(')', j)
                        if end_href == -1:
                            break
                        if rest[end_href - 1] == '\\':
                            j = end_href + 1
                        else:
                            break
                    if end_href > -1 and rest[:end_href].find('\n') == -1:
                        tag = '<a href="' + rest[1:end_href] + '">' + link + '</a>'
                        src = src[:start_a - 1] + tag + rest[end_href + 1:]
                        # resume on the character right after the tag, so
                        # that a link which follows immediately is seen
                        i = start_a - 1 + len(tag)
                        continue
                elif rest and rest[0] == '[':
                    # reference link: [text][key]
                    j = 0
                    while True:
                        end_key = rest.find(']', j)
                        if end_key == -1:
                            break
                        if rest[end_key - 1] == '\\':
                            j = end_key + 1
                        else:
                            break
                    if end_key > -1 and rest[:end_key].find('\n') == -1:
                        # the key is the text between the second pair of
                        # brackets; when empty ([text][]) the link text
                        # itself is the key
                        key = rest[1:end_key] or link
                        if key.lower() not in refs:
                            raise KeyError('unknown reference %s' % key)
                        url = refs[key.lower()]
                        tag = '<a href="' + url + '">' + link + '</a>'
                        src = src[:start_a - 1] + tag + rest[end_key + 1:]
                        i = start_a - 1 + len(tag)
                        continue

        i += 1

    # before applying the markup with _ and *, isolate HTML tags because
    # they can contain these characters

    # We replace them temporarily by a random string
    rstr = ''.join(random.choice(letters) for _ in range(16))

    i = 0
    state = None    # quote character of the attribute value being read
    start = -1      # index of the opening quote
    data = ''       # characters read since the opening quote
    tags = []       # original tags, in order of appearance
    while i < len(src):
        if src[i] == '<':
            j = i + 1
            while j < len(src):
                if src[j] == '"' or src[j] == "'":
                    if state == src[j] and src[j - 1] != '\\':
                        # closing quote
                        state = None
                        j = start + len(data) + 1
                        data = ''
                    elif state is None:
                        # opening quote
                        state = src[j]
                        start = j
                    else:
                        data += src[j]
                elif src[j] == '>' and state is None:
                    tags.append(src[i:j + 1])
                    src = src[:i] + rstr + src[j + 1:]
                    # stop on the last character of the placeholder: the
                    # increment at the end of the loop then lands on the
                    # character that follows it, which may be another tag
                    i += len(rstr) - 1
                    break
                elif state == '"' or state == "'":
                    data += src[j]
                elif src[j] == '\n':
                    # if a sign < is not followed by > in the same line, it
                    # is the sign "lesser than"
                    src = src[:i] + '&lt;' + src[i + 1:]
                    break
                j += 1
        elif src[i] == '`' and i > 0 and src[i - 1] != '\\':
            # ignore the content of inline code
            j = i + 1
            while j < len(src):
                if src[j] == '`' and src[j - 1] != '\\':
                    break
                j += 1
            i = j
        i += 1

    # escape "<", ">", "&" and "_" in inline code
    code_pattern = r'\`(.*?)\`'
    src = re.sub(code_pattern, s_escape, src)

    # replace escaped ` _ * by HTML characters
    # NOTE(review): r'\\`' is two backslashes followed by a backtick, so a
    # backtick escaped with a single backslash is not replaced here --
    # confirm the intent
    src = src.replace(r'\\`', '&#96;')
    src = src.replace(r'\_', '&#95;')
    src = src.replace(r'\*', '&#42;')

    # emphasis
    strong_patterns = [('STRONG', r'\*\*(.*?)\*\*'), ('B', r'__(.*?)__')]
    for tag, strong_pattern in strong_patterns:
        src = re.sub(strong_pattern, r'<%s>\1</%s>' % (tag, tag), src)

    em_patterns = [('EM', r'\*(.*?)\*'), ('I', r'\_(.*?)\_')]
    for tag, em_pattern in em_patterns:
        src = re.sub(em_pattern, r'<%s>\1</%s>' % (tag, tag), src)

    # inline code
    code_pattern = r'\`(.*?)\`'
    src = re.sub(code_pattern, r'<code>\1</code>', src)

    # restore tags: the last placeholder matches the last tag stored
    while True:
        pos = src.rfind(rstr)
        if pos == -1:
            break
        repl = tags.pop()
        src = src[:pos] + repl + src[pos + len(rstr):]

    src = '<p>' + src + '</p>'

    return src, scripts