Example #1
0
        def finish():
            """Apply the computed bold/italic states to the collected style nodes.

            ``counts`` and ``styles`` are taken from the enclosing scope and must
            have the same length. The states returned by
            ``styleanalyzer.compute_path(counts)`` are matched to ``styles`` by
            position. Whenever a state's ``apocount`` exceeds the previous
            state's, the difference is inserted as literal apostrophes in front
            of the node's children.
            """
            assert len(counts) == len(styles)

            from mwlib.parser import styleanalyzer

            previous = 0
            for pos, state in enumerate(styleanalyzer.compute_path(counts)):
                surplus = "'" * (state.apocount - previous)
                if surplus:
                    styles[pos].children.insert(0, T(type=T.t_text, text=surplus))
                previous = state.apocount

                node = styles[pos]
                if state.is_bold:
                    node.caption = "'''"
                    if state.is_italic:
                        # bold + italic: nest an italic node inside the bold one
                        node.children = [T(type=T.t_complex_style,
                                           caption="''",
                                           children=node.children)]
                elif state.is_italic:
                    node.caption = "''"
                else:
                    # neither bold nor italic: degrade to a plain container
                    node.type = T.t_complex_node
Example #2
0
    def run(self):
        """Fold ``t_urllink ... ]`` token runs into ``t_complex_named_url`` nodes.

        The node's caption is the opening token's text without its first
        character; its children are the tokens between opener and closer.
        A ``t_2box_close`` token also closes an open link: it is rewritten
        into a special ``]`` token and left in place after the new node.
        """
        tokens = self.tokens
        pos = 0
        opened = None
        while pos < len(tokens):
            tok = tokens[pos]

            if opened is None:
                # Not inside a link: only an opener is interesting.
                if tok.type == T.t_urllink:
                    opened = pos
                pos += 1
                continue

            closes_single = tok.type == T.t_special and tok.text == "]"
            closes_double = not closes_single and tok.type == T.t_2box_close
            if not (closes_single or closes_double):
                pos += 1
                continue

            if closes_single:
                end = pos + 1  # consume the closing bracket
            else:
                tok.type = T.t_special
                tok.text = "]"
                end = pos  # keep the rewritten bracket after the link

            link = T(type=T.t_complex_named_url,
                     children=tokens[opened + 1:pos],
                     caption=tokens[opened].text[1:])
            tokens[opened:end] = [link]
            pos = opened
            opened = None
Example #3
0
    def create_source(self, name, vlist, inner, xopts):
        """Build a ``t_complex_tag`` node wrapping ``inner`` as one text token.

        The node is a block node unless ``vlist`` is truthy and its
        ``"enclose"`` entry equals ``"none"``.
        """
        inline = bool(vlist) and vlist.get("enclose", "") == "none"
        text_child = T(type=T.t_text, text=inner)
        return T(type=T.t_complex_tag,
                 tagname=name,
                 vlist=vlist,
                 children=[text_child],
                 blocknode=not inline)
Example #4
0
def fix_break_between_pre(tokens, xopt):
    """Rewrite a space-led break that directly precedes a ``t_pre`` token.

    Such a ``t_break`` token (text starting with a space) is replaced in
    place by a ``t_pre`` token with text ``" "`` followed by a ``t_newline``
    token. ``tokens`` is modified in place; nothing is returned.
    """
    pos = 0
    while pos + 1 < len(tokens):
        current, following = tokens[pos], tokens[pos + 1]
        is_target = (current.type == T.t_break
                     and current.text.startswith(" ")
                     and following.type == T.t_pre)
        if not is_target:
            pos += 1
            continue

        tokens[pos:pos + 1] = [
            T(type=T.t_pre, text=" "),
            T(type=T.t_newline, text=u"\n"),
        ]
        pos += 2  # step over both freshly inserted tokens
Example #5
0
        def makecell(skip_end=0):
            """Replace ``tokens[start:i]`` with a single table-cell node.

            ``tokens``, ``start`` and ``i`` come from the enclosing scope. A
            ``|`` / ``!`` start marker updates ``self.is_header``; a raw
            ``th`` / ``td`` tag name on the start token overrides it for this
            cell only. The last ``skip_end`` tokens before ``i`` are removed
            from ``tokens`` but not put into the cell's children.
            """
            opener = tokens[start]
            marker = opener.text.strip()
            if marker == "|":
                self.is_header = False
            elif marker == "!":
                self.is_header = True

            header = self.is_header
            if opener.rawtagname == "th":
                header = True
            elif opener.rawtagname == "td":
                header = False

            has_modifier = marker in ("|", "!", "||", "!!")
            content = tokens[start + 1:i - skip_end]
            self.replace_tablecaption(content)

            cell = T(type=T.t_complex_table_cell,
                     tagname="th" if header else "td",
                     start=opener.start,
                     children=content,
                     vlist=opener.vlist,
                     is_header=header)
            tokens[start:i] = [cell]
            if has_modifier:
                self.find_modifier(cell)
Example #6
0
    def close_stack(self, spos, tokens, pos):
        """Close every tag on ``self.stack`` from ``spos`` upwards.

        Entries are ``(index, tag)`` pairs and are closed innermost first.
        Each one replaces ``tokens[index:pos]`` with a ``t_complex_tag`` node
        whose children are the tokens after the opener. A ``display`` value
        of ``inline`` / ``block`` in the opener's style overrides the tag's
        own ``blocknode`` flag.

        Returns the position just after the outermost node created, or
        ``pos`` unchanged if nothing was closed.
        """
        pending = self.stack[spos:]
        del self.stack[spos:]

        for idx, tag in reversed(pending):
            opener = tokens[idx]
            display = opener.vlist.get("style", {}).get("display", "").lower()
            if display in ("inline", "block"):
                blocknode = display == "block"
            else:
                blocknode = tag.blocknode

            node = T(type=T.t_complex_tag,
                     children=tokens[idx + 1:pos],
                     tagname=tag.tagname,
                     blocknode=blocknode,
                     vlist=opener.vlist)
            tokens[idx:pos] = [node]
            pos = idx + 1

        return pos
Example #7
0
def extract_garbage(tokens, is_allowed, is_whitespace=None):
    """Cut disallowed token runs out of ``tokens`` and return them.

    A run starts at the first token that is neither whitespace nor allowed,
    extended backwards over directly preceding whitespace, and ends just
    before the next allowed token. Each run is deleted from ``tokens`` and
    returned wrapped in a ``t_complex_node``. Whitespace that is followed by
    an allowed token (or by the end of the list) stays where it is.

    ``is_whitespace`` defaults to matching ``t_newline`` and ``t_break``.
    """
    if is_whitespace is None:
        def is_whitespace(t):
            return t.type in (T.t_newline, T.t_break)

    removed = []
    pos = 0
    run_start = None

    while pos < len(tokens):
        tok = tokens[pos]

        if is_whitespace(tok):
            if run_start is None:
                run_start = pos
            pos += 1
            continue

        if is_allowed(tok):
            run_start = None
            pos += 1
            continue

        if run_start is None:
            run_start = pos
        pos += 1

        # advance to the next allowed token (or the end of the list)
        while pos < len(tokens) and not is_allowed(tokens[pos]):
            pos += 1

        garbage = tokens[run_start:pos]
        del tokens[run_start:pos]
        pos = run_start
        removed.append(T(type=T.t_complex_node, children=garbage))

    return removed
Example #8
0
 def create_gallery(self, name, vlist, inner, xopts):
     """Return a block-level ``gallery`` tag node.

     Its children are whatever ``_parse_gallery_txt(inner, xopts)`` returns.
     ``name`` is not used; the tag name is always ``"gallery"``.
     """
     entries = _parse_gallery_txt(inner, xopts)
     node = T(type=T.t_complex_tag,
              tagname="gallery",
              vlist=vlist,
              children=entries,
              blocknode=True)
     return node
Example #9
0
 def run(self):
     """Collapse ``t_pre ... t_newline`` runs into preformatted block nodes.

     The tokens after a ``t_pre`` marker up to and including the next
     ``t_newline`` become the children of a ``t_complex_preformatted`` node.
     If the token right before the marker already is such a node, the
     children are appended to it instead. A block node, or a complex tag
     named blockquote/table/timeline/div, cancels a pending marker.
     """
     tokens = self.tokens
     pos = 0
     pre_at = None
     while pos < len(tokens):
         tok = tokens[pos]

         if tok.type == T.t_pre:
             assert pre_at is None
             pre_at = pos
             pos += 1
             continue

         if tok.type == T.t_newline and pre_at is not None:
             body = tokens[pre_at + 1:pos + 1]
             previous = tokens[pre_at - 1] if pre_at > 0 else None
             if previous is not None and previous.type == T.t_complex_preformatted:
                 # extend the preformatted node directly in front of us
                 del tokens[pre_at:pos + 1]
                 previous.children.extend(body)
                 pos = pre_at
             else:
                 tokens[pre_at:pos + 1] = [
                     T(type=T.t_complex_preformatted,
                       children=body,
                       blocknode=True)
                 ]
                 pos = pre_at + 1
             pre_at = None
             continue

         if tok.blocknode or (tok.type == T.t_complex_tag and tok.tagname in
                              ("blockquote", "table", "timeline", "div")):
             pre_at = None
         pos += 1
Example #10
0
 def replace_tablecaption(self, children):
     """Turn table-caption tokens in ``children`` back into ``|`` plus ``+``.

     Each ``t_tablecaption`` token is retyped in place as a special ``|``
     token and a text token ``+`` is inserted right after it. ``children``
     is modified in place.
     """
     pos = 0
     while pos < len(children):
         node = children[pos]
         if node.type == T.t_tablecaption:
             node.type = T.t_special
             node.text = u"|"
             plus = T(type=T.t_text, text="+")
             children.insert(pos + 1, plus)
         pos += 1
Example #11
0
 def splitdl(self, item):
     """Split ``item.children`` at the first special ``:`` token.

     Everything after the colon is returned inside a ``t_complex_style``
     node with caption ``:``. The colon and everything after it are removed
     from ``item.children``. Returns ``None`` if there is no such colon.
     """
     kids = item.children
     for pos, node in enumerate(kids):
         if node.type != T.t_special or node.text != ':':
             continue
         definition = T(type=T.t_complex_style,
                        caption=':',
                        children=kids[pos + 1:])
         del kids[pos:]
         return definition
     return None
Example #12
0
    def _create_generic(self, name, vlist, inner, xopts):
        """Create the node for a tag that has no dedicated ``create_*`` handler.

        If ``name`` is registered in ``self.tagextensions``, the extension is
        called with ``(inner, vlist)``. Its result is wrapped in a
        ``t_complex_compat`` node, or ``None`` is returned if the extension
        returned ``None``. Otherwise a plain ``t_complex_tag`` holding
        ``inner`` as one text token is returned. A falsy ``vlist`` is
        replaced by an empty dict.
        """
        vlist = vlist or {}

        if name not in self.tagextensions:
            return T(type=T.t_complex_tag,
                     tagname=name,
                     vlist=vlist,
                     children=[T(type=T.t_text, text=inner)])

        node = self.tagextensions[name](inner, vlist)
        if node is None:
            return None
        return T(type=T.t_complex_compat, compatnode=node)
Example #13
0
 def create(delta=1):
     """Wrap ``tokens[first:i]`` in a block-level ``p`` node.

     ``tokens``, ``first`` and ``i`` come from the enclosing scope. The node
     replaces ``tokens[first:i + delta]``, so ``delta`` extra tokens after
     the content are dropped. Nothing happens if the content is empty.
     """
     content = tokens[first:i]
     if not content:
         return
     paragraph = T(type=T.t_complex_tag,
                   tagname='p',
                   children=content,
                   blocknode=True)
     tokens[first:i + delta] = [paragraph]
Example #14
0
    def create():
        """Wrap ``tokens[start:i]`` in one tag node per entry of ``state``.

        ``state``, ``tokens``, ``start`` and ``i`` come from the enclosing
        scope. The nodes are nested in ``state``'s iteration order, so the
        last entry ends up outermost. Returns ``False`` without touching
        ``tokens`` if ``state`` is empty or the range is empty.
        """
        if not state or i <= start:
            return False

        wrapped = tokens[start:i]
        for tag, tok in state.items():
            wrapped = [T(type=T.t_complex_tag,
                         tagname=tag,
                         children=wrapped,
                         vlist=tok.vlist)]
        tokens[start:i] = wrapped
        return True
Example #15
0
    def create_imagemap(self, name, vlist, inner, xopts):
        """Build an ``imagemap`` tag node from the tag's inner text.

        The parsed map (``imgmap.ImageMapFromString(inner)``) is stored on the
        node as ``imagemap``. If the map names an image, ``[[image]]`` is
        parsed and the first resulting node is stored as
        ``imagemap.imagelink`` when it is a ``t_complex_link`` with ``ns``
        6. Otherwise ``imagelink`` is set to ``None``.
        """
        from mwlib import imgmap

        node = T(type=T.t_complex_tag, tagname="imagemap", vlist=vlist)
        imap = imgmap.ImageMapFromString(inner)
        node.imagemap = imap

        if imap.image:
            imap.imagelink = None
            parsed = parse_txt(u"[[" + imap.image + u"]]", xopts)
            if parsed:
                first = parsed[0]
                if first.type == T.t_complex_link and first.ns == 6:
                    imap.imagelink = first

        return node
Example #16
0
        def create():
            """Turn the pending heading and its body into a section node.

            ``current``, ``tokens``, ``i`` and ``sections`` come from the
            enclosing scope. The level is the smaller ``=`` count of the
            opening and closing heading tokens; surplus ``=`` characters are
            kept as literal text at the matching end of the caption. The
            section replaces ``tokens[current.start:i]``. If an open section
            with a lower level exists, the new one is moved into its children.

            Returns ``False`` if no complete heading is pending, else ``True``.
            """
            if current.start is None or current.endtitle is None:
                return False

            opening = tokens[current.start].text.count("=")
            closing = tokens[current.endtitle].text.count("=")
            level = min(opening, closing)

            # FIXME: make this a caption
            caption = T(type=T.t_complex_node,
                        children=tokens[current.start + 1:current.endtitle])
            surplus = closing - opening
            if surplus > 0:
                caption.children.append(
                    T(type=T.t_text, text=u"=" * surplus))
            elif surplus < 0:
                caption.children.insert(
                    0, T(type=T.t_text, text=u"=" * -surplus))

            body = T(type=T.t_complex_node,
                     children=tokens[current.endtitle + 1:i])

            sect = T(type=T.t_complex_section,
                     tagname="@section",
                     children=[caption, body],
                     level=level,
                     blocknode=True)
            tokens[current.start:i] = [sect]

            # forget open sections that are not strictly above this level
            while sections and sections[-1].level >= level:
                sections.pop()
            if sections:
                # nest under the closest remaining ancestor
                sections[-1].children.append(sect)
                del tokens[current.start]
                current.start -= 1

            sections.append(sect)
            return True
Example #17
0
def fixlitags(tokens, xopts):
    """Wrap runs of ``li`` nodes that are not inside a list in a ``ul`` node.

    The token tree is walked iteratively. In every child list whose parent
    is not an ``ol`` or ``ul`` node, each maximal run of consecutive tokens
    with tag name ``li`` is replaced by a single ``t_complex_tag`` node with
    tag name ``ul`` holding that run. Lists are modified in place.

    ``xopts`` is accepted for interface compatibility and is not used.
    """
    root = T(type=T.t_complex_tag, tagname="div")
    todo = [(root, tokens)]
    while todo:
        parent, tokens = todo.pop()
        if parent.tagname not in ("ol", "ul"):
            idx = 0
            while idx < len(tokens):
                start = idx
                while idx < len(tokens) and tokens[idx].tagname == "li":
                    idx += 1

                if idx > start:
                    lst = T(type=T.t_complex_tag,
                            tagname="ul",
                            children=tokens[start:idx])
                    # Replace exactly the run [start, idx). Slicing up to
                    # idx + 1 would also delete the first non-li token after
                    # the run, which is not part of lst and would be lost.
                    tokens[start:idx] = [lst]
                    idx = start + 1
                else:
                    idx += 1

        for t in tokens:
            if t.children:
                todo.append((t, t.children))
Example #18
0
            def appendline():
                """Move ``lines[startpos]`` (or its head) into ``item.children``.

                ``lines``, ``startpos``, ``endtag`` and ``item`` come from the
                enclosing scope. If ``endtag`` is set and the line contains a
                matching HTML end tag, only the part before that tag is moved.
                The end tag is dropped and the rest stays at
                ``lines[startpos]`` as a new prefix-less ``p`` line. Otherwise
                the whole line is moved and removed from ``lines``.
                """
                line = lines[startpos]
                if endtag:
                    for pos, node in enumerate(line.children):
                        if node.rawtagname != endtag or node.type != T.t_html_tag_end:
                            continue
                        remainder = line.children[pos + 1:]
                        del line.children[pos:]
                        item.children.append(line)
                        lines[startpos] = T(type=T.t_complex_line,
                                            tagname="p",
                                            lineprefix=None,
                                            children=remainder)
                        return

                item.children.append(line)
                del lines[startpos]
Example #19
0
 def maketable():
     """Close the innermost open table and replace it with a table node.

     ``stack``, ``tokens``, ``i`` and ``self`` come from the enclosing
     scope. The start index is popped from ``stack``. The tokens between
     the opener and position ``i`` are run through a tag parser that knows
     ``caption`` and become the table's children. The range from the opener
     through ``i`` is replaced by the node. Modifiers are looked up only
     when the opener's text is ``{|``.

     Returns the index of the new table node.
     """
     begin = stack.pop()
     opener = tokens[begin]
     body = tokens[begin + 1:i]

     from mwlib.refine import core
     parser = core.tagparser()
     parser.add("caption", 5)
     parser(body, self.xopts)

     table = T(type=T.t_complex_table,
               tagname="table",
               start=opener.start,
               children=body,
               vlist=opener.vlist,
               blocknode=True)
     tokens[begin:i + 1] = [table]

     if opener.text.strip() == "{|":
         self.find_modifier(table)
     self.handle_rows(body)
     self.find_caption(table)
     return begin
Example #20
0
def _parse_gallery_txt(txt, xopts):
    lines = [x.strip() for x in txt.split("\n")]
    sub = []
    for x in lines:
        if not x:
            continue

        assert xopts.expander is not None, "no expander in _parse_gallery_txt"
        xnew = xopts.expander.parseAndExpand(x, keep_uniq=True)

        linode = parse_txt(u'[[' + xnew + ']]', xopts)

        if linode:
            n = linode[0]
            if n.ns == nshandling.NS_IMAGE:
                sub.append(n)
                continue
        sub.append(T(type=T.t_text, text=xnew))
    return sub
Example #21
0
    def create_ref(self, name, vlist, inner, xopts):
        """Build a ``ref`` tag node from the tag's inner wikitext.

        ``inner`` is expanded first when an expander is available. It is then
        parsed with a leading ``<br />`` so that it is not parsed from the
        start of a line. That helper element is removed again afterwards.
        Empty ``inner`` yields a node without children.
        """
        expander = xopts.expander
        if inner and expander is not None:
            inner = expander.parseAndExpand(inner, True)

        children = []
        if inner:
            # <ref>* not an item</ref>
            children = parse_txt("<br />" + inner, xopts)
            head = children[0]
            if head.children:  # paragraph had been created...
                del head.children[0]
            else:
                del children[0]

        return T(type=T.t_complex_tag,
                 tagname="ref",
                 vlist=vlist,
                 children=children)
Example #22
0
    def create_poem(self, name, vlist, inner, xopts):
        """Build a ``poem`` tag node by rewriting each line as wikitext.

        ``inner`` is expanded first when an expander is available. Every
        non-blank line is prefixed with ``:``; a line that starts with a
        space additionally gets a leading ``&nbsp;``. Lines are stripped,
        joined with newlines between blank lead-in and lead-out lines, and
        the result is parsed into the node's children.
        """
        expander = xopts.expander
        if inner and expander is not None:
            inner = expander.parseAndExpand(inner, True)

        pieces = [u"\n"]
        for line in inner.split("\n"):
            stripped = line.strip()
            if stripped:
                pieces.append(":")
            if line.startswith(" "):
                pieces.append(u"&nbsp;")
            pieces.extend((stripped, u"\n"))
        pieces.append(u"\n")

        children = parse_txt(u"".join(pieces), xopts)
        return T(type=T.t_complex_tag,
                 tagname="poem",
                 vlist=vlist,
                 children=children)
Example #23
0
    def find_caption(self, table):
        """Replace a leading table caption in ``table.children`` with a node.

        Only tokens with blank text may come before the ``t_tablecaption``
        marker; otherwise nothing is done. The caption runs up to the first
        token (other than a ``ref`` tag) whose text is ``None`` or starts
        with a newline. If a ``|`` is seen first, the tokens before it are
        parsed as attributes and only the tokens after it become the
        caption's children. If no terminating token is found, nothing is
        changed.
        """
        children = table.children

        # Locate the caption marker.
        for start, t in enumerate(children):
            if t.type == T.t_tablecaption:
                break
            if t.text is None or t.text.strip():
                return
        else:
            return

        # None: still looking for a "|" separator; 0: lookup disabled by a
        # t_2box_open token; any other value: index of the separator.
        modifier = None

        for end in range(start + 1, len(children)):
            t = children[end]
            if t.tagname != "ref" and (t.text is None
                                       or t.text.startswith("\n")):
                if modifier:
                    attr_text = T.join_as_text(children[start:modifier])
                    vlist = util.parseParams(attr_text)
                    sub = children[modifier + 1:end]
                else:
                    vlist = {}
                    sub = children[start + 1:end]

                children[start:end] = [T(type=T.t_complex_caption,
                                         children=sub,
                                         vlist=vlist)]
                return

            if modifier is None:
                if t.text == "|":
                    modifier = end
                elif t.type == T.t_2box_open:
                    modifier = 0
Example #24
0
    def create_pages(self, name, vlist, inner, xopts):
        """Build a tag node whose children are the transcluded pages.

        The ``from`` and ``to`` attributes select the pages. If both parse
        as integers, pages ``<index>/<n>`` for every ``n`` in the inclusive
        range are used. Otherwise both are treated as page names and the
        range is fetched with ``expander.db.select``. The pages are
        transcluded through a fresh expander of the same class and parsed.
        Without ``from``, ``to`` or an expander the node has no children.
        """
        expander = xopts.expander
        vlist = vlist or {}

        first = vlist.get("from")
        last = vlist.get("to")
        children = []

        if first and last and expander:
            nshandler = expander.nshandler
            page_ns = nshandler._find_namespace("Page")[1]

            try:
                bounds = (int(first), int(last))
            except ValueError:
                # non-numeric bounds: treat them as page titles
                first = nshandler.get_fqname(first, page_ns)
                last = nshandler.get_fqname(last, page_ns)
                pages = expander.db.select(first, last)
            else:
                low, high = bounds
                base = nshandler.get_fqname(vlist.get("index", ""), page_ns)
                pages = [u"%s/%s" % (base, n) for n in range(low, high + 1)]

            rawtext = u"".join(u"{{%s}}\n" % page for page in pages)
            sub_expander = expander.__class__(rawtext,
                                              pagename=expander.pagename,
                                              wikidb=expander.db)
            children = parse_txt(sub_expander.expandTemplates(True),
                                 xopts=XBunch(**xopts.__dict__),
                                 expander=sub_expander,
                                 uniquifier=sub_expander.uniquifier)

        return T(type=T.t_complex_tag,
                 tagname=name,
                 vlist=vlist,
                 children=children)
Example #25
0
 def create_timeline(self, name, vlist, inner, xopts):
     """Return a block-level ``timeline`` tag node.

     The raw inner text is stored unparsed in the node's ``timeline``
     attribute. ``name`` and ``xopts`` are not used.
     """
     node = T(type=T.t_complex_tag,
              tagname="timeline",
              vlist=vlist,
              timeline=inner,
              blocknode=True)
     return node
Example #26
0
    def run(self):
        """Group list-item lines into ``t_complex_line`` nodes and analyze them.

        A line starts at a ``t_item`` or ``t_colon`` token and ends at the
        next ``t_newline`` (included in the line's children) or ``t_break``
        (not included). Consecutive lines are collected in ``lines``; when
        the group ends they are passed to ``self.analyze`` and spliced into
        ``self.tokens`` in place of the tokens they were built from.
        """
        tokens = self.tokens
        i = 0
        lines = []
        startline = None  # index of the prefix token of the line being read
        firsttoken = None  # index of the first token of the current group

        def getlineprefix():
            # Prefix text of the line being read; a missing text counts as "".
            return (tokens[startline].text or "").strip()

        while i < len(self.tokens):
            t = tokens[i]
            if t.type in (T.t_item, T.t_colon):
                if firsttoken is None:
                    firsttoken = i
                startline = i
                i += 1
            elif t.type == T.t_newline and startline is not None:
                # End of a line: the newline itself is kept as the last child.
                sub = self.tokens[startline + 1:i + 1]
                lines.append(
                    T(type=T.t_complex_line,
                      start=tokens[startline].start,
                      len=0,
                      children=sub,
                      lineprefix=getlineprefix()))
                startline = None
                i += 1
            elif t.type == T.t_break:
                if startline is not None:
                    # A break ends the open line but is not one of its children.
                    sub = self.tokens[startline + 1:i]
                    lines.append(
                        T(type=T.t_complex_line,
                          start=tokens[startline].start,
                          len=0,
                          children=sub,
                          lineprefix=getlineprefix()))
                    startline = None
                if lines:
                    # Flush the group; the break token stays and is looked at
                    # again on the next iteration, this time with no lines.
                    self.analyze(lines)
                    self.tokens[firsttoken:i] = lines
                    i = firsttoken
                    firsttoken = None
                    lines = []
                    continue

                firsttoken = None

                lines = []
                i += 1
            else:
                if startline is None and lines:
                    # A token outside any line ends the group. i is reset to
                    # the start of the spliced-in lines, which are then
                    # stepped over one by one.
                    self.analyze(lines)
                    self.tokens[firsttoken:i] = lines
                    i = firsttoken
                    lines = []
                    firsttoken = None
                else:
                    i += 1

        if startline is not None:
            # Input ended in the middle of a line.
            # NOTE(review): unlike the two line nodes built inside the loop,
            # this one is created without len=0 - confirm that is intended.
            sub = self.tokens[startline + 1:]
            lines.append(
                T(type=T.t_complex_line,
                  start=tokens[startline].start,
                  children=sub,
                  lineprefix=getlineprefix()))

        if lines:
            self.analyze(lines)
            self.tokens[firsttoken:] = lines
Example #27
0
    def run(self):
        """Group table tokens into ``tr`` row nodes and parse their cells.

        A row is opened either by a row-start token or, when no row is open,
        by a cell-start token. It is closed by the next row-start token, by
        a row-end token, or by the end of the token list. Each closed row
        replaces its tokens with a ``t_complex_table_row`` node, and its
        children are handed to ``parse_table_cells``.
        """
        tokens = self.tokens
        i = 0

        start = None  # index where the open row begins, None if no row is open
        # Number of leading tokens left out of the row's children: 1 for a
        # row-start token, 0 when the row was opened by a cell-start token.
        remove_start = 1
        rowbegintoken = None  # row-start token of the open row, if any

        def should_find_modifier():
            # Modifiers are only looked up for rows opened by a row-start
            # token that has no rawtagname.
            if rowbegintoken is None:
                return False
            if rowbegintoken.rawtagname:
                return False
            return True

        def args():
            # Extra keyword arguments for the row node: the row-start token's
            # vlist when there is one.
            if rowbegintoken is None:
                return {}
            return dict(vlist=rowbegintoken.vlist)

        while i < len(tokens):
            if start is None and self.is_table_cell_start(tokens[i]):
                # A cell outside any row opens a row implicitly; the cell
                # token stays part of the row's children.
                rowbegintoken = None
                start = i
                remove_start = 0
                i += 1
            elif self.is_table_row_start(tokens[i]):
                if start is not None:
                    # A new row starts: close the open one just before it.
                    children = tokens[start + remove_start:i]
                    tokens[start:i] = [T(type=T.t_complex_table_row, tagname="tr",
                                         start=tokens[start].start, children=children, **args())]
                    if should_find_modifier():
                        self.find_modifier(tokens[start])
                    parse_table_cells(children, self.xopts)
                    start += 1  # we didn't remove the start symbol above
                    rowbegintoken = tokens[start]
                    remove_start = 1
                    i = start + 1

                else:
                    rowbegintoken = tokens[i]
                    remove_start = 1
                    start = i
                    i += 1
            elif self.is_table_row_end(tokens[i]):
                if start is not None:
                    # The row-end token is consumed together with the row.
                    sub = tokens[start + remove_start:i]
                    tokens[start:i + 1] = [T(type=T.t_complex_table_row, tagname="tr",
                                             start=tokens[start].start, children=sub, **args())]
                    if should_find_modifier():
                        self.find_modifier(tokens[start])
                    parse_table_cells(sub, self.xopts)
                    i = start + 1
                    start = None
                    rowbegintoken = None
                else:
                    i += 1
            else:
                i += 1

        if start is not None:
            # The token list ended while a row was still open.
            sub = tokens[start + remove_start:]
            tokens[start:] = [T(type=T.t_complex_table_row, tagname="tr",
                                start=tokens[start].start, children=sub, **args())]
            if should_find_modifier():
                self.find_modifier(tokens[start])
            parse_table_cells(sub, self.xopts)
Example #28
0
 def create_nowiki(self, name, vlist, inner, xopts):
     """Return ``inner`` as a single text token.

     The text is passed through ``util.replace_html_entities`` first.
     ``name``, ``vlist`` and ``xopts`` are not used.
     """
     decoded = util.replace_html_entities(inner)
     return T(type=T.t_text, text=decoded)
Example #29
0
    def run(self):
        """Replace ``[[target|...]]`` token runs with ``t_complex_link`` nodes.

        ``marks`` records the token indices of the current link: the opening
        ``t_2box_open``, every special ``|`` seen since, and finally the
        closing ``t_2box_close``. ``stack`` keeps the marks of enclosing
        links while a nested link is being read.
        """
        tokens = self.tokens
        i = 0
        marks = []

        stack = []

        while i < len(self.tokens):
            t = tokens[i]
            if t.type == T.t_2box_open:
                # Only marks that already hold more than one index are saved
                # for later; otherwise they are replaced by the new opener.
                if len(marks) > 1:
                    stack.append(marks)
                marks = [i]
                i += 1
            elif t.type == T.t_newline and len(marks) < 2:
                # A newline before the first "|" abandons the current link.
                if stack:
                    marks = stack.pop()
                else:
                    marks = []
                i += 1
            elif t.type == T.t_special and t.text == "|":
                # NOTE(review): this is appended even when no "[[" is open,
                # so a stray "|" can become marks[0] - confirm this is
                # intended.
                marks.append(i)
                i += 1
            elif t.type == T.t_2box_close and marks:
                marks.append(i)
                start = marks[0]

                # The target is the text between marks[0] and marks[1], with
                # surrounding whitespace and the characters U+200E / U+200F
                # stripped.
                target = T.join_as_text(tokens[start + 1:marks[1]]).strip()
                target = target.strip(u"\u200e\u200f")
                if target.startswith(":"):
                    target = target[1:]
                    colon = True
                else:
                    colon = False

                ilink = self.nshandler.resolve_interwiki(target)
                if ilink:
                    url = ilink.url
                    ns = None
                    partial = ilink.partial
                    langlink = ilink.language
                    interwiki = ilink.prefix
                    full = None
                else:
                    if target.startswith('/') and self.xopts.title:
                        # A target starting with "/" is resolved relative to
                        # the current title.
                        ns, partial, full = self.nshandler.splitname(
                            self.xopts.title + target)
                        if full.endswith('/'):
                            full = full[:-1]
                            target = target[1:-1]
                    else:
                        ns, partial, full = self.nshandler.splitname(target)

                    if self.xopts.wikidb is not None:
                        url = self.xopts.wikidb.getURL(full)
                    else:
                        url = None
                    langlink = None
                    interwiki = None

                if not ilink and not partial:
                    # No usable target: leave the tokens untouched and go
                    # back to the enclosing link, if any.
                    i += 1
                    if stack:
                        marks = stack.pop()
                    else:
                        marks = []
                    continue

                node = T(type=T.t_complex_link,
                         children=[],
                         ns=ns,
                         colon=colon,
                         lang=self.lang,
                         nshandler=self.nshandler,
                         url=url)
                if langlink:
                    node.langlink = langlink
                if interwiki:
                    node.interwiki = interwiki

                sub = None
                if ns == nshandling.NS_IMAGE:
                    sub = self.extract_image_modifiers(marks, node)
                elif len(marks) > 2:
                    # Everything after the first "|" is the link's content.
                    sub = tokens[marks[1] + 1:marks[-1]]

                if sub is None:
                    sub = []

                node.children = sub
                tokens[start:i + 1] = [node]
                node.target = target
                node.full_target = full
                if stack:
                    marks = stack.pop()
                else:
                    marks = []
                i = start + 1
            else:
                i += 1
Example #30
0
 def create_math(self, name, vlist, inner, xopts):
     """Return a ``math`` tag node.

     The raw inner text is stored unparsed in the node's ``math``
     attribute. ``name`` and ``xopts`` are not used.
     """
     node = T(type=T.t_complex_tag,
              tagname="math",
              vlist=vlist,
              math=inner)
     return node