Exemple #1
0
def parse_inputbox(tokens, xopts):
    """Parse ``inputbox`` tags and collapse each one's content to text.

    The recursive tag parser obtained for ``"inputbox"`` is applied to
    *tokens* with *xopts*.  Afterwards, every token directly contained in
    *tokens* whose ``tagname`` is ``'inputbox'`` gets the text joined from
    its children stored in its ``inputbox`` attribute, and its children
    list is emptied in place.
    """
    run_tag_parser = get_recursive_tag_parser("inputbox")
    run_tag_parser(tokens, xopts)

    for token in tokens:
        if token.tagname != 'inputbox':
            continue
        # Keep only the flattened text; the child tokens are discarded.
        token.inputbox = T.join_as_text(token.children)
        del token.children[:]
Exemple #2
0
 def extract_image_modifiers(self, marks, node):
     """Feed each segment after the first separator to the modifier handler.

     *marks* holds token indices; every pair of consecutive marks from
     ``marks[1]`` onwards delimits one segment of ``self.tokens`` (the
     marks themselves excluded).  The joined text of each segment is passed
     to ``self.handle_image_modifier`` together with *node*.

     Returns the token list of the last segment for which the handler
     returned a false value, or ``None`` if there was no such segment.
     """
     caption = None
     # Consecutive (left, right) mark pairs, starting at marks[1].
     for left, right in zip(marks[1:], marks[2:]):
         segment = self.tokens[left + 1:right]
         recognised = self.handle_image_modifier(T.join_as_text(segment), node)
         if not recognised:
             caption = segment
     return caption
Exemple #3
0
def parse_inputbox(tokens, xopts):
    """Run the ``inputbox`` tag parser, then reduce each inputbox to text.

    For each element of *tokens* with ``tagname == 'inputbox'`` the joined
    text of its children is stored as ``inputbox`` and the children list is
    cleared in place.
    """
    get_recursive_tag_parser("inputbox")(tokens, xopts)

    # Lazy filter: each token is checked right before it is processed.
    boxes = (tok for tok in tokens if tok.tagname == 'inputbox')
    for box in boxes:
        content = box.children
        box.inputbox = T.join_as_text(content)
        del content[:]
Exemple #4
0
 def extract_image_modifiers(self, marks, node):
     """Apply image modifiers found between marks and return the leftover.

     Walks the token ranges lying strictly between ``marks[k]`` and
     ``marks[k + 1]`` for ``k = 1 .. len(marks) - 2``.  Each range is
     joined to text and offered to ``self.handle_image_modifier`` along
     with *node*; the last range the handler rejects (false return value)
     is returned.  ``None`` is returned when no range was rejected.
     """
     leftover = None
     last = len(marks) - 1
     idx = 1
     while idx < last:
         begin = marks[idx] + 1
         end = marks[idx + 1]
         piece = self.tokens[begin:end]
         text = T.join_as_text(piece)
         if not self.handle_image_modifier(text, node):
             leftover = piece
         idx += 1
     return leftover
Exemple #5
0
 def find_modifier(self, row):
     """Split the leading modifier text off *row* and parse it.

     Looks for the first child of *row* whose type is ``T.t_newline`` or
     ``T.t_break``.  Everything before that child is joined to text, parsed
     with ``util.parseParams`` and stored in ``row.vlist``; those leading
     children are then removed in place (the newline/break child stays).
     Nothing happens when no such child exists.
     """
     nodes = row.children
     stop_types = (T.t_newline, T.t_break)
     cut = next(
         (pos for pos, tok in enumerate(nodes) if tok.type in stop_types),
         None)
     if cut is None:
         return

     modifier_text = T.join_as_text(nodes[:cut])
     row.vlist = util.parseParams(modifier_text)
     del nodes[:cut]
Exemple #6
0
    def find_modifier(self, cell):
        """Split a leading ``modifier |`` prefix off *cell* and parse it.

        Scans the children of *cell* from the start.  The scan gives up at
        the first ``T.t_2box_open`` token.  When a ``T.t_special`` token
        with text ``"|"`` is reached first, the children before it are
        joined to text, parsed with ``util.parseParams`` into
        ``cell.vlist``, and removed in place together with the ``|`` token.
        """
        nodes = cell.children
        if not nodes:
            return

        for pos, tok in enumerate(nodes):
            if tok.type == T.t_2box_open:
                # A link opener comes before any "|": no modifier here.
                return
            if tok.type != T.t_special or tok.text != "|":
                continue

            modifier_text = T.join_as_text(nodes[:pos])
            cell.vlist = util.parseParams(modifier_text)
            # Drop the modifier tokens and the separating "|" itself.
            del nodes[:pos + 1]
            return
Exemple #7
0
    def find_caption(self, table):
        """Wrap a table caption into a single ``T.t_complex_caption`` token.

        The children of *table* are scanned for a ``T.t_tablecaption``
        token; only tokens whose text is whitespace may precede it,
        otherwise nothing is done.  The caption extends up to (not
        including) the first following token that is not a ``ref`` tag and
        whose text is ``None`` or starts with a newline.  If no such token
        follows, the children are left untouched.

        When a ``"|"`` token occurs inside the caption before any
        ``T.t_2box_open`` token, the tokens from the caption marker up to
        that ``|`` are joined and parsed with ``util.parseParams`` as the
        caption's ``vlist`` and only the tokens after the ``|`` become the
        caption's children.  Otherwise ``vlist`` is empty and all tokens
        after the caption marker become the children.
        """
        nodes = table.children

        # Phase 1: locate the caption marker.
        start = None
        for pos, tok in enumerate(nodes):
            if tok.type == T.t_tablecaption:
                start = pos
                break
            if tok.text is None or tok.text.strip():
                return
        if start is None:
            return

        # Phase 2: find where the caption ends and whether it has a modifier.
        # pipe_at stays None until the first "|" (-> its index) or the first
        # link opener (-> 0, meaning "no modifier, stop looking").
        pipe_at = None
        for pos in range(start + 1, len(nodes)):
            tok = nodes[pos]
            terminates = tok.tagname != "ref" and (
                tok.text is None or tok.text.startswith("\n"))

            if not terminates:
                if pipe_at is None:
                    if tok.text == "|":
                        pipe_at = pos
                    elif tok.type == T.t_2box_open:
                        pipe_at = 0
                continue

            if pipe_at:
                modifier_text = T.join_as_text(nodes[start:pipe_at])
                vlist = util.parseParams(modifier_text)
                body = nodes[pipe_at + 1:pos]
            else:
                body = nodes[start + 1:pos]
                vlist = {}

            nodes[start:pos] = [T(type=T.t_complex_caption,
                                  children=body,
                                  vlist=vlist)]
            return
Exemple #8
0
    def find_caption(self, table):
        """Collapse the caption tokens of *table* into one caption token.

        Leading children whose text is whitespace-only are skipped until a
        ``T.t_tablecaption`` token is found; any other token before it
        aborts the search.  The caption ends right before the next token
        that is not a ``ref`` tag and whose text is ``None`` or begins with
        a newline; without such a token the children are not modified.

        The resulting ``T.t_complex_caption`` token replaces the caption
        marker and the caption tokens.  Its ``vlist`` is parsed from the
        text before the first ``"|"`` (if one appears before any
        ``T.t_2box_open``), else it is an empty dict.
        """
        kids = table.children
        total = len(kids)

        idx = 0
        caption_at = None
        while caption_at is None and idx < total:
            tok = kids[idx]
            if tok.type == T.t_tablecaption:
                caption_at = idx
            elif tok.text is None or tok.text.strip():
                return
            idx += 1
        if caption_at is None:
            return

        # split_at: None = undecided, 0 = link opener seen first (no
        # modifier), otherwise the index of the separating "|".
        split_at = None
        while idx < total:
            tok = kids[idx]
            is_ref = tok.tagname in ("ref",)
            if not is_ref and (tok.text is None or tok.text.startswith("\n")):
                break
            if split_at is None:
                if tok.text == "|":
                    split_at = idx
                elif tok.type == T.t_2box_open:
                    split_at = 0
            idx += 1
        else:
            # Ran off the end without finding the end of the caption.
            return

        if split_at:
            vlist = util.parseParams(T.join_as_text(kids[caption_at:split_at]))
            content = kids[split_at + 1:idx]
        else:
            content = kids[caption_at + 1:idx]
            vlist = {}

        wrapped = T(type=T.t_complex_caption, children=content, vlist=vlist)
        kids[caption_at:idx] = [wrapped]
Exemple #9
0
 def compute_mod():
     """Parse the first ``i`` entries of ``children`` as the table modifier.

     Relies on ``children``, ``i`` and ``table`` from the enclosing scope:
     the leading ``i`` children are joined to text, parsed with
     ``util.parseParams`` into ``table.vlist``, and then removed from
     ``children`` in place.
     """
     leading = children[:i]
     table.vlist = util.parseParams(T.join_as_text(leading))
     del children[:i]
Exemple #10
0
def test_ebad_in_text():
    """The code point U+EBAD must not survive in the joined parse output."""
    parsed = core.parse_txt(u"foo\uebadbar")
    txt = T.join_as_text(parsed)
    assert txt == "foobar", "\uebad should be stripped"
Exemple #11
0
    def run(self):
        """Replace ``[[target|...]]`` token runs in ``self.tokens`` by link nodes.

        The token list is scanned once and modified in place.  ``marks``
        collects the token indices of the link currently being scanned: the
        opening ``[[`` first, then every ``|`` separator, and finally the
        closing ``]]``.  When a new ``[[`` is met after at least one ``|``
        of the current link has been seen, the current marks are pushed on
        ``stack`` and restored once the inner link has been handled.

        For every closed link the target text is resolved through
        ``self.nshandler`` (interwiki lookup first, ``splitname`` otherwise)
        and the whole bracketed run is replaced by one ``T.t_complex_link``
        token.  A link whose target is not an interwiki link and has an
        empty ``partial`` name is left untouched.
        """
        tokens = self.tokens
        i = 0
        marks = []

        stack = []

        while i < len(self.tokens):
            t = tokens[i]
            if t.type == T.t_2box_open:
                # Only a link that already has a separator is worth resuming
                # later; otherwise the new opener simply replaces it.
                if len(marks) > 1:
                    stack.append(marks)
                marks = [i]
                i += 1
            elif t.type == T.t_newline and len(marks) < 2:
                # A newline before the first separator abandons the link.
                marks = stack.pop() if stack else []
                i += 1
            elif t.type == T.t_special and t.text == "|" and marks:
                # Record separators only while a link is open.  Without the
                # `marks` guard a pipe outside of any link would become
                # marks[0], and a later stray "]]" would build a link that
                # starts at that pipe instead of at a "[[".
                marks.append(i)
                i += 1
            elif t.type == T.t_2box_close and marks:
                marks.append(i)
                start = marks[0]

                # The target is the text between "[[" and the next mark.
                target = T.join_as_text(tokens[start + 1:marks[1]]).strip()
                # Strip U+200E / U+200F (left-to-right / right-to-left marks).
                target = target.strip(u"\u200e\u200f")
                colon = target.startswith(":")
                if colon:
                    target = target[1:]

                ilink = self.nshandler.resolve_interwiki(target)
                if ilink:
                    url = ilink.url
                    ns = None
                    partial = ilink.partial
                    langlink = ilink.language
                    interwiki = ilink.prefix
                    full = None
                else:
                    if target.startswith('/') and self.xopts.title:
                        # "/sub" style target: resolve relative to the
                        # current page title.
                        ns, partial, full = self.nshandler.splitname(
                            self.xopts.title + target)
                        if full.endswith('/'):
                            full = full[:-1]
                            target = target[1:-1]
                    else:
                        ns, partial, full = self.nshandler.splitname(target)

                    if self.xopts.wikidb is not None:
                        url = self.xopts.wikidb.getURL(full)
                    else:
                        url = None
                    langlink = None
                    interwiki = None

                if not ilink and not partial:
                    # Nothing usable as a target: keep the tokens as they
                    # are and continue behind the closing brackets.
                    i += 1
                    marks = stack.pop() if stack else []
                    continue

                node = T(type=T.t_complex_link,
                         children=[],
                         ns=ns,
                         colon=colon,
                         lang=self.lang,
                         nshandler=self.nshandler,
                         url=url)
                if langlink:
                    node.langlink = langlink
                if interwiki:
                    node.interwiki = interwiki

                sub = None
                if ns == nshandling.NS_IMAGE:
                    sub = self.extract_image_modifiers(marks, node)
                elif len(marks) > 2:
                    # Everything between the first "|" and the closing "]]".
                    sub = tokens[marks[1] + 1:marks[-1]]

                if sub is None:
                    sub = []

                node.children = sub
                # Swap the whole "[[ ... ]]" run for the single link node.
                tokens[start:i + 1] = [node]
                node.target = target
                node.full_target = full
                marks = stack.pop() if stack else []
                # Resume right behind the node that now sits at `start`.
                i = start + 1
            else:
                i += 1
Exemple #12
0
def test_ebad_in_text():
    """Parsing text containing U+EBAD must yield text without that character."""
    expected = "foobar"
    actual = T.join_as_text(core.parse_txt(u"foo\uebadbar"))
    assert actual == expected, "\uebad should be stripped"
Exemple #13
0
    def run(self):
        """Replace ``[[target|...]]`` token runs in ``self.tokens`` by link nodes.

        Scans ``self.tokens`` once and rewrites it in place.  ``marks``
        holds token indices for the link being scanned (opener, ``|``
        separators, closer); ``stack`` holds the marks of enclosing links
        that are resumed after an inner link has been handled.  Each closed
        link whose target resolves is replaced by a single
        ``T.t_complex_link`` token carrying ``target``, ``full_target``,
        ``url``, ``ns`` and the link's child tokens.
        """
        tokens = self.tokens
        i = 0
        marks = []

        stack = []

        while i < len(self.tokens):
            t = tokens[i]
            if t.type == T.t_2box_open:
                # Suspend the current link only if it already has a second
                # mark; otherwise the new opener just replaces it.
                if len(marks) > 1:
                    stack.append(marks)
                marks = [i]
                i += 1
            elif t.type == T.t_newline and len(marks) < 2:
                # Newline while fewer than two marks are recorded: drop the
                # current marks and fall back to the suspended ones, if any.
                if stack:
                    marks = stack.pop()
                else:
                    marks = []
                i += 1
            elif t.type == T.t_special and t.text == "|":
                # NOTE(review): this also records a pipe when `marks` is
                # empty, so marks[0] may be a "|" rather than a "[[" when a
                # later "]]" is processed -- confirm whether that is intended.
                marks.append(i)
                i += 1
            elif t.type == T.t_2box_close and marks:
                marks.append(i)
                start = marks[0]

                # Target text: everything between marks[0] and marks[1].
                target = T.join_as_text(tokens[start + 1:marks[1]]).strip()
                # Strip U+200E / U+200F (left-to-right / right-to-left marks).
                target = target.strip(u"\u200e\u200f")
                if target.startswith(":"):
                    target = target[1:]
                    colon = True
                else:
                    colon = False

                ilink = self.nshandler.resolve_interwiki(target)
                if ilink:
                    url = ilink.url
                    ns = None
                    partial = ilink.partial
                    langlink = ilink.language
                    interwiki = ilink.prefix
                    full = None
                else:
                    if target.startswith('/') and self.xopts.title:
                        # Target starting with "/": split the name formed by
                        # appending it to the current title.
                        ns, partial, full = self.nshandler.splitname(
                            self.xopts.title + target)
                        if full.endswith('/'):
                            full = full[:-1]
                            target = target[1:-1]
                    else:
                        ns, partial, full = self.nshandler.splitname(target)

                    if self.xopts.wikidb is not None:
                        url = self.xopts.wikidb.getURL(full)
                    else:
                        url = None
                    langlink = None
                    interwiki = None

                if not ilink and not partial:
                    # No interwiki match and an empty partial name: leave the
                    # tokens untouched and continue after the closing token.
                    i += 1
                    if stack:
                        marks = stack.pop()
                    else:
                        marks = []
                    continue

                node = T(type=T.t_complex_link,
                         children=[],
                         ns=ns,
                         colon=colon,
                         lang=self.lang,
                         nshandler=self.nshandler,
                         url=url)
                if langlink:
                    node.langlink = langlink
                if interwiki:
                    node.interwiki = interwiki

                sub = None
                if ns == nshandling.NS_IMAGE:
                    sub = self.extract_image_modifiers(marks, node)
                elif len(marks) > 2:
                    # Tokens between the second mark and the closing token.
                    sub = tokens[marks[1] + 1:marks[-1]]

                if sub is None:
                    sub = []

                node.children = sub
                # Replace the whole run from marks[0] through the closing
                # token with the single link node.
                tokens[start:i + 1] = [node]
                node.target = target
                node.full_target = full
                if stack:
                    marks = stack.pop()
                else:
                    marks = []
                # Continue scanning right after the new node.
                i = start + 1
            else:
                i += 1