Python join_as_text Exemples, mwlib.utoken.token.join_as_text Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : core.py Projet : pediapress/mwlib

def parse_inputbox(tokens, xopts):
    """Flatten the children of every ``inputbox`` token into plain text.

    The callable returned by ``get_recursive_tag_parser("inputbox")`` is run
    over *tokens* with *xopts* first.  Afterwards, each token in *tokens*
    whose ``tagname`` is 'inputbox' gets its children joined to text and
    stored on its ``inputbox`` attribute; its child list is then emptied in
    place.
    """
    run_tag_parser = get_recursive_tag_parser("inputbox")
    run_tag_parser(tokens, xopts)

    for token in tokens:
        if token.tagname != 'inputbox':
            continue
        token.inputbox = T.join_as_text(token.children)
        # Empty the existing list object rather than rebinding the attribute.
        del token.children[:]

Exemple #2

0

Afficher le fichier

Fichier : core.py Projet : pediapress/mwlib

 def extract_image_modifiers(self, marks, node):
     """Pass each marked-off token segment to ``handle_image_modifier``.

     *marks* is a list of indices into ``self.tokens``.  For every pair of
     consecutive marks after the first one, the tokens strictly between
     them are joined to text and handed to ``self.handle_image_modifier``
     together with *node*.

     Returns the token slice of the last segment for which the handler
     returned a falsy value, or None if there was no such segment.
     """
     caption = None
     for left, right in zip(marks[1:-1], marks[2:]):
         segment = self.tokens[left + 1:right]
         recognised = self.handle_image_modifier(T.join_as_text(segment), node)
         if not recognised:
             caption = segment
     return caption

Exemple #3

0

Afficher le fichier

Fichier : core.py Projet : uncletall/mwlib

def parse_inputbox(tokens, xopts):
    """Flatten the children of every ``inputbox`` token into plain text.

    The callable returned by ``get_recursive_tag_parser("inputbox")`` is run
    over *tokens* with *xopts* first.  Afterwards, each token in *tokens*
    whose ``tagname`` is 'inputbox' gets its children joined to text and
    stored on its ``inputbox`` attribute; its child list is then emptied in
    place.
    """
    run_tag_parser = get_recursive_tag_parser("inputbox")
    run_tag_parser(tokens, xopts)

    for token in tokens:
        if token.tagname != 'inputbox':
            continue
        token.inputbox = T.join_as_text(token.children)
        # Empty the existing list object rather than rebinding the attribute.
        del token.children[:]

Exemple #4

0

Afficher le fichier

Fichier : core.py Projet : uncletall/mwlib

 def extract_image_modifiers(self, marks, node):
     """Pass each marked-off token segment to ``handle_image_modifier``.

     *marks* is a list of indices into ``self.tokens``.  For every pair of
     consecutive marks after the first one, the tokens strictly between
     them are joined to text and handed to ``self.handle_image_modifier``
     together with *node*.

     Returns the token slice of the last segment for which the handler
     returned a falsy value, or None if there was no such segment.
     """
     caption = None
     for left, right in zip(marks[1:-1], marks[2:]):
         segment = self.tokens[left + 1:right]
         recognised = self.handle_image_modifier(T.join_as_text(segment), node)
         if not recognised:
             caption = segment
     return caption

Exemple #5

0

Afficher le fichier

 def find_modifier(self, row):
     """Parse the leading tokens of *row* into ``row.vlist``.

     Everything in ``row.children`` before the first newline or break token
     is joined to text, parsed with ``util.parseParams`` and removed from
     the child list in place; the newline/break token itself is kept.  If
     no such token exists, *row* is left untouched.
     """
     kids = row.children
     for pos, token in enumerate(kids):
         if token.type not in (T.t_newline, T.t_break):
             continue
         attr_text = T.join_as_text(kids[:pos])
         row.vlist = util.parseParams(attr_text)
         del kids[:pos]
         return

Exemple #6

0

Afficher le fichier

    def find_modifier(self, cell):
        """Parse the tokens before the first "|" of *cell* into ``cell.vlist``.

        Walks ``cell.children`` until either a ``t_2box_open`` token is met
        (the search is abandoned) or a special token with text "|" is found.
        In the latter case the tokens before it are joined to text, parsed
        with ``util.parseParams``, and removed from the child list in place
        together with the "|" token.
        """
        kids = cell.children
        if not kids:
            return

        for pos, token in enumerate(kids):
            if token.type == T.t_2box_open:
                # Stop looking once a t_2box_open token has been reached.
                return
            if token.type == T.t_special and token.text == "|":
                attr_text = T.join_as_text(kids[:pos])
                cell.vlist = util.parseParams(attr_text)
                # Remove the attribute tokens and the separator itself.
                del kids[:pos + 1]
                return

Exemple #7

0

Afficher le fichier

    def find_caption(self, table):
        """Wrap a table's caption tokens in a single ``t_complex_caption`` node.

        Scans ``table.children`` for a ``t_tablecaption`` token and gives up
        if a token whose ``text`` is None or contains non-whitespace comes
        first.  The caption runs up to the next token that is not a ``ref``
        tag and whose text is None or starts with a newline; if no such token
        exists nothing is changed.

        When a "|" token is seen inside the caption before any
        ``t_2box_open`` token, the tokens from the caption marker up to that
        "|" are parsed with ``util.parseParams`` into the caption's ``vlist``
        and the caption body starts after the "|".  The child list is
        modified in place; the method returns None.
        """
        kids = table.children

        # Locate the caption marker; only whitespace-only tokens may precede it.
        for start, token in enumerate(kids):
            if token.type == T.t_tablecaption:
                break
            if token.text is None or token.text.strip():
                return
        else:
            return

        # None -> undecided; index of the "|" separator once seen; 0 -> a
        # t_2box_open token came first, so later "|" tokens are ignored
        # (0 is falsy, which selects the "no attributes" branch below).
        separator = None

        for end in range(start + 1, len(kids)):
            token = kids[end]
            if token.tagname not in ("ref", ) and (
                    token.text is None or token.text.startswith("\n")):
                if separator:
                    attr_text = T.join_as_text(kids[start:separator])
                    vlist = util.parseParams(attr_text)
                    body = kids[separator + 1:end]
                else:
                    body = kids[start + 1:end]
                    vlist = {}

                kids[start:end] = [
                    T(type=T.t_complex_caption, children=body, vlist=vlist)
                ]
                return
            elif token.text == "|" and separator is None:
                separator = end
            elif token.type == T.t_2box_open and separator is None:
                separator = 0

Exemple #8

0

Afficher le fichier

Fichier : parse_table.py Projet : pediapress/mwlib

    def find_caption(self, table):
        """Wrap a table's caption tokens in a single ``t_complex_caption`` node.

        Scans ``table.children`` for a ``t_tablecaption`` token and gives up
        if a token whose ``text`` is None or contains non-whitespace comes
        first.  The caption runs up to the next token that is not a ``ref``
        tag and whose text is None or starts with a newline; if no such token
        exists nothing is changed.

        When a "|" token is seen inside the caption before any
        ``t_2box_open`` token, the tokens from the caption marker up to that
        "|" are parsed with ``util.parseParams`` into the caption's ``vlist``
        and the caption body starts after the "|".  The child list is
        modified in place; the method returns None.
        """
        kids = table.children

        # Locate the caption marker; only whitespace-only tokens may precede it.
        for start, token in enumerate(kids):
            if token.type == T.t_tablecaption:
                break
            if token.text is None or token.text.strip():
                return
        else:
            return

        # None -> undecided; index of the "|" separator once seen; 0 -> a
        # t_2box_open token came first, so later "|" tokens are ignored
        # (0 is falsy, which selects the "no attributes" branch below).
        separator = None

        for end in range(start + 1, len(kids)):
            token = kids[end]
            if token.tagname not in ("ref",) and (
                    token.text is None or token.text.startswith("\n")):
                if separator:
                    attr_text = T.join_as_text(kids[start:separator])
                    vlist = util.parseParams(attr_text)
                    body = kids[separator + 1:end]
                else:
                    body = kids[start + 1:end]
                    vlist = {}

                kids[start:end] = [
                    T(type=T.t_complex_caption, children=body, vlist=vlist)
                ]
                return
            elif token.text == "|" and separator is None:
                separator = end
            elif token.type == T.t_2box_open and separator is None:
                separator = 0

Exemple #9

0

Afficher le fichier

 def compute_mod():
     """Parse the first ``i`` children into ``table.vlist`` and drop them.

     Relies on ``children``, ``i`` and ``table`` from the enclosing scope,
     which is not part of this excerpt.
     """
     attr_text = T.join_as_text(children[:i])
     table.vlist = util.parseParams(attr_text)
     # Remove the consumed tokens from the shared list in place.
     del children[:i]

Exemple #10

0

Afficher le fichier

def test_ebad_in_text():
    """U+EBAD embedded in the input must not appear in the joined text."""
    parsed = core.parse_txt(u"foo\uebadbar")
    joined = T.join_as_text(parsed)
    assert joined == "foobar", "\uebad should be stripped"

Exemple #11

0

Afficher le fichier

Fichier : core.py Projet : pediapress/mwlib

    def run(self):
        """Replace link token runs in ``self.tokens`` with ``t_complex_link`` nodes.

        Walks ``self.tokens`` once, recording in ``marks`` the index of a
        ``t_2box_open`` token followed by the indices of any "|" special
        tokens.  When a ``t_2box_close`` token is reached, the tokens between
        the opener and the next mark are joined into the link target, the
        target is resolved (interwiki first, otherwise via
        ``self.nshandler.splitname``), and the whole run from opener to
        closer is replaced in place by a single node.  Candidates that
        resolve to nothing are left as plain tokens.

        The method mutates ``self.tokens`` and returns None.
        """
        tokens = self.tokens
        i = 0
        # Indices for the link currently being collected:
        # [opener, "|" ..., closer].  Empty when no link is open.
        marks = []

        # ``marks`` lists of enclosing links that were suspended when a
        # nested opener was seen.
        stack = []

        while i < len(self.tokens):
            t = tokens[i]
            if t.type == T.t_2box_open:
                # A new opener: keep the outer link's marks only if it
                # already has at least one "|" after its opener.
                if len(marks) > 1:
                    stack.append(marks)
                marks = [i]
                i += 1
            elif t.type == T.t_newline and len(marks) < 2:
                # Newline before any "|" was seen: abandon the current
                # candidate and fall back to the enclosing one, if any.
                if stack:
                    marks = stack.pop()
                else:
                    marks = []
                i += 1
            elif t.type == T.t_special and t.text == "|":
                marks.append(i)
                i += 1
            elif t.type == T.t_2box_close and marks:
                marks.append(i)
                start = marks[0]

                # Target text is everything between the opener and the
                # first "|" (or the closer when there is no "|").
                target = T.join_as_text(tokens[start + 1:marks[1]]).strip()
                # Strip LEFT-TO-RIGHT MARK / RIGHT-TO-LEFT MARK characters.
                target = target.strip(u"\u200e\u200f")
                # A leading colon is removed from the target and remembered
                # on the node via ``colon``.
                if target.startswith(":"):
                    target = target[1:]
                    colon = True
                else:
                    colon = False

                ilink = self.nshandler.resolve_interwiki(target)
                if ilink:
                    url = ilink.url
                    ns = None
                    partial = ilink.partial
                    langlink = ilink.language
                    interwiki = ilink.prefix
                    full = None
                else:
                    # Targets starting with "/" are resolved relative to the
                    # current page title when one is available.
                    if target.startswith('/') and self.xopts.title:
                        ns, partial, full = self.nshandler.splitname(self.xopts.title + target)
                        if full.endswith('/'):
                            full = full[:-1]
                            # Drops the first and last character of target
                            # (presumably both slashes -- TODO confirm).
                            target = target[1:-1]
                    else:
                        ns, partial, full = self.nshandler.splitname(target)

                    if self.xopts.wikidb is not None:
                        url = self.xopts.wikidb.getURL(full)
                    else:
                        url = None
                    langlink = None
                    interwiki = None

                # Neither an interwiki link nor a non-empty partial name:
                # leave the tokens untouched and move past the closer.
                if not ilink and not partial:
                    i += 1
                    if stack:
                        marks = stack.pop()
                    else:
                        marks = []
                    continue

                node = T(type=T.t_complex_link, children=[], ns=ns, colon=colon,
                         lang=self.lang, nshandler=self.nshandler, url=url)
                if langlink:
                    node.langlink = langlink
                if interwiki:
                    node.interwiki = interwiki

                # Children: for image-namespace links whatever
                # extract_image_modifiers returns; otherwise the tokens
                # between the first "|" and the closer, if there was a "|".
                sub = None
                if ns == nshandling.NS_IMAGE:
                    sub = self.extract_image_modifiers(marks, node)
                elif len(marks) > 2:
                    sub = tokens[marks[1] + 1:marks[-1]]

                if sub is None:
                    sub = []

                node.children = sub
                # Collapse opener..closer into the single link node.
                tokens[start:i + 1] = [node]
                node.target = target
                node.full_target = full
                if stack:
                    marks = stack.pop()
                else:
                    marks = []
                # Continue with the token right after the new node.
                i = start + 1
            else:
                i += 1

Exemple #12

0

Afficher le fichier

Fichier : test_ebad.py Projet : aarddict/mwlib

def test_ebad_in_text():
    """U+EBAD embedded in the input must not appear in the joined text."""
    parsed = core.parse_txt(u"foo\uebadbar")
    joined = T.join_as_text(parsed)
    assert joined == "foobar", "\uebad should be stripped"

Exemple #13

0

Afficher le fichier

Fichier : core.py Projet : uncletall/mwlib

    def run(self):
        """Replace link token runs in ``self.tokens`` with ``t_complex_link`` nodes.

        Walks ``self.tokens`` once, recording in ``marks`` the index of a
        ``t_2box_open`` token followed by the indices of any "|" special
        tokens.  When a ``t_2box_close`` token is reached, the tokens between
        the opener and the next mark are joined into the link target, the
        target is resolved (interwiki first, otherwise via
        ``self.nshandler.splitname``), and the whole run from opener to
        closer is replaced in place by a single node.  Candidates that
        resolve to nothing are left as plain tokens.

        The method mutates ``self.tokens`` and returns None.
        """
        tokens = self.tokens
        i = 0
        # Indices for the link currently being collected:
        # [opener, "|" ..., closer].  Empty when no link is open.
        marks = []

        # ``marks`` lists of enclosing links that were suspended when a
        # nested opener was seen.
        stack = []

        while i < len(self.tokens):
            t = tokens[i]
            if t.type == T.t_2box_open:
                # A new opener: keep the outer link's marks only if it
                # already has at least one "|" after its opener.
                if len(marks) > 1:
                    stack.append(marks)
                marks = [i]
                i += 1
            elif t.type == T.t_newline and len(marks) < 2:
                # Newline before any "|" was seen: abandon the current
                # candidate and fall back to the enclosing one, if any.
                if stack:
                    marks = stack.pop()
                else:
                    marks = []
                i += 1
            elif t.type == T.t_special and t.text == "|":
                marks.append(i)
                i += 1
            elif t.type == T.t_2box_close and marks:
                marks.append(i)
                start = marks[0]

                # Target text is everything between the opener and the
                # first "|" (or the closer when there is no "|").
                target = T.join_as_text(tokens[start + 1:marks[1]]).strip()
                # Strip LEFT-TO-RIGHT MARK / RIGHT-TO-LEFT MARK characters.
                target = target.strip(u"\u200e\u200f")
                # A leading colon is removed from the target and remembered
                # on the node via ``colon``.
                if target.startswith(":"):
                    target = target[1:]
                    colon = True
                else:
                    colon = False

                ilink = self.nshandler.resolve_interwiki(target)
                if ilink:
                    url = ilink.url
                    ns = None
                    partial = ilink.partial
                    langlink = ilink.language
                    interwiki = ilink.prefix
                    full = None
                else:
                    # Targets starting with "/" are resolved relative to the
                    # current page title when one is available.
                    if target.startswith('/') and self.xopts.title:
                        ns, partial, full = self.nshandler.splitname(
                            self.xopts.title + target)
                        if full.endswith('/'):
                            full = full[:-1]
                            # Drops the first and last character of target
                            # (presumably both slashes -- TODO confirm).
                            target = target[1:-1]
                    else:
                        ns, partial, full = self.nshandler.splitname(target)

                    if self.xopts.wikidb is not None:
                        url = self.xopts.wikidb.getURL(full)
                    else:
                        url = None
                    langlink = None
                    interwiki = None

                # Neither an interwiki link nor a non-empty partial name:
                # leave the tokens untouched and move past the closer.
                if not ilink and not partial:
                    i += 1
                    if stack:
                        marks = stack.pop()
                    else:
                        marks = []
                    continue

                node = T(type=T.t_complex_link,
                         children=[],
                         ns=ns,
                         colon=colon,
                         lang=self.lang,
                         nshandler=self.nshandler,
                         url=url)
                if langlink:
                    node.langlink = langlink
                if interwiki:
                    node.interwiki = interwiki

                # Children: for image-namespace links whatever
                # extract_image_modifiers returns; otherwise the tokens
                # between the first "|" and the closer, if there was a "|".
                sub = None
                if ns == nshandling.NS_IMAGE:
                    sub = self.extract_image_modifiers(marks, node)
                elif len(marks) > 2:
                    sub = tokens[marks[1] + 1:marks[-1]]

                if sub is None:
                    sub = []

                node.children = sub
                # Collapse opener..closer into the single link node.
                tokens[start:i + 1] = [node]
                node.target = target
                node.full_target = full
                if stack:
                    marks = stack.pop()
                else:
                    marks = []
                # Continue with the token right after the new node.
                i = start + 1
            else:
                i += 1