Esempio n. 1
0
def write_templates(res):
    """Write proofreading statistics to the statistics templates of each wiki.

    For every hard-coded domain ('fr', 'en', 'bn', 'pl') the entry
    ``res[dom]`` is decoded with ``decode_res`` and five template pages on
    the matching wikisource site are overwritten through ``safe_put``:
    PAGES_NOT_PROOFREAD, ALL_PAGES, PR_TEXTS, ALL_TEXTS and PR_PERCENT.

    Args:
        res: mapping from domain code to the value ``decode_res`` accepts.
    """
    import pywikibot
    from common.pywikibot_utils import safe_put

    # Separator handed to spaced_int() for each wiki; domains that are not
    # listed use an empty separator.
    separators = {'fr': ' ', 'en': ','}

    for dom in ['fr', 'en', 'bn', 'pl']:
        sep = separators.get(dom, '')

        (num, num_q0, num_q2, num_q3, num_q4,
         num_tr, num_texts, num_disambig) = decode_res(res[dom])

        # Texts that are not disambiguation pages.
        real_texts = num_texts - num_disambig
        # Guard against a wiki without any countable text: report 0% rather
        # than aborting the whole update with a ZeroDivisionError.
        if real_texts:
            percent = num_tr * 100. / real_texts
        else:
            percent = 0.
        # Quality-1 pages are what is left once the other levels are removed.
        num_q1 = num - (num_q0 + num_q2 + num_q3 + num_q4)

        site = pywikibot.getSite(dom, fam='wikisource')
        updates = (
            ("Template:PAGES_NOT_PROOFREAD", spaced_int(num_q1, sep)),
            ("Template:ALL_PAGES", spaced_int(num, sep)),
            ("Template:PR_TEXTS", spaced_int(num_tr, sep)),
            ("Template:ALL_TEXTS", spaced_int(real_texts, sep)),
            ("Template:PR_PERCENT", "%.2f" % percent),
        )
        for title, value in updates:
            safe_put(pywikibot.Page(site, title), value, "")
def do_extract(mysite, maintitle, user, codelang, cache):
    """Dump the text layer of a DjVu file into a sub-page of the user.

    The text layer is fetched with ``align.get_djvu``; every item it yields
    is written under a ``==[[<prefix>:<maintitle>/<n>]]==`` heading (``n``
    starting at 1) and the result is saved to ``User:<user>/Text``.

    Args:
        mysite: site object handed to ``align.get_djvu`` and ``pywikibot.Page``.
        maintitle: unicode title of the DjVu file.
        user: name of the user whose ``/Text`` sub-page receives the text.
        codelang: language code used to look up the page namespace prefix.
        cache: cache object handed to ``align.get_djvu``.

    Returns:
        ``ret_val(E_OK, "")`` on success, ``ret_val(E_ERROR, <message>)`` when
        the prefix is unknown or the text layer cannot be retrieved.
    """
    raw_prefix = page_prefixes['wikisource'].get(codelang)
    # Test before decoding: unicode(None, 'utf-8') raises TypeError, which
    # would hide the intended "no prefix" error for unknown languages.
    if not raw_prefix:
        return ret_val(E_ERROR, "no prefix")
    prefix = unicode(raw_prefix, 'utf-8')

    djvuname = maintitle.replace(u' ', u'_')
    print(djvuname.encode('utf-8'))

    text_layer = align.get_djvu(cache, mysite, djvuname, True)
    if not text_layer:
        return ret_val(E_ERROR, "unable to retrieve text layer")

    # Collect the pieces and join once instead of growing a string in a loop.
    chunks = []
    for pos, page_text in enumerate(text_layer):
        chunks.append(u'==[[' + prefix + u':' + maintitle + u'/'
                      + unicode(pos + 1) + u']]==\n')
        chunks.append(page_text + u'\n')
    text = u''.join(chunks)

    # NOTE(review): this expression was mangled in the source
    # (u'User:'******'/Text'); rebuilt from the otherwise unused ``user``
    # parameter -- confirm against the upstream file.
    page = pywikibot.Page(mysite, u'User:' + user + u'/Text')
    safe_put(page, text, comment=u'extract text')

    return ret_val(E_OK, "")
Esempio n. 3
0
def do_extract(mysite, maintitle, user, codelang, cache):
    """Copy the text layer of a DjVu file to ``User:<user>/Text``.

    Each item returned by ``align.get_djvu`` is emitted below a
    ``==[[<prefix>:<maintitle>/<n>]]==`` heading, with ``n`` counted from 1,
    and the concatenation is saved with ``safe_put``.

    Args:
        mysite: site object handed to ``align.get_djvu`` and ``pywikibot.Page``.
        maintitle: unicode title of the DjVu file.
        user: name of the user whose ``/Text`` sub-page is written.
        codelang: language code used to look up the page namespace prefix.
        cache: cache object handed to ``align.get_djvu``.

    Returns:
        ``ret_val(E_OK, "")`` on success, otherwise ``ret_val(E_ERROR, msg)``.
    """
    encoded_prefix = page_prefixes['wikisource'].get(codelang)
    # The lookup yields None for an unknown language; decoding None would
    # raise TypeError, so the guard has to come before unicode().
    if not encoded_prefix:
        return ret_val(E_ERROR, "no prefix")
    prefix = unicode(encoded_prefix, 'utf-8')

    djvuname = maintitle.replace(u' ', u'_')
    print(djvuname.encode('utf-8'))

    text_layer = align.get_djvu(cache, mysite, djvuname, True)
    if not text_layer:
        return ret_val(E_ERROR, "unable to retrieve text layer")

    heading_start = u'==[[' + prefix + u':' + maintitle + u'/'
    parts = []
    for pos, page_text in enumerate(text_layer):
        parts.append(heading_start + unicode(pos + 1) + u']]==\n')
        parts.append(page_text + u'\n')
    text = u''.join(parts)

    # NOTE(review): the source line was mangled (u'User:'******'/Text');
    # reconstructed from the otherwise unused ``user`` parameter -- confirm
    # against the upstream file.
    page = pywikibot.Page(mysite, u'User:' + user + u'/Text')
    safe_put(page, text, comment=u'extract text')

    return ret_val(E_OK, "")
Esempio n. 4
0
def do_match(mysite, maintitle, user, codelang):
    """Match the text of page ``maintitle`` against a DjVu text layer.

    Two mutually exclusive modes, selected by the page content:

    * the page contains ``{{R2Mondes``: every ``{{R2Mondes|a|b|c}}`` tag is
      rewritten through the module-level ``repl`` callback, each resulting
      ``==[[Page:file/num]]==`` chunk is compared with the text layer using
      ``align.match_page``, typographic clean-ups are applied, the page is
      saved and a split job is queued;
    * otherwise a ``==__MATCH__:[[<prefix>:file/num|step=n]]==`` tag is looked
      up and the text following it is handed to ``align.do_match``.

    Args:
        mysite: site object used to build pages and fetch DjVu data.
        maintitle: title of the page to process.
        user: user name, used in edit summaries and for the split job.
        codelang: language code used to look up the page namespace prefix.

    Returns:
        A ``ret_val(...)`` result for the R2Mondes mode and for every error
        path, or the dict returned by ``align.do_match`` (with ``'text'``
        blanked once it has been saved) in the ``__MATCH__`` mode.
    """
    prefix = page_prefixes['wikisource'].get(codelang)
    if not prefix:
        return ret_val(E_ERROR, "no prefix")

    page = pywikibot.Page(mysite, maintitle)
    try:
        text = page.get()
    except:
        # NOTE(review): bare except also traps KeyboardInterrupt/SystemExit.
        utils.print_traceback("failed to get page")
        return ret_val(E_ERROR, "failed to get page")

    if text.find("{{R2Mondes") != -1:
        # Reset the module-level dict; presumably filled by ``repl`` while
        # substituting -- confirm against its definition.
        global pl_dict
        pl_dict = {}
        p0 = re.compile("\{\{R2Mondes\|(\d+)\|(\d+)\|(\d+)\}\}\s*\n")
        try:
            # ``repl`` is defined elsewhere; the handler below shows it may
            # raise pywikibot.NoPage.
            new_text = p0.sub(repl, text)
        except pywikibot.NoPage:
            return ret_val(E_ERROR, "Erreur : impossible de trouver l'index")
        p = re.compile('==\[\[Page:([^=]+)\]\]==\n')

        cache = lifo_cache.LifoCache('match_and_split_text_layer')
        # One capturing group, so split() yields
        # [head, title1, content1, title2, content2, ...].
        bl = p.split(new_text)
        for i in range(len(bl) / 2):
            title = bl[i * 2 + 1]
            content = bl[i * 2 + 2]
            filename, pagenum = title.split('/')
            # The last get_djvu argument is True for the first chunk only;
            # its meaning is not visible from here.
            if i == 0:
                cached_text = align.get_djvu(cache, mysite, filename, True)
            else:
                cached_text = align.get_djvu(cache, mysite, filename, False)
            if not cached_text:
                return ret_val(E_ERROR, "Erreur : fichier absent")
            # A tag still present in the chunk was not matched by the strict
            # pattern above: report it, using a looser pattern on the
            # original text to build the message.
            if content.find("R2Mondes") != -1:
                p0 = re.compile("\{\{R2Mondes\|\d+\|\d+\|(\d+)\}\}\s*\n")
                bl0 = p0.split(text)
                title0 = bl0[i * 2 + 1].encode("utf8")
                return ret_val(
                    E_ERROR,
                    "Erreur : Syntaxe 'R2Mondes' incorrecte, dans la page " +
                    title0)
            # Page numbers in titles are 1-based, the text layer is indexed
            # from 0.
            r = align.match_page(content, cached_text[int(pagenum) - 1])
            print "%s %s  : %f" % (filename, pagenum, r)
            # Scores below 0.1 are rejected as a mismatch.
            if r < 0.1:
                return ret_val(
                    E_ERROR,
                    "Erreur : Le texte ne correspond pas, page %s" % pagenum)
        #the page is ok
        # Typographic clean-up; the substitutions are applied in sequence and
        # later ones operate on the output of earlier ones.
        new_text = re.sub(u'<references[ ]*/>', u'', new_text)
        new_text = re.sub(u'[ ]([,])', u'\\1', new_text)
        new_text = re.sub(u'([^.])[ ]([,.])', u'\\1\\2', new_text)
        new_text = re.sub(u'\.\.\.', u'…', new_text)

        new_text = re.sub(u'([^ \s])([;:!?])', u'\\1 \\2', new_text)
        new_text = re.sub(u'([«;:!?])([^ \s…])', u'\\1 \\2', new_text)
        # separated from the previous regexp else "word!»" overlap
        new_text = re.sub(u'([^ \s])([»])', u'\\1 \\2', new_text)

        # workaround some buggy text
        new_text = re.sub(u'([;:!?»]) \n', u'\\1\n', new_text)
        new_text = re.sub(u'([;:!?»])\'\'([ \n])', u'\\1\'\'\\2', new_text)
        # <&nbsp;><space>
        #new_text = re.sub(u'  ([;:!?»])', u' \\1', new_text)
        #new_text = re.sub(u' ([;:!?»])', u' \\1', new_text)
        new_text = re.sub(u'([;:!?»]) <br />', u'\\1<br />', new_text)
        # Undo the spacing the rules above introduced after "Page" and after
        # leading wiki indentation colons.
        new_text = new_text.replace(u'Page : ', u'Page:')
        new_text = new_text.replace(u'\n: ', u'\n:')
        new_text = new_text.replace(u'\n:: ', u'\n::')
        new_text = new_text.replace(u'\n::: ', u'\n:::')
        new_text = new_text.replace(u'\n:::: ', u'\n::::')
        new_text = new_text.replace(u'\n::::: ', u'\n:::::')
        # Wrap ordinal suffixes in the {{er}} / {{e}} templates.
        new_text = re.sub(
            u'1er (janvier|février|avril|mars|mai|juin|juillet|août|septembre|octobre|novembre|décembre)',
            u'1{{er}} \\1', new_text)
        new_text = re.sub(u'([0-9])e ', u'\\1{{e}} ', new_text)
        #text = re.sub(u'([;:!?»]) <div>\n', u'\\1\n', new_text)

        # try to move the title inside the M&S
        match_title = re.search(u"{{[Jj]ournal[ ]*\|*(.*?)\|", new_text)
        if match_title:
            # Insert a centred title right after the first ==...== heading.
            pos = re.search(u'==(.*?)==', new_text)
            if pos:
                new_text = new_text[
                    0:pos.end(0)] + u'\n{{c|' + match_title.group(
                        1) + u'|fs=140%}}\n\n\n' + new_text[pos.end(0):]

        safe_put(page, new_text, user + ": match")
        jobs['number_of_split_job'] += 1
        # FIXME: can we pass the request here and use a callback in the js?
        # FIXME: server is None?
        jobs['split_queue'].put(maintitle, codelang, user, time.time(), None,
                                None, None)
        # FIXME: that's an abuse of E_ERROR
        return ret_val(E_ERROR, "ok : transfert en cours.")

    # __MATCH__ mode: locate the tag naming the DjVu file, the start page and
    # an optional step.
    prefix = prefix.decode('utf-8')
    p = re.compile("==__MATCH__:\[\[" + prefix +
                   ":(.*?)/(\d+)(\|step=(\d+))?\]\]==")
    m = re.search(p, text)
    if m:
        djvuname = m.group(1)
        number = m.group(2)
        # Keep what precedes the tag aside; only what follows it is matched.
        pos = text.find(m.group(0))
        head = text[:pos]
        text = text[pos + len(m.group(0)):]
        if m.group(4):
            try:
                step = int(m.group(4))
            except:
                return ret_val(E_ERROR, "match tag invalid")
        else:
            step = 1
    else:
        return ret_val(E_ERROR, "match tag not found")

    pywikibot.output(djvuname + " " + number + " " + str(step))
    try:
        number = int(number)
    except:
        return ret_val(E_ERROR, "illformed __MATCH__: no page number ?")

    cache = lifo_cache.LifoCache('match_and_split_text_layer')
    cached_text = align.get_djvu(cache, mysite, djvuname, True)
    if not cached_text:
        return ret_val(
            E_ERROR, "unable to read djvu, if the File: exists, please retry")

    data = align.do_match(text,
                          cached_text,
                          djvuname,
                          number,
                          verbose=False,
                          prefix=prefix,
                          step=step)
    if not data['error']:
        # Save the untouched head followed by the matched text, then blank
        # the text in the returned dict.
        safe_put(page, head + data['text'], user + ": match")
        data['text'] = ""

    return data
Esempio n. 5
0
def do_split(mysite, rootname, user, codelang):
    """Split a matched text into its individual ``<prefix>:file/num`` pages.

    The root page is cut at every ``==[[<prefix>:...]]==`` heading.  Each
    chunk is saved to the page named by its heading, and the root page body
    is replaced by ``<pages index=... from=... to=... />`` tags (one per run
    of consecutive pages of the same file) or ``{{title}}`` transclusions for
    headings that do not look like ``<prefix>:file/num``.

    For a target page that already exists:

    * if it is transcluded somewhere and is the first or last chunk, the old
      and new texts are kept side by side in sections ``s1``/``s2`` and the
      ``<pages>`` tag of the referring page is updated accordingly;
    * if it is not transcluded, the header and footer of a quality-1 page are
      reused around the new content, in either the ``<pagequality>`` tag
      form or the older ``{{PageQuality|1|...}}`` form.

    Args:
        mysite: site object used to build every page.
        rootname: title of the page holding the matched text.
        user: user name, prepended to the edit summaries.
        codelang: language code used to look up the page namespace prefix.

    Returns:
        ``ret_val(E_OK, "")`` on success, ``ret_val(E_ERROR, msg)`` when the
        prefix is unknown or the root page cannot be read.
    """
    prefix = page_prefixes['wikisource'].get(codelang)
    if not prefix:
        return ret_val(E_ERROR, "no Page: prefix")
    prefix = prefix.decode('utf-8')

    try:
        page = pywikibot.Page(mysite, rootname)
        text = page.get()
    except Exception:
        # Narrowed from a bare except so Ctrl-C / SystemExit still propagate.
        return ret_val(E_ERROR, "unable to read page")

    p = re.compile('==\[\[(' + prefix + ':[^=]+)\]\]==\n')
    # One capturing group: split() yields [head, title1, content1, ...].
    bl = p.split(text)
    nb_chunks = len(bl) // 2
    titles = '\n'
    pages_tag = "<pages index=\"%s\" from=%d to=%d />\n"

    group = ""

    fromsection = ""
    tosection = ""
    fromsection_page = tosection_page = None

    for i in range(nb_chunks):

        title = bl[i * 2 + 1]
        content = bl[i * 2 + 2]

        #always NOPREFIX
        pagetitle = title

        content = content.rstrip("\n ")

        pl = pywikibot.Page(mysite, pagetitle)

        m = re.match(prefix + ':(.*?)/(\d+)', pagetitle)
        if m:
            filename = m.group(1)
            pagenum = int(m.group(2))
            if group and filename == group and pagenum == pto + 1:
                # Same file, next page: extend the current run.
                pto = pagenum
            else:
                # Another file or a gap: flush the current run, if any, and
                # start a new one.
                if group:
                    titles = titles + pages_tag % (group, pfrom, pto)
                group = filename
                pfrom = pagenum
                pto = pfrom
        else:
            if group:
                titles = titles + pages_tag % (group, pfrom, pto)
                group = False

            titles = titles + "{{" + pagetitle + "}}\n"

        #prepend br
        if content and content[0] == '\n':
            content = '<nowiki />\n' + content

        if pl.exists():
            old_text = pl.get()
            refs = pl.getReferences(onlyTemplateInclusion=True)
            # Count the referring pages; ``ref`` is left on the last one.
            numrefs = 0
            for ref in refs:
                numrefs += 1

            #first and last pages : check if they are transcluded
            if numrefs > 0:
                m = re.match(
                    "<noinclude>(.*?)</noinclude>(.*)<noinclude>(.*?)</noinclude>",
                    old_text, re.MULTILINE | re.DOTALL)
                if m and (i == 0 or i == (nb_chunks - 1)):
                    print("creating sections")
                    old_text = m.group(2)
                    if i == 0:
                        first_part = old_text
                        second_part = content
                        fromsection = "fromsection=s2 "
                        fromsection_page = ref
                    else:
                        first_part = content
                        second_part = old_text
                        tosection = "tosection=s1 "
                        tosection_page = ref

                    content = "<noinclude>" + m.group(1) + "</noinclude><section begin=s1/>" + first_part + "<section end=s1/>\n----\n" \
                        + "<section begin=s2/>" + second_part + "<section end=s2/><noinclude>" + m.group(3) + "</noinclude>"
            else:
                m = re.match(
                    "<noinclude><pagequality level=\"1\" user=\"(.*?)\" />(.*?)</noinclude>(.*)<noinclude>(.*?)</noinclude>",
                    old_text, re.MULTILINE | re.DOTALL)
                if m:
                    print("ok, quality 1, first try")
                    content = "<noinclude><pagequality level=\"1\" user=\"" + m.group(1) + "\" />" \
                        + m.group(2) + "</noinclude>" + content \
                        + "<noinclude>" + m.group(4) + "</noinclude>"
                else:
                    # Fallback for the older template-based header.  This
                    # used to be nested under the branch above, where it
                    # could never match (both patterns are anchored at the
                    # start of the page and are mutually exclusive).
                    m2 = re.match(
                        "<noinclude>\{\{PageQuality\|1\|(.*?)\}\}(.*?)</noinclude>(.*)<noinclude>(.*?)</noinclude>",
                        old_text, re.MULTILINE | re.DOTALL)
                    if m2:
                        # FIXME: shouldn't use an hardcoded name here
                        print("ok, quality 1, second try")
                        content = "<noinclude><pagequality level=\"1\" user=\"Phe-bot\" />" \
                            + m2.group(2) + "</noinclude>" + content \
                            + "<noinclude>" + m2.group(4) + "</noinclude>"

        else:
            # New page: wrap the content in a default quality-1 header/footer.
            # NOTE(review): the user name in this literal appears redacted in
            # the source; left untouched.
            header = u'<noinclude><pagequality level="1" user="******" /><div class="pagetext">\n\n\n</noinclude>'
            footer = u'<noinclude>\n<references/></div></noinclude>'
            content = header + content + footer

        safe_put(pl, content, user + ": split")

    if group:
        titles = titles + "<pages index=\"%s\" from=%d to=%d %s%s/>\n" % (
            group, pfrom, pto, fromsection, tosection)

    # Tell the page transcluding the first chunk to stop at section s1.
    if fromsection and fromsection_page:
        rtext = fromsection_page.get()
        m = re.search(
            "<pages index=\"(.*?)\" from=(.*?) to=(.*?) (fromsection=s2 |)/>",
            rtext)
        if m and m.group(1) == group:
            rtext = rtext.replace(m.group(0),
                                  m.group(0)[:-2] + "tosection=s1 />")
            print("new rtext")
            safe_put(fromsection_page, rtext, user + ": split")

    # Tell the page transcluding the last chunk to start at section s2.
    if tosection and tosection_page:
        rtext = tosection_page.get()
        m = re.search(
            "<pages index=\"(.*?)\" from=(.*?) to=(.*?) (tosection=s1 |)/>",
            rtext)
        if m and m.group(1) == group:
            rtext = rtext.replace(m.group(0),
                                  m.group(0)[:-2] + "fromsection=s2 />")
            print("new rtext")
            safe_put(tosection_page, rtext, user + ": split")

    # The root page keeps what preceded the first heading, followed by the
    # generated transclusion tags.
    header = bl[0]
    safe_put(page, header + titles, user + ": split")

    return ret_val(E_OK, "")
Esempio n. 6
0
def do_match(mysite, maintitle, user, codelang):
    """Match the text of page ``maintitle`` against a DjVu text layer.

    If the page contains ``{{R2Mondes`` tags, they are rewritten through the
    module-level ``repl`` callback, every ``==[[Page:file/num]]==`` chunk is
    scored against the text layer with ``align.match_page``, typographic
    clean-ups are applied, the page is saved and a split job is queued.
    Otherwise the text following a
    ``==__MATCH__:[[<prefix>:file/num|step=n]]==`` tag is handed to
    ``align.do_match`` and saved when no error is reported.

    Args:
        mysite: site object used to build pages and fetch DjVu data.
        maintitle: title of the page to process.
        user: user name, used in edit summaries and for the split job.
        codelang: language code used to look up the page namespace prefix.

    Returns:
        A ``ret_val(...)`` result for the R2Mondes mode and for every error
        path, or the dict returned by ``align.do_match`` (``'text'`` blanked
        once saved) in the ``__MATCH__`` mode.
    """
    prefix = page_prefixes['wikisource'].get(codelang)
    if not prefix:
        return ret_val(E_ERROR, "no prefix")

    page = pywikibot.Page(mysite, maintitle)
    try:
        text = page.get()
    except:
        # NOTE(review): bare except also traps KeyboardInterrupt/SystemExit.
        utils.print_traceback("failed to get page")
        return ret_val(E_ERROR, "failed to get page")

    if text.find("{{R2Mondes")!=-1:
        # Reset the module-level dict; presumably filled by ``repl`` during
        # the substitution -- confirm against its definition.
        global pl_dict
        pl_dict = {}
        p0 = re.compile("\{\{R2Mondes\|(\d+)\|(\d+)\|(\d+)\}\}\s*\n")
        try:
            # ``repl`` lives elsewhere; the handler below shows it may raise
            # pywikibot.NoPage.
            new_text = p0.sub(repl, text)
        except pywikibot.NoPage:
            return ret_val(E_ERROR, "Erreur : impossible de trouver l'index")
        p = re.compile('==\[\[Page:([^=]+)\]\]==\n')

        cache = lifo_cache.LifoCache('match_and_split_text_layer')
        # One capturing group, so split() yields
        # [head, title1, content1, title2, content2, ...].
        bl= p.split(new_text)
        for i in range(len(bl)/2):
            title  = bl[i*2+1]
            content = bl[i*2+2]
            filename, pagenum = title.split('/')
            # The last get_djvu argument is True for the first chunk only;
            # its meaning is not visible from here.
            if i == 0:
                cached_text = align.get_djvu(cache, mysite, filename, True)
            else:
                cached_text = align.get_djvu(cache, mysite, filename, False)
            if not cached_text:
                return ret_val(E_ERROR, "Erreur : fichier absent")
            # A tag surviving in the chunk was not matched by the strict
            # pattern: report it, using a looser pattern on the original
            # text to build the message.
            if content.find("R2Mondes") != -1:
                p0 = re.compile("\{\{R2Mondes\|\d+\|\d+\|(\d+)\}\}\s*\n")
                bl0 = p0.split(text)
                title0 = bl0[i*2+1].encode("utf8")
                return ret_val(E_ERROR, "Erreur : Syntaxe 'R2Mondes' incorrecte, dans la page "+title0)
            # Titles carry 1-based page numbers; the text layer is 0-based.
            r = align.match_page(content, cached_text[int(pagenum)-1])
            print "%s %s  : %f"%(filename, pagenum, r)
            # Scores below 0.1 are rejected as a mismatch.
            if r < 0.1:
                return ret_val(E_ERROR, "Erreur : Le texte ne correspond pas, page %s" % pagenum)
        #the page is ok
        # Typographic clean-up; the substitutions run in sequence and later
        # ones operate on the output of earlier ones.
        new_text = re.sub(u'<references[ ]*/>', u'', new_text)
        new_text = re.sub(u'[ ]([,])', u'\\1', new_text)
        new_text = re.sub(u'([^.])[ ]([,.])', u'\\1\\2', new_text)
        new_text = re.sub(u'\.\.\.', u'…', new_text)

        new_text = re.sub(u'([^ \s])([;:!?])', u'\\1 \\2', new_text)
        new_text = re.sub(u'([«;:!?])([^ \s…])', u'\\1 \\2', new_text)
        # separated from the previous regexp else "word!»" overlap
        new_text = re.sub(u'([^ \s])([»])', u'\\1 \\2', new_text)

        # workaround some buggy text
        new_text = re.sub(u'([;:!?»]) \n', u'\\1\n', new_text)
        new_text = re.sub(u'([;:!?»])\'\'([ \n])', u'\\1\'\'\\2', new_text)
        # <&nbsp;><space>
        #new_text = re.sub(u'  ([;:!?»])', u' \\1', new_text)
        #new_text = re.sub(u' ([;:!?»])', u' \\1', new_text)
        new_text = re.sub(u'([;:!?»]) <br />', u'\\1<br />', new_text)
        # Undo the spacing the rules above introduced after "Page" and after
        # leading wiki indentation colons.
        new_text = new_text.replace(u'Page : ', u'Page:')
        new_text = new_text.replace(u'\n: ', u'\n:')
        new_text = new_text.replace(u'\n:: ', u'\n::')
        new_text = new_text.replace(u'\n::: ', u'\n:::')
        new_text = new_text.replace(u'\n:::: ', u'\n::::')
        new_text = new_text.replace(u'\n::::: ', u'\n:::::')
        # Wrap ordinal suffixes in the {{er}} / {{e}} templates.
        new_text = re.sub(u'1er (janvier|février|avril|mars|mai|juin|juillet|août|septembre|octobre|novembre|décembre)', u'1{{er}} \\1', new_text)
        new_text = re.sub(u'([0-9])e ', u'\\1{{e}} ', new_text)
        #text = re.sub(u'([;:!?»]) <div>\n', u'\\1\n', new_text)

        # try to move the title inside the M&S
        match_title = re.search(u"{{[Jj]ournal[ ]*\|*(.*?)\|", new_text)
        if match_title:
            # Insert a centred title right after the first ==...== heading.
            pos = re.search(u'==(.*?)==', new_text)
            if pos:
                new_text = new_text[0:pos.end(0)] + u'\n{{c|' + match_title.group(1) + u'|fs=140%}}\n\n\n' + new_text[pos.end(0):]

        safe_put(page,new_text,user+": match")
        jobs['number_of_split_job'] += 1
        # FIXME: can we pass the request here and use a callback in the js?
        # FIXME: server is None?
        jobs['split_queue'].put(maintitle, codelang, user, time.time(), None, None, None)
        # FIXME: that's an abuse of E_ERROR
        return ret_val(E_ERROR, "ok : transfert en cours.")

    # __MATCH__ mode: locate the tag naming the DjVu file, the start page and
    # an optional step.
    prefix = prefix.decode('utf-8')
    p = re.compile("==__MATCH__:\[\[" + prefix + ":(.*?)/(\d+)(\|step=(\d+))?\]\]==")
    m = re.search(p,text)
    if m:
        djvuname = m.group(1)
        number = m.group(2)
        # Keep what precedes the tag aside; only what follows it is matched.
        pos = text.find(m.group(0))
        head = text[:pos]
        text = text[pos+len(m.group(0)):]
        if m.group(4):
            try:
                step = int(m.group(4))
            except:
                return ret_val(E_ERROR, "match tag invalid")
        else:
            step = 1
    else:
        return ret_val(E_ERROR, "match tag not found")

    pywikibot.output(djvuname + " " + number + " " + str(step))
    try:
        number = int(number)
    except:
        return ret_val(E_ERROR, "illformed __MATCH__: no page number ?")

    cache = lifo_cache.LifoCache('match_and_split_text_layer')
    cached_text = align.get_djvu(cache, mysite, djvuname, True)
    if not cached_text:
        return ret_val(E_ERROR, "unable to read djvu, if the File: exists, please retry")

    data = align.do_match(text, cached_text, djvuname, number, verbose = False, prefix = prefix, step = step)
    if not data['error']:
        # Save the untouched head followed by the matched text, then blank
        # the text in the returned dict.
        safe_put(page, head + data['text'], user + ": match")
        data['text'] = ""

    return data
Esempio n. 7
0
def do_split(mysite, rootname, user, codelang):
    """Split a matched text into its individual ``<prefix>:file/num`` pages.

    The root page is cut at every ``==[[<prefix>:...]]==`` heading.  Each
    chunk is written to the page named by its heading, unless that page
    already exists with a known quality level other than 1.  The root page
    body is replaced by ``<pages index=... from=... to=... />`` tags (one per
    run of consecutive pages of the same file) or ``{{title}}`` transclusions
    for headings that do not look like ``<prefix>:file/num``.

    For a target page that already exists:

    * if it is transcluded somewhere and is the first or last chunk, old and
      new texts are kept side by side in sections ``s1``/``s2`` and the
      ``<pages>`` tag of the referring page is updated accordingly;
    * if it is not transcluded, the header and footer of a quality-1 page are
      reused around the new content, in either the ``<pagequality>`` tag
      form or the older ``{{PageQuality|1|...}}`` form.

    Args:
        mysite: site object used to build every page.
        rootname: title of the page holding the matched text.
        user: user name, prepended to the edit summaries.
        codelang: language code used to look up the page namespace prefix.

    Returns:
        ``ret_val(E_OK, "")`` on success, ``ret_val(E_ERROR, msg)`` when the
        prefix is unknown or the root page cannot be read.
    """
    prefix = page_prefixes['wikisource'].get(codelang)
    if not prefix:
        return ret_val(E_ERROR, "no Page: prefix")
    prefix = prefix.decode('utf-8')

    try:
        page = pywikibot.Page(mysite, rootname)
        text = page.get()
    except Exception:
        # Narrowed from a bare except so Ctrl-C / SystemExit still propagate.
        return ret_val(E_ERROR, "unable to read page")

    p = re.compile('==\[\[(' + prefix + ':[^=]+)\]\]==\n')
    # One capturing group: split() yields [head, title1, content1, ...].
    bl = p.split(text)
    nb_chunks = len(bl) // 2
    titles = '\n'
    pages_tag = "<pages index=\"%s\" from=%d to=%d />\n"

    group = ""

    fromsection = ""
    tosection = ""
    fromsection_page = tosection_page = None

    for i in range(nb_chunks):

        title = bl[i*2+1]
        content = bl[i*2+2]

        #always NOPREFIX
        pagetitle = title

        content = content.rstrip("\n ")

        pl = pywikibot.Page(mysite, pagetitle)

        m = re.match(prefix + ':(.*?)/(\d+)', pagetitle)
        if m:
            filename = m.group(1)
            pagenum = int(m.group(2))
            if group and filename == group and pagenum == pto + 1:
                # Same file, next page: extend the current run.
                pto = pagenum
            else:
                # Another file or a gap: flush the current run, if any, and
                # start a new one.
                if group:
                    titles = titles + pages_tag % (group, pfrom, pto)
                group = filename
                pfrom = pagenum
                pto = pfrom
        else:
            if group:
                titles = titles + pages_tag % (group, pfrom, pto)
                group = False

            titles = titles + "{{" + pagetitle + "}}\n"

        #prepend br
        if content and content[0] == '\n':
            content = '<nowiki />\n' + content

        page_exists = pl.exists()
        if page_exists:
            old_text = pl.get()
            refs = pl.getReferences(onlyTemplateInclusion=True)
            # Count the referring pages; ``ref`` is left on the last one.
            numrefs = 0
            for ref in refs:
                numrefs += 1

            #first and last pages : check if they are transcluded
            if numrefs > 0:
                m = re.match("<noinclude>(.*?)</noinclude>(.*)<noinclude>(.*?)</noinclude>",
                             old_text, re.MULTILINE | re.DOTALL)
                if m and (i == 0 or i == (nb_chunks - 1)):
                    print("creating sections")
                    old_text = m.group(2)
                    if i == 0:
                        first_part = old_text
                        second_part = content
                        fromsection = "fromsection=s2 "
                        fromsection_page = ref
                    else:
                        first_part = content
                        second_part = old_text
                        tosection = "tosection=s1 "
                        tosection_page = ref

                    content = "<noinclude>" + m.group(1) + "</noinclude><section begin=s1/>" + first_part + "<section end=s1/>\n----\n" \
                        + "<section begin=s2/>" + second_part + "<section end=s2/><noinclude>" + m.group(3) + "</noinclude>"
            else:
                m = re.match("<noinclude><pagequality level=\"1\" user=\"(.*?)\" />(.*?)</noinclude>(.*)<noinclude>(.*?)</noinclude>",
                             old_text, re.MULTILINE | re.DOTALL)
                if m:
                    print("ok, quality 1, first try")
                    content = "<noinclude><pagequality level=\"1\" user=\"" + m.group(1) + "\" />" \
                        + m.group(2) + "</noinclude>" + content \
                        + "<noinclude>" + m.group(4) + "</noinclude>"
                else:
                    # Fallback for the older template-based header.  This
                    # used to be nested under the branch above, where it
                    # could never match (both patterns are anchored at the
                    # start of the page and are mutually exclusive).
                    m2 = re.match("<noinclude>\{\{PageQuality\|1\|(.*?)\}\}(.*?)</noinclude>(.*)<noinclude>(.*?)</noinclude>",
                                  old_text, re.MULTILINE | re.DOTALL)
                    if m2:
                        # FIXME: shouldn't use an hardcoded name here
                        print("ok, quality 1, second try")
                        content = "<noinclude><pagequality level=\"1\" user=\"Phe-bot\" />" \
                            + m2.group(2) + "</noinclude>" + content \
                            + "<noinclude>" + m2.group(4) + "</noinclude>"

        else:
            # New page: wrap the content in a default quality-1 header/footer.
            # NOTE(review): the user name in this literal appears redacted in
            # the source, and the footer closes a <div> the header never
            # opens; both literals are left untouched -- confirm upstream.
            header = u'<noinclude><pagequality level="1" user="******" />\n\n\n</noinclude>'
            footer = u'<noinclude>\n<references/></div></noinclude>'
            content = header + content + footer

        # Never overwrite an existing page whose quality level is known and
        # is not 1.
        do_put = True
        if page_exists:
            if not hasattr(pl, '_quality'):
                # Only report a missing level when it really is missing; the
                # message used to be printed for quality-1 pages as well.
                print("can't get quality level")
            elif pl._quality != 1:
                print("quality != 1, not saved")
                do_put = False
        if do_put:
            safe_put(pl, content, user + ": split")

    if group:
        titles = titles + "<pages index=\"%s\" from=%d to=%d %s%s/>\n" % (
            group, pfrom, pto, fromsection, tosection)

    # Tell the page transcluding the first chunk to stop at section s1.
    if fromsection and fromsection_page:
        rtext = fromsection_page.get()
        m = re.search("<pages index=\"(.*?)\" from=(.*?) to=(.*?) (fromsection=s2 |)/>", rtext)
        if m and m.group(1) == group:
            rtext = rtext.replace(m.group(0), m.group(0)[:-2] + "tosection=s1 />")
            print("new rtext")
            safe_put(fromsection_page, rtext, user + ": split")

    # Tell the page transcluding the last chunk to start at section s2.
    if tosection and tosection_page:
        rtext = tosection_page.get()
        m = re.search("<pages index=\"(.*?)\" from=(.*?) to=(.*?) (tosection=s1 |)/>", rtext)
        if m and m.group(1) == group:
            rtext = rtext.replace(m.group(0), m.group(0)[:-2] + "fromsection=s2 />")
            print("new rtext")
            safe_put(tosection_page, rtext, user + ": split")

    # The root page keeps what preceded the first heading, followed by the
    # generated transclusion tags.
    header = bl[0]
    safe_put(page, header + titles, user + ": split")

    return ret_val(E_OK, "")