Code example #1
File: verify_match.py  Project: Aubreymcfato/phetools
def read_djvu(book_name, cached_text, datas, opt):
    import re
    from match_and_split import align

    # Fetch the DjVu text layer of the book through the passed-in cache
    # (one string per page).
    data = align.get_djvu(cached_text, opt.site, book_name, True)
    for pos, text in enumerate(data):
        # Drop <noinclude> blocks so only the transcluded page text remains.
        text = re.sub(u'(?ms)<noinclude>(.*?)</noinclude>', u'', text)
        # Pages are keyed by their 1-based page number.
        datas.setdefault(pos + 1, [])
        datas[pos + 1].append(text)
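A quick, self-contained check of the `<noinclude>` stripping used above (not part of the original listing; the sample string is made up):

    import re

    sample = u'<noinclude>running header</noinclude>Body of the page.'
    print re.sub(u'(?ms)<noinclude>(.*?)</noinclude>', u'', sample)
    # prints: Body of the page.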
Code example #2
def do_extract(mysite, maintitle, user, codelang, cache):
    # align, page_prefixes, pywikibot, safe_put, ret_val, E_OK and E_ERROR
    # are provided by the surrounding module.
    # Look up the localized Page-namespace prefix first and decode it only
    # after the check, so a missing prefix returns an error instead of
    # raising TypeError.
    prefix = page_prefixes['wikisource'].get(codelang)
    if not prefix:
        return ret_val(E_ERROR, "no prefix")
    prefix = unicode(prefix, 'utf-8')

    djvuname = maintitle.replace(u' ', u'_')
    print djvuname.encode('utf-8')

    # Fetch the OCR text layer of the DjVu file (one entry per page).
    text_layer = align.get_djvu(cache, mysite, djvuname, True)
    if not text_layer:
        return ret_val(E_ERROR, "unable to retrieve text layer")

    # Build one wikitext section per page, headed by a link to that page.
    text = u''
    for pos, page_text in enumerate(text_layer):
        text += u'==[[' + prefix + u':' + maintitle + u'/' + unicode(pos + 1) + u']]==\n'
        text += page_text + u'\n'

    # The target title was redacted to u'User:'******'/Text' in the source
    # listing; u'User:' + user + u'/Text' is an assumption from context.
    page = pywikibot.Page(mysite, u'User:' + user + u'/Text')
    safe_put(page, text, comment=u'extract text')

    return ret_val(E_OK, "")
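For illustration only, a self-contained sketch of the wikitext assembly done by the loop above, with a made-up prefix, title and text layer (none of these values come from the original listing):

    prefix = u'Page'                    # hypothetical namespace prefix
    maintitle = u'Example book.djvu'    # hypothetical index title
    text_layer = [u'first page text', u'second page text']

    text = u''
    for pos, page_text in enumerate(text_layer):
        text += u'==[[' + prefix + u':' + maintitle + u'/' + unicode(pos + 1) + u']]==\n'
        text += page_text + u'\n'

    print text.encode('utf-8')
    # ==[[Page:Example book.djvu/1]]==
    # first page text
    # ==[[Page:Example book.djvu/2]]==
    # second page text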