def read_djvu(book_name, cached_text, datas, opt):
    """Load the DjVu text layer of *book_name* and collect its pages.

    Each page's text has its <noinclude>...</noinclude> regions removed,
    then is appended to datas[page_number], where page_number is 1-based.
    *datas* is mutated in place; missing keys are created as empty lists.
    """
    from match_and_split import align

    pages = align.get_djvu(cached_text, opt.site, book_name, True)
    for index, raw_text in enumerate(pages):
        # Drop editor-only <noinclude> sections (multiline, dot-all regex)
        # before storing the page text.
        cleaned = re.sub(u'(?ms)<noinclude>(.*?)</noinclude>', u'', raw_text)
        datas.setdefault(index + 1, []).append(cleaned)
def do_extract(mysite, maintitle, user, codelang, cache): prefix = unicode(page_prefixes['wikisource'].get(codelang), 'utf-8') if not prefix: return ret_val(E_ERROR, "no prefix") djvuname = maintitle.replace(u' ', u'_') print djvuname.encode('utf-8') text_layer = align.get_djvu(cache, mysite, djvuname, True) if not text_layer: return ret_val(E_ERROR, "unable to retrieve text layer") text = u'' for pos, page_text in enumerate(text_layer): text += u'==[[' + prefix + u':' + maintitle + u'/' + unicode(pos+1) + u']]==\n' text += page_text + u'\n' page = pywikibot.Page(mysite, u'User:'******'/Text') safe_put(page, text, comment = u'extract text') return ret_val(E_OK, "")
def do_extract(mysite, maintitle, user, codelang, cache): prefix = unicode(page_prefixes['wikisource'].get(codelang), 'utf-8') if not prefix: return ret_val(E_ERROR, "no prefix") djvuname = maintitle.replace(u' ', u'_') print djvuname.encode('utf-8') text_layer = align.get_djvu(cache, mysite, djvuname, True) if not text_layer: return ret_val(E_ERROR, "unable to retrieve text layer") text = u'' for pos, page_text in enumerate(text_layer): text += u'==[[' + prefix + u':' + maintitle + u'/' + unicode( pos + 1) + u']]==\n' text += page_text + u'\n' page = pywikibot.Page(mysite, u'User:'******'/Text') safe_put(page, text, comment=u'extract text') return ret_val(E_OK, "")