# Assumed module-level imports for this snippet (Python 2 / mwlib); the exact
# json module and log name may differ in the original source file.
import os
import tempfile
import urllib
import zipfile

import json

from mwlib import metabook, mwapidb, parser, utils
from mwlib.log import Log

log = Log('zipcreator')


def build_book(env, status_callback=None, progress_range=None):
    """Build a parser.Book tree from the chapters and articles in env.metabook."""
    book = parser.Book()

    if status_callback is not None:
        progress = progress_range[0]
        num_articles = float(len(metabook.get_item_list(
            env.metabook,
            filter_type='article',
        )))
        if num_articles > 0:
            # distribute the given progress range evenly over the articles
            progress_step = int(
                (progress_range[1] - progress_range[0]) / num_articles
            )

    for item in metabook.get_item_list(env.metabook):
        if item['type'] == 'chapter':
            book.children.append(parser.Chapter(item['title'].strip()))
        elif item['type'] == 'article':
            if status_callback is not None:
                status_callback(
                    status='parsing',
                    progress=progress,
                    article=item['title'],
                )
                progress += progress_step
            a = env.wiki.getParsedArticle(
                title=item['title'],
                revision=item.get('revision'),
            )
            if a is not None:
                if "displaytitle" in item:
                    a.caption = item['displaytitle']
                url = env.wiki.getURL(item['title'], item.get('revision'))
                a.url = unicode(urllib.unquote(url.encode('utf-8')), 'utf-8')
                a.authors = env.wiki.getAuthors(item['title'],
                                                revision=item.get('revision'))
                book.children.append(a)
            else:
                log.warn('No such article: %r' % item['title'])

    if status_callback is not None:
        status_callback(status='parsing', progress=progress, article='')

    return book
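
# --- usage sketch (not part of the original module) --------------------------
# A minimal, hedged example of driving build_book(): it only needs an object
# with .metabook and .wiki attributes plus an optional callback that accepts
# status/progress/article keyword arguments.  How 'env' is obtained (e.g. an
# Environment built by the surrounding application) is assumed here, not shown
# in this module.

def _demo_build_book(env):
    def report(status=None, progress=None, article=None, **kwargs):
        # progress stays within the bounds given in progress_range
        print '%s: %s%% (%s)' % (status, progress, article)

    book = build_book(env, status_callback=report, progress_range=(0, 100))
    print '%d top-level nodes' % len(book.children)
    return book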

def make_zip_file(output, env,
                  status=None,
                  num_threads=10,
                  imagesize=800,
                  ):
    if status is None:
        status = lambda **kwargs: None

    if output is None:
        fd, output = tempfile.mkstemp(suffix='.zip')
        os.close(fd)

    fd, tmpzip = tempfile.mkstemp(suffix='.zip', dir=os.path.dirname(output))
    os.close(fd)
    zf = zipfile.ZipFile(tmpzip, 'w')

    try:
        articles = metabook.get_item_list(env.metabook, filter_type='article')

        if num_threads > 0:
            z = ThreadedZipCreator(zf,
                                   imagesize=imagesize,
                                   num_threads=num_threads,
                                   status=status,
                                   num_articles=len(articles),
                                   )
        else:
            z = ZipCreator(zf,
                           imagesize=imagesize,
                           status=status,
                           num_articles=len(articles),
                           )

        #if articles:
        #    class IncProgress(object):
        #        inc = 100./len(articles)
        #        p = 0
        #        def __call__(self, title):
        #            self.p += self.inc
        #            status(progress=int(self.p), article=title)
        #    inc_progress = IncProgress()
        #else:
        #    inc_progress = None

        for item in articles:
            d = mwapidb.parse_article_url(item['title'].encode('utf-8'))
            if d is not None:
                item['title'] = d['title']
                item['revision'] = d['revision']
                wikidb = mwapidb.WikiDB(api_helper=d['api_helper'])
                imagedb = mwapidb.ImageDB(api_helper=d['api_helper'])
            else:
                wikidb = env.wiki
                imagedb = env.images
            z.addArticle(item['title'],
                         revision=item.get('revision', None),
                         wikidb=wikidb,
                         imagedb=imagedb,
                         )

        for license in env.get_licenses():
            z.parseArticle(
                title=license['title'],
                raw=license['wikitext'],
                wikidb=env.wiki,
                imagedb=env.images,
            )

        z.join()

        z.addObject('metabook.json', json.dumps(env.metabook))

        zf.close()

        if os.path.exists(output):  # Windows...
            os.unlink(output)
        os.rename(tmpzip, output)

        if env.images and hasattr(env.images, 'clear'):
            env.images.clear()

        status(progress=100)
        return output
    finally:
        if os.path.exists(tmpzip):
            utils.safe_unlink(tmpzip)
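
# --- usage sketch (not part of the original module) --------------------------
# The 'status' argument is invoked with keyword arguments only (the default is
# 'lambda **kwargs: None' above), so a logging callback can simply accept
# whatever keywords the ZipCreator passes.  Only 'progress' and 'article'
# appear in this module; any other keyword handled below is an assumption.

def _print_status(**kwargs):
    if kwargs.get('article'):
        print 'fetching %r' % kwargs['article']
    if kwargs.get('progress') is not None:
        print 'progress: %s%%' % kwargs['progress']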

def make_zip_file(output, env,
                  status=None,
                  num_threads=10,
                  imagesize=800,
                  ):
    """Write the articles, licenses, metabook and node statistics of
    env.metabook into a ZIP file at output (or a temporary file) and
    return its path."""
    if output is None:
        fd, output = tempfile.mkstemp(suffix='.zip')
        os.close(fd)

    fd, tmpzip = tempfile.mkstemp(suffix='.zip', dir=os.path.dirname(output))
    os.close(fd)
    zf = zipfile.ZipFile(tmpzip, 'w')

    try:
        articles = metabook.get_item_list(env.metabook, filter_type='article')

        z = ZipCreator(zf,
                       imagesize=imagesize,
                       num_threads=num_threads,
                       status=status,
                       num_articles=len(articles),
                       )

        for item in articles:
            d = mwapidb.parse_article_url(item['title'].encode('utf-8'))
            if d is not None:
                item['title'] = d['title']
                item['revision'] = d['revision']
                wikidb = mwapidb.WikiDB(api_helper=d['api_helper'])
                imagedb = mwapidb.ImageDB(api_helper=d['api_helper'])
            else:
                wikidb = env.wiki
                imagedb = env.images
            z.addArticle(item['title'],
                         revision=item.get('revision', None),
                         wikidb=wikidb,
                         imagedb=imagedb,
                         )

        for license in env.get_licenses():
            z.parseArticle(
                title=license['title'],
                raw=license['wikitext'],
                wikidb=env.wiki,
                imagedb=env.images,
            )

        z.join()

        # using check() is a bit rigorous: sometimes articles just cannot be
        # fetched -- PDFs should be generated nevertheless
        #z.check(articles)

        z.addObject('metabook.json', json.dumps(env.metabook))

        # add stats for later analysis
        z.node_stats["Chapter"] = len(metabook.get_item_list(env.metabook,
                                                             filter_type='chapter'))
        z.addObject('node_stats.json', json.dumps(z.node_stats))

        zf.close()

        if os.path.exists(output):  # Windows...
            os.unlink(output)
        os.rename(tmpzip, output)

        if env.images and hasattr(env.images, 'clear'):
            env.images.clear()

        if status is not None:
            status(progress=100)
        return output
    finally:
        if os.path.exists(tmpzip):
            utils.safe_unlink(tmpzip)
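
# --- usage sketch (not part of the original module) --------------------------
# A hedged example of calling make_zip_file(): the output path, thread count
# and callback signature follow the conventions visible above; how 'env' is
# constructed by the surrounding application is an assumption, not a
# documented API.

def _demo_make_zip(env, path='collection.zip'):
    def status(progress=None, article=None, **kwargs):
        print 'zip %s%% %s' % (progress, article or '')

    out = make_zip_file(path, env, status=status,
                        num_threads=10, imagesize=800)
    print 'wrote', out
    return out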