def make_zip_file(output, env, status=None, num_threads=10, imagesize=800):
    """Collect all articles/images of *env* into a ZIP file and return its path.

    The archive is first written to a temporary file in the destination
    directory and only renamed into place once it is complete, so a partially
    written ZIP never appears under the final name.

    :param output: destination path for the ZIP file; if ``None`` a fresh
        temporary ``.zip`` file is created and its path returned
    :param env: environment object providing ``metabook``, ``wiki``,
        ``images`` and ``get_licenses()``
    :param status: optional callback invoked with keyword arguments
        (e.g. ``progress=...``); defaults to a no-op
    :param num_threads: number of fetcher threads; ``0`` disables threading
    :param imagesize: maximum image size passed to the zip creator
    :returns: path of the finished ZIP file
    """
    if status is None:
        # no-op callback so the rest of the code can call it unconditionally
        status = lambda **kwargs: None

    if output is None:
        fd, output = tempfile.mkstemp(suffix='.zip')
        os.close(fd)

    # Build in a sibling temp file and rename at the end so *output* is
    # never observed half-written (rename is atomic on the same filesystem).
    fd, tmpzip = tempfile.mkstemp(suffix='.zip', dir=os.path.dirname(output))
    os.close(fd)
    zf = zipfile.ZipFile(tmpzip, 'w')
    try:
        articles = metabook.get_item_list(env.metabook, filter_type='article')
        if num_threads > 0:
            z = ThreadedZipCreator(zf,
                                   imagesize=imagesize,
                                   num_threads=num_threads,
                                   status=status,
                                   num_articles=len(articles))
        else:
            z = ZipCreator(zf,
                           imagesize=imagesize,
                           status=status,
                           num_articles=len(articles))

        for item in articles:
            # Titles may actually be full article URLs pointing at a foreign
            # wiki; in that case fetch from that wiki instead of env's.
            d = mwapidb.parse_article_url(item['title'].encode('utf-8'))
            if d is not None:
                item['title'] = d['title']
                item['revision'] = d['revision']
                wikidb = mwapidb.WikiDB(api_helper=d['api_helper'])
                imagedb = mwapidb.ImageDB(api_helper=d['api_helper'])
            else:
                wikidb = env.wiki
                imagedb = env.images
            z.addArticle(item['title'],
                         revision=item.get('revision', None),
                         wikidb=wikidb,
                         imagedb=imagedb)

        # 'lic' rather than 'license' to avoid shadowing the builtin
        for lic in env.get_licenses():
            z.parseArticle(title=lic['title'],
                           raw=lic['wikitext'],
                           wikidb=env.wiki,
                           imagedb=env.images)

        z.join()
        z.addObject('metabook.json', json.dumps(env.metabook))
        zf.close()

        if os.path.exists(output):  # Windows: rename fails if target exists
            os.unlink(output)
        os.rename(tmpzip, output)

        if env.images and hasattr(env.images, 'clear'):
            env.images.clear()
        status(progress=100)
        return output
    finally:
        # On any failure the half-built temp archive is removed.
        if os.path.exists(tmpzip):
            utils.safe_unlink(tmpzip)
def make_zip_file(output, env, status=None, num_threads=10, imagesize=800):
    """Write all articles, licenses and images of *env* into a ZIP archive.

    The archive is assembled in a temporary file next to *output* and renamed
    into place only when complete, so callers never see a truncated ZIP.

    :param output: target path; when ``None`` a temporary ``.zip`` is created
    :param env: environment providing ``metabook``, ``wiki``, ``images`` and
        ``get_licenses()``
    :param status: progress callback taking keyword arguments; ``None`` means
        "report nothing"
    :param num_threads: fetcher thread count; ``0`` selects the serial creator
    :param imagesize: maximum image size handed to the zip creator
    :returns: path of the resulting ZIP file
    """
    if status is None:
        status = lambda **kwargs: None

    if output is None:
        fd, output = tempfile.mkstemp(suffix='.zip')
        os.close(fd)

    # Stage into a temp file in the same directory; the final rename is
    # atomic on the same filesystem.
    fd, tmpzip = tempfile.mkstemp(suffix='.zip', dir=os.path.dirname(output))
    os.close(fd)
    zf = zipfile.ZipFile(tmpzip, 'w')
    try:
        articles = metabook.get_item_list(env.metabook, filter_type='article')
        if num_threads > 0:
            z = ThreadedZipCreator(zf,
                                   imagesize=imagesize,
                                   num_threads=num_threads,
                                   status=status,
                                   num_articles=len(articles))
        else:
            z = ZipCreator(zf,
                           imagesize=imagesize,
                           status=status,
                           num_articles=len(articles))

        for item in articles:
            wikidb, imagedb = _resolve_article_dbs(item, env)
            z.addArticle(item['title'],
                         revision=item.get('revision', None),
                         wikidb=wikidb,
                         imagedb=imagedb)

        # 'lic' rather than 'license' to avoid shadowing the builtin
        for lic in env.get_licenses():
            z.parseArticle(title=lic['title'],
                           raw=lic['wikitext'],
                           wikidb=env.wiki,
                           imagedb=env.images)

        z.join()
        z.addObject('metabook.json', json.dumps(env.metabook))
        zf.close()

        if os.path.exists(output):  # Windows: cannot rename onto an existing file
            os.unlink(output)
        os.rename(tmpzip, output)

        if env.images and hasattr(env.images, 'clear'):
            env.images.clear()
        status(progress=100)
        return output
    finally:
        # Clean up the staging file if anything above raised.
        if os.path.exists(tmpzip):
            utils.safe_unlink(tmpzip)


def _resolve_article_dbs(item, env):
    """Return ``(wikidb, imagedb)`` for *item*, mutating it in place.

    If ``item['title']`` is actually an article URL on a foreign wiki, the
    title/revision are rewritten and per-wiki DB objects are created;
    otherwise the environment's default databases are used.
    """
    d = mwapidb.parse_article_url(item['title'].encode('utf-8'))
    if d is not None:
        item['title'] = d['title']
        item['revision'] = d['revision']
        return (mwapidb.WikiDB(api_helper=d['api_helper']),
                mwapidb.ImageDB(api_helper=d['api_helper']))
    return env.wiki, env.images
def make_zip_file(output, env, status=None, num_threads=10, imagesize=800):
    """Build a ZIP archive of *env*'s articles, licenses and node statistics.

    The ZIP is written to a temporary file in the destination directory and
    renamed to *output* only once complete. In addition to the articles a
    ``metabook.json`` and a ``node_stats.json`` object (for later analysis)
    are stored in the archive.

    :param output: destination path; ``None`` creates a temporary ``.zip``
    :param env: environment providing ``metabook``, ``wiki``, ``images`` and
        ``get_licenses()``
    :param status: optional progress callback (keyword arguments); in this
        variant ``None`` is forwarded unchanged to ``ZipCreator``
    :param num_threads: thread count forwarded to ``ZipCreator``
    :param imagesize: maximum image size forwarded to ``ZipCreator``
    :returns: path of the resulting ZIP file
    """
    if output is None:
        fd, output = tempfile.mkstemp(suffix='.zip')
        os.close(fd)

    # Stage into a sibling temp file; the final rename is atomic on the
    # same filesystem, so *output* is never seen half-written.
    fd, tmpzip = tempfile.mkstemp(suffix='.zip', dir=os.path.dirname(output))
    os.close(fd)
    zf = zipfile.ZipFile(tmpzip, 'w')
    try:
        articles = metabook.get_item_list(env.metabook, filter_type='article')
        z = ZipCreator(zf,
                       imagesize=imagesize,
                       num_threads=num_threads,
                       status=status,
                       num_articles=len(articles))

        for item in articles:
            # Titles may be full article URLs on a foreign wiki; if so,
            # rewrite title/revision and fetch via that wiki's API instead.
            d = mwapidb.parse_article_url(item['title'].encode('utf-8'))
            if d is not None:
                item['title'] = d['title']
                item['revision'] = d['revision']
                wikidb = mwapidb.WikiDB(api_helper=d['api_helper'])
                imagedb = mwapidb.ImageDB(api_helper=d['api_helper'])
            else:
                wikidb = env.wiki
                imagedb = env.images
            z.addArticle(item['title'],
                         revision=item.get('revision', None),
                         wikidb=wikidb,
                         imagedb=imagedb)

        # 'lic' rather than 'license' to avoid shadowing the builtin
        for lic in env.get_licenses():
            z.parseArticle(title=lic['title'],
                           raw=lic['wikitext'],
                           wikidb=env.wiki,
                           imagedb=env.images)

        z.join()
        # Deliberately NOT calling z.check(articles): some articles simply
        # cannot be fetched, and PDFs should be generated nevertheless.
        z.addObject('metabook.json', json.dumps(env.metabook))

        # Record chapter count alongside the collected node stats so the
        # archive carries enough data for later analysis.
        z.node_stats["Chapter"] = len(
            metabook.get_item_list(env.metabook, filter_type='chapter'))
        z.addObject('node_stats.json', json.dumps(z.node_stats))
        zf.close()

        if os.path.exists(output):  # Windows: rename fails if target exists
            os.unlink(output)
        os.rename(tmpzip, output)

        if env.images and hasattr(env.images, 'clear'):
            env.images.clear()
        if status is not None:
            status(progress=100)
        return output
    finally:
        # Remove the staging file if anything above raised.
        if os.path.exists(tmpzip):
            utils.safe_unlink(tmpzip)