예제 #1
0
def build_book(env, status_callback=None, progress_range=None):
    """Build a ``parser.Book`` from the items listed in ``env.metabook``.

    Chapter items become ``parser.Chapter`` children.  Article items are
    fetched with ``env.wiki.getParsedArticle`` and appended with their
    caption (when the item has a ``displaytitle``), URL and authors set.
    Articles for which the wiki returns ``None`` are logged and skipped.
    Items of any other type are ignored.

    :param env: environment object providing ``metabook`` and ``wiki``.
    :param status_callback: optional callable invoked with the keyword
        arguments ``status``, ``progress`` and ``article`` before each
        article is fetched, and once more after the last item.
    :param progress_range: ``(start, end)`` pair bounding the progress
        values passed to ``status_callback``.  Falls back to ``(0, 100)``
        when a callback is supplied without a range.
    :returns: the populated book.
    """
    book = parser.Book()
    report_progress = status_callback is not None

    progress = 0
    # Bound unconditionally so the loop below never hits an unbound name,
    # even if no article was counted.
    progress_step = 0
    if report_progress:
        if progress_range is None:
            # A callback without a range used to fail with a TypeError on
            # progress_range[0]; use a plain percentage scale instead.
            progress_range = (0, 100)
        progress = progress_range[0]
        num_articles = float(len(metabook.get_item_list(
            env.metabook,
            filter_type='article',
        )))
        if num_articles > 0:
            # int() truncates, so the final reported value can stay below
            # progress_range[1].
            progress_step = int(
                (progress_range[1] - progress_range[0])/num_articles
            )

    for item in metabook.get_item_list(env.metabook):
        item_type = item['type']
        if item_type == 'chapter':
            book.children.append(parser.Chapter(item['title'].strip()))
            continue
        if item_type != 'article':
            continue

        title = item['title']
        revision = item.get('revision')

        if report_progress:
            status_callback(
                status='parsing',
                progress=progress,
                article=title,
            )
            progress += progress_step

        article = env.wiki.getParsedArticle(
            title=title,
            revision=revision,
        )
        if article is None:
            log.warn('No such article: %r' % item['title'])
            continue

        if "displaytitle" in item:
            article.caption = item['displaytitle']
        url = env.wiki.getURL(title, revision)
        # Percent-decode the URL on its UTF-8 bytes, then turn the result
        # back into a unicode string.
        article.url = unicode(urllib.unquote(url.encode('utf-8')), 'utf-8')
        article.authors = env.wiki.getAuthors(title, revision=revision)
        book.children.append(article)

    if report_progress:
        status_callback(status='parsing', progress=progress, article='')
    return book
예제 #2
0
def make_zip_file(
    output,
    env,
    status=None,
    num_threads=10,
    imagesize=800,
):
    """Write the articles, licenses and metabook of ``env`` into a ZIP file.

    The archive is assembled in a temporary file created in the directory
    of ``output`` and renamed to ``output`` only after it has been closed.

    :param output: destination path; when ``None`` a temporary ``.zip``
        file is created and its path is used instead.
    :param env: environment object providing ``metabook``, ``wiki``,
        ``images`` and ``get_licenses()``.
    :param status: optional callable accepting keyword arguments; it is
        handed to the zip creator and called with ``progress=100`` at the
        end.  A do-nothing callable is substituted when omitted.
    :param num_threads: a positive value selects ``ThreadedZipCreator``
        with that many threads; otherwise ``ZipCreator`` is used.
    :param imagesize: passed through to the zip creator.
    :returns: the path of the finished ZIP file.
    """
    if status is None:
        def _ignore_status(**kwargs):
            return None
        status = _ignore_status

    if output is None:
        fd, output = tempfile.mkstemp(suffix='.zip')
        os.close(fd)

    # Build next to the destination so the final rename stays in one directory.
    fd, tmpzip = tempfile.mkstemp(suffix='.zip', dir=os.path.dirname(output))
    os.close(fd)
    zf = zipfile.ZipFile(tmpzip, 'w')

    try:
        articles = metabook.get_item_list(env.metabook, filter_type='article')

        if num_threads > 0:
            z = ThreadedZipCreator(
                zf,
                imagesize=imagesize,
                num_threads=num_threads,
                status=status,
                num_articles=len(articles),
            )
        else:
            z = ZipCreator(
                zf,
                imagesize=imagesize,
                status=status,
                num_articles=len(articles),
            )

        for item in articles:
            # A title that parses as an article URL carries its own wiki;
            # the item is rewritten in place with the parsed title/revision.
            d = mwapidb.parse_article_url(item['title'].encode('utf-8'))
            if d is not None:
                item['title'] = d['title']
                item['revision'] = d['revision']
                wikidb = mwapidb.WikiDB(api_helper=d['api_helper'])
                imagedb = mwapidb.ImageDB(api_helper=d['api_helper'])
            else:
                wikidb = env.wiki
                imagedb = env.images
            z.addArticle(
                item['title'],
                revision=item.get('revision', None),
                wikidb=wikidb,
                imagedb=imagedb,
            )

        for license_item in env.get_licenses():
            z.parseArticle(
                title=license_item['title'],
                raw=license_item['wikitext'],
                wikidb=env.wiki,
                imagedb=env.images,
            )

        z.join()
        z.addObject('metabook.json', json.dumps(env.metabook))
        zf.close()
        # os.rename() refuses to replace an existing file on Windows.
        if os.path.exists(output):
            os.unlink(output)
        os.rename(tmpzip, output)

        if env.images and hasattr(env.images, 'clear'):
            env.images.clear()

        status(progress=100)
        return output
    finally:
        try:
            # On the error path the archive was never closed, which leaked
            # the file handle and kept the temporary file open while it was
            # being unlinked.  After a successful close above this second
            # call has nothing left to do.
            zf.close()
        finally:
            if os.path.exists(tmpzip):
                utils.safe_unlink(tmpzip)
예제 #3
0
def make_zip_file(output, env,
    status=None,
    num_threads=10,
    imagesize=800,
):
    """Create a ZIP file holding the articles, licenses and metabook of ``env``.

    Everything is written to a temporary archive in the directory of
    ``output``; that archive is closed and then renamed to ``output``.

    :param output: target path, or ``None`` to have a temporary ``.zip``
        file created and used as the target.
    :param env: environment object providing ``metabook``, ``wiki``,
        ``images`` and ``get_licenses()``.
    :param status: optional callable taking keyword arguments.  It is
        passed to the zip creator and finally called with ``progress=100``;
        when omitted a callable that ignores its arguments is used.
    :param num_threads: if greater than zero a ``ThreadedZipCreator`` with
        this many threads is used, otherwise a ``ZipCreator``.
    :param imagesize: forwarded to the zip creator.
    :returns: the path of the written ZIP file.
    """
    if status is None:
        def _no_status(**kwargs):
            return None
        status = _no_status

    if output is None:
        fd, output = tempfile.mkstemp(suffix='.zip')
        os.close(fd)

    fd, tmpzip = tempfile.mkstemp(suffix='.zip', dir=os.path.dirname(output))
    os.close(fd)
    zf = zipfile.ZipFile(tmpzip, 'w')

    try:
        articles = metabook.get_item_list(env.metabook, filter_type='article')

        creator_kwargs = dict(
            imagesize=imagesize,
            status=status,
            num_articles=len(articles),
        )
        if num_threads > 0:
            z = ThreadedZipCreator(zf, num_threads=num_threads, **creator_kwargs)
        else:
            z = ZipCreator(zf, **creator_kwargs)

        for item in articles:
            # Titles given as article URLs select their own wiki and image
            # database; the item is updated in place with the parsed values.
            parsed = mwapidb.parse_article_url(item['title'].encode('utf-8'))
            if parsed is None:
                wikidb = env.wiki
                imagedb = env.images
            else:
                item['title'] = parsed['title']
                item['revision'] = parsed['revision']
                wikidb = mwapidb.WikiDB(api_helper=parsed['api_helper'])
                imagedb = mwapidb.ImageDB(api_helper=parsed['api_helper'])
            z.addArticle(item['title'],
                revision=item.get('revision', None),
                wikidb=wikidb,
                imagedb=imagedb,
            )

        for license_item in env.get_licenses():
            z.parseArticle(
                title=license_item['title'],
                raw=license_item['wikitext'],
                wikidb=env.wiki,
                imagedb=env.images,
            )

        z.join()
        z.addObject('metabook.json', json.dumps(env.metabook))
        zf.close()
        # On Windows os.rename() fails if the destination already exists.
        if os.path.exists(output):
            os.unlink(output)
        os.rename(tmpzip, output)

        if env.images and hasattr(env.images, 'clear'):
            env.images.clear()

        status(progress=100)
        return output
    finally:
        try:
            # Release the archive handle when an error skipped the close
            # above; otherwise the handle leaks and the temporary file is
            # still open while being removed.
            zf.close()
        finally:
            if os.path.exists(tmpzip):
                utils.safe_unlink(tmpzip)
def make_zip_file(output, env,
    status=None,
    num_threads=10,
    imagesize=800,
):
    """Create a ZIP file with the articles, licenses, metabook and node stats.

    The archive is built in a temporary file in the directory of ``output``
    and renamed to ``output`` once it has been closed.  Besides the
    articles it receives ``metabook.json`` and ``node_stats.json``.

    :param output: target path, or ``None`` to have a temporary ``.zip``
        file created and used as the target.
    :param env: environment object providing ``metabook``, ``wiki``,
        ``images`` and ``get_licenses()``.
    :param status: optional callable taking keyword arguments; passed to
        ``ZipCreator`` as given and, unless ``None``, called with
        ``progress=100`` at the end.
    :param num_threads: forwarded to ``ZipCreator``.
    :param imagesize: forwarded to ``ZipCreator``.
    :returns: the path of the written ZIP file.
    """
    if output is None:
        fd, output = tempfile.mkstemp(suffix='.zip')
        os.close(fd)

    fd, tmpzip = tempfile.mkstemp(suffix='.zip', dir=os.path.dirname(output))
    os.close(fd)
    zf = zipfile.ZipFile(tmpzip, 'w')

    try:
        articles = metabook.get_item_list(env.metabook, filter_type='article')

        z = ZipCreator(zf,
            imagesize=imagesize,
            num_threads=num_threads,
            status=status,
            num_articles=len(articles),
        )

        for item in articles:
            # Titles given as article URLs select their own wiki and image
            # database; the item is updated in place with the parsed values.
            parsed = mwapidb.parse_article_url(item['title'].encode('utf-8'))
            if parsed is None:
                wikidb = env.wiki
                imagedb = env.images
            else:
                item['title'] = parsed['title']
                item['revision'] = parsed['revision']
                wikidb = mwapidb.WikiDB(api_helper=parsed['api_helper'])
                imagedb = mwapidb.ImageDB(api_helper=parsed['api_helper'])
            z.addArticle(item['title'],
                revision=item.get('revision', None),
                wikidb=wikidb,
                imagedb=imagedb,
            )

        for license_item in env.get_licenses():
            z.parseArticle(
                title=license_item['title'],
                raw=license_item['wikitext'],
                wikidb=env.wiki,
                imagedb=env.images,
            )

        z.join()

        # z.check(articles) is deliberately not called: it is too rigorous,
        # because sometimes articles just cannot be fetched and PDFs should
        # be generated nevertheless.

        z.addObject('metabook.json', json.dumps(env.metabook))

        # add stats for later analysis
        chapters = metabook.get_item_list(env.metabook, filter_type='chapter')
        z.node_stats["Chapter"] = len(chapters)
        z.addObject('node_stats.json', json.dumps(z.node_stats))

        zf.close()
        # On Windows os.rename() fails if the destination already exists.
        if os.path.exists(output):
            os.unlink(output)
        os.rename(tmpzip, output)

        if env.images and hasattr(env.images, 'clear'):
            env.images.clear()

        if status is not None:
            status(progress=100)
        return output
    finally:
        try:
            # Release the archive handle when an error skipped the close
            # above; otherwise the handle leaks and the temporary file is
            # still open while being removed.
            zf.close()
        finally:
            if os.path.exists(tmpzip):
                utils.safe_unlink(tmpzip)