def get_template_job(name):
    """Fetch the template called ``name`` and report fetch progress.

    ``self`` and ``wikidb`` are not parameters; they are taken from the
    enclosing scope (not visible in this block).

    @param name: name of the template to fetch
    """
    recorddb = RecordDB(wikidb, self.articles, self.templates, self.sources)
    try:
        recorddb.getTemplate(name)
    finally:
        # Count the template even if fetching raised, so that the reported
        # progress keeps advancing.
        self.template_count += 1
        # Guard against a zero/unset total: a ZeroDivisionError raised here
        # would replace any exception coming out of getTemplate().
        if self.fetchtemplate_status and self.num_templates:
            # Floor division keeps the percentage an integer regardless of
            # whether true division is in effect.
            self.fetchtemplate_status(
                progress=self.template_count * 100 // self.num_templates
            )
def addArticle(self, title, revision=None, wikidb=None, imagedb=None):
    """Add the article with the given title and revision to the ZIP file.

    Referenced templates and images are added as well (via parseArticle).
    An article whose title is already present in self.articles is skipped.

    @param title: article title
    @type title: unicode

    @param revision: article revision (optional)
    @type revision: int

    @param wikidb: WikiDB to use

    @param imagedb: ImageDB to use (optional)
    """
    if title in self.articles:
        # Already registered -- nothing to do.
        return

    # Register the title before fetching anything.
    self.articles[title] = {}
    self.status(article=title)

    db = RecordDB(wikidb, self.articles, self.templates, self.sources)
    wikitext = db.getRawArticle(title, revision=revision)
    if wikitext is None:
        log.warn('Could not get article %r' % title)
        return

    redirect_match = self.redirect_rex.search(wikitext)
    if redirect_match:
        # Follow the redirect; the target is fetched without a revision.
        target = redirect_match.group('redirect')
        wikitext = db.getRawArticle(target)
        if wikitext is None:
            log.warn('Could not get redirected article %r (from %r)' % (target, title))
            return

    self.parseArticle(
        title,
        revision=revision,
        raw=wikitext,
        wikidb=wikidb,
        imagedb=imagedb,
    )

    self.article_count += 1
    if not self.num_articles:
        # No total known: no percentage to report.
        return
    self.status(progress=self.article_count * 100 // self.num_articles)
def fetch_article_job(job_id):
    """Fetch the raw text of an article, following one redirect.

    ``self``, ``wikidb``, ``title`` and ``revision`` are taken from the
    enclosing scope (not visible in this block). The fetched text is not
    returned; the job is run for the effects of the RecordDB calls.

    @param job_id: not used by this function
    """
    db = RecordDB(wikidb, self.articles, self.templates, self.sources)

    wikitext = db.getRawArticle(title, revision=revision)
    if wikitext is None:
        log.warn('Could not get article %r' % title)
        return

    redirect_match = self.redirect_rex.search(wikitext)
    if not redirect_match:
        return

    # The redirect target is fetched without a revision.
    target = redirect_match.group('redirect')
    if db.getRawArticle(target) is None:
        log.warn('Could not get redirected article %r (from %r)' % (target, title))
def fetch_article_job(job_id):
    """Retrieve an article's wikitext and, if it is a redirect, its target.

    The names ``self``, ``wikidb``, ``title`` and ``revision`` come from the
    enclosing scope (not visible in this block). Nothing is returned; only
    the effects of the RecordDB lookups remain.

    @param job_id: not used by this function
    """
    fetcher = RecordDB(wikidb, self.articles, self.templates, self.sources)
    text = fetcher.getRawArticle(title, revision=revision)

    if text is not None:
        found = self.redirect_rex.search(text)
        if found:
            # Redirect targets are requested without a revision.
            redirect_title = found.group('redirect')
            text = fetcher.getRawArticle(redirect_title)
            if text is None:
                log.warn('Could not get redirected article %r (from %r)' % (
                    redirect_title,
                    title,
                ))
    else:
        log.warn('Could not get article %r' % title)
def addArticle(self, title, revision=None, wikidb=None, imagedb=None):
    """Add article with given title and revision to ZIP file.

    All templates and images the article references are added too
    (through parseArticle). Titles already present in self.articles are
    ignored.

    @param title: article title
    @type title: unicode

    @param revision: article revision (optional)
    @type revision: int

    @param wikidb: WikiDB to use

    @param imagedb: ImageDB to use (optional)
    """
    if title in self.articles:
        return

    self.articles[title] = {}
    self.status(article=title)

    fetcher = RecordDB(wikidb, self.articles, self.templates, self.sources)
    text = fetcher.getRawArticle(title, revision=revision)
    if text is None:
        log.warn('Could not get article %r' % title)
        return

    found = self.redirect_rex.search(text)
    if found:
        # Replace the redirect page's text with the text of its target.
        redirect_title = found.group('redirect')
        text = fetcher.getRawArticle(redirect_title)
        if text is None:
            log.warn('Could not get redirected article %r (from %r)' % (
                redirect_title,
                title,
            ))
            return

    parse_options = dict(
        revision=revision,
        raw=text,
        wikidb=wikidb,
        imagedb=imagedb,
    )
    self.parseArticle(title, **parse_options)

    self.article_count += 1
    if self.num_articles:
        percent = self.article_count * 100 // self.num_articles
        self.status(progress=percent)
def fetch_article_job(job_id):
    """Fetch an article (following one redirect) and report progress.

    ``self``, ``wikidb``, ``title`` and ``revision`` are taken from the
    enclosing scope (not visible in this block). The fetched text is not
    returned. Failures to fetch are logged, and the article is counted
    as processed either way.

    @param job_id: not used by this function
    """
    if self.fetcharticle_status:
        self.fetcharticle_status(article=title)

    recorddb = RecordDB(wikidb, self.articles, self.templates, self.sources)
    raw = recorddb.getRawArticle(title, revision=revision)
    if raw is None:
        log.warn('Could not get article %r' % title)
    else:
        mo = self.redirect_rex.search(raw)
        if mo:
            # The redirect target is fetched without a revision.
            raw = recorddb.getRawArticle(mo.group('redirect'))
            if raw is None:
                log.warn('Could not get redirected article %r (from %r)' % (
                    mo.group('redirect'),
                    title,
                ))

    self.article_count += 1
    # Skip the report when the total is zero/unset instead of raising
    # ZeroDivisionError.
    if self.fetcharticle_status and self.num_articles:
        # Floor division keeps the percentage an integer regardless of
        # whether true division is in effect.
        self.fetcharticle_status(
            progress=self.article_count * 100 // self.num_articles
        )
def parseArticle(self, title, revision=None, raw=None, wikidb=None, imagedb=None):
    """Parse the given wikitext and add the images it references.

    The text is parsed with a RecordDB wrapped around ``wikidb``. The
    article itself is not added here. When no ``imagedb`` is given, the
    text is still parsed but no images are added.

    @param title: title of article
    @type title: unicode

    @param revision: revision of article (optional)
    @type revision: int

    @param raw: wikitext of article
    @type raw: unicode

    @param wikidb: WikiDB to use

    @param imagedb: ImageDB to use (optional)
    """
    db = RecordDB(wikidb, self.articles, self.templates, self.sources)
    tree = uparser.parseString(title, revision=revision, raw=raw, wikidb=db)

    if imagedb is None:
        return

    for child in tree.allchildren():
        # Plain image links carry their target directly.
        if isinstance(child, parser.ImageLink):
            self.addImage(child.target, imagedb=imagedb, wikidb=wikidb)
            continue

        # Otherwise only <imagemap> tag nodes are of interest.
        if not (isinstance(child, parser.TagNode) and child.caption == 'imagemap'):
            continue

        image_map = getattr(child, 'imagemap', None)
        if image_map is None:
            continue

        link = getattr(image_map, 'imagelink', None)
        if link is None:
            continue

        self.addImage(link.target, imagedb=imagedb, wikidb=wikidb)
def get_template_job(name):
    """Fetch the template called ``name`` through a fresh RecordDB.

    ``self`` and ``wikidb`` are taken from the enclosing scope (not visible
    in this block). The result of getTemplate() is discarded.

    @param name: name of the template to fetch
    """
    RecordDB(
        wikidb,
        self.articles,
        self.templates,
        self.sources,
    ).getTemplate(name)