def makewiki(self):
    """Create the wiki environment described by the parsed options.

    All parsed option values plus the current metabook are forwarded as
    keyword arguments to wiki.makewiki().  If the resulting environment has
    no metabook, a fresh collection is created and shared between this
    object and the environment.

    @returns: the environment object returned by wiki.makewiki()
    """
    makewiki_args = dict(self.options.__dict__)
    makewiki_args["metabook"] = self.metabook
    env = wiki.makewiki(**makewiki_args)

    if not env.metabook:
        self.metabook = env.metabook = metabook.collection()
        env.init_metabook()

    if self.options.noimages:
        env.images = None

    # Command line values override the metabook only when they are set.
    for field in ("title", "subtitle", "editor"):
        value = getattr(self.options, field)
        if value:
            env.metabook[field] = value

    # add default licenses
    config_name = self.options.config or ""
    if config_name.startswith(":") and not env.metabook.licenses:
        # ":name" is looked up in wiki.wpwikis without the leading colon.
        wiki_info = wiki.wpwikis.get(config_name[1:])
        default_license = dict(mw_license_url=wiki_info['mw_license_url'],
                               type="license")
        env.metabook.licenses.append(default_license)

    return env
def parse_args(self):
    """Parse sys.argv and post-process the resulting options.

    Command line arguments are decoded from UTF-8 before being handed to
    optparse.  Afterwards this method:

      - gives every entry of self.config_values a ``pages`` list if it has
        none,
      - starts logging when --logfile is given,
      - loads the metabook from the file named by --metabook,
      - converts --imagesize to a positive int, reporting anything else
        through self.error(),
      - appends every positional argument to the metabook as an article
        title, creating an empty collection first if necessary.

    @returns: the (options, args) pair, also stored on self
    """
    self.options, self.args = optparse.OptionParser.parse_args(
        self, args=[unicode(x, "utf-8") for x in sys.argv[1:]])

    for c in self.config_values:
        if not hasattr(c, "pages"):
            c.pages = []

    if self.options.logfile:
        start_logging(self.options.logfile)

    if self.options.metabook:
        # Close the file explicitly instead of leaving it to the garbage
        # collector.  try/finally keeps this valid on interpreters that
        # lack the ``with`` statement.
        mb_file = open(self.options.metabook, 'rb')
        try:
            raw = mb_file.read()
        finally:
            mb_file.close()
        self.metabook = json.loads(unicode(raw, 'utf-8'))

    # Validate with an explicit comparison: an ``assert`` is compiled away
    # under ``python -O`` and would let non-positive sizes through.
    try:
        self.options.imagesize = int(self.options.imagesize)
        imagesize_ok = self.options.imagesize > 0
    except ValueError:
        imagesize_ok = False
    if not imagesize_ok:
        self.error('Argument for --imagesize must be an integer > 0.')

    for title in self.args:
        if self.metabook is None:
            self.metabook = metabook.collection()
        self.metabook.append_article(title)

    return self.options, self.args
def make_nuwiki(fsdir, metabook, options, podclient=None, status=None):
    """Start one fetcher per wiki referenced by metabook and run them all.

    Articles are grouped by their ``wikiident``.  With a single wiki the
    whole metabook is fetched directly into fsdir.  With several wikis each
    one gets its own sub-collection and its own subdirectory named after the
    wiki ident, and fsdir additionally receives ``metabook.json`` (the full
    metabook) and ``nfo.json`` (format marker ``multi-nuwiki``).

    @param fsdir: target directory
    @param metabook: collection providing ``wikis``, ``articles()`` and
        ``dumps()``
    @param options: passed through to start_fetcher()
    @param podclient: passed through to start_fetcher()
    @param status: passed through to start_fetcher() and, for multiple
        wikis, to fetch.shared_progress()
    @raises AssertionError: if an article references an unknown wiki ident,
        if an ident is None in a multi-wiki metabook, or if an ident
        contains "/"
    """
    # wiki ident -> (wikiconf, articles belonging to that wiki)
    id2wiki = {}
    for wikiconf in metabook.wikis:
        id2wiki[wikiconf.ident] = (wikiconf, [])

    for article in metabook.articles():
        assert article.wikiident in id2wiki, "no wikiconf for %r (%s)" % (article.wikiident, article)
        id2wiki[article.wikiident][1].append(article)

    is_multiwiki = len(id2wiki) > 1

    if is_multiwiki:
        progress = fetch.shared_progress(status=status)
    else:
        progress = None

    fetchers = []
    for wiki_id, (wikiconf, articles) in id2wiki.items():
        if wiki_id is None:
            wiki_id = ""
            assert not is_multiwiki, "id must be set in multiwiki"

        if not is_multiwiki:
            # A single wiki is stored directly in fsdir.
            wiki_id = ""

        # The ident becomes a directory name below fsdir.
        assert "/" not in wiki_id, "bad id: %r" % (wiki_id,)
        my_fsdir = os.path.join(fsdir, wiki_id)

        if is_multiwiki:
            my_mb = collection()
            my_mb.items = articles
        else:
            my_mb = metabook

        wikitrust(wikiconf.baseurl, my_mb)

        fetchers.append(start_fetcher(fsdir=my_fsdir, progress=progress, base_url=wikiconf.baseurl,
                                      metabook=my_mb, options=options, podclient=podclient, status=status))

    if is_multiwiki:
        if not os.path.exists(fsdir):
            os.makedirs(fsdir)

        # Close both files explicitly so their contents are flushed to disk
        # before the fetchers run, instead of relying on garbage collection.
        mb_file = open(os.path.join(fsdir, "metabook.json"), "wb")
        try:
            mb_file.write(metabook.dumps())
        finally:
            mb_file.close()

        nfo_file = open(os.path.join(fsdir, "nfo.json"), "wb")
        try:
            myjson.dump(dict(format="multi-nuwiki"), nfo_file)
        finally:
            nfo_file.close()

    pool = gevent.pool.Pool()
    for fetcher in fetchers:
        pool.spawn(fetcher.run)
    pool.join(raise_error=True)

    # Restore default SIGINT/SIGTERM handling once fetching is finished.
    # NOTE(review): presumably the fetch machinery installs its own
    # handlers -- confirm.
    import signal
    signal.signal(signal.SIGINT, signal.SIG_DFL)
    signal.signal(signal.SIGTERM, signal.SIG_DFL)
def parse_collection_page(wikitext):
    """Parse wikitext of a MediaWiki collection page created by the
    Collection extension for MediaWiki.

    Each non-empty line is matched against ``alltogether_rex``; lines that
    do not match are ignored.  The named groups of the match decide whether
    the line sets the title or subtitle, adds a chapter or an article, or
    contributes to the summary.

    @param wikitext: wikitext of a MediaWiki collection page
    @type wikitext: unicode

    @returns: metabook.collection
    @rtype: metabook.collection
    """
    mb = metabook.collection()

    # State used to decide whether a summary line should be collected.
    after_template = False
    no_template_yet = True

    for raw_line in wikitext.splitlines():
        stripped = raw_line.strip()
        if not stripped:
            continue

        match = alltogether_rex.search(stripped)
        if not match:
            continue
        grp = match.group

        # look for initial templates and summaries
        # multiline templates need different handling to those that fit
        # into one line
        if grp('template_end') or grp('template'):
            after_template = True
            no_template_yet = False
        elif grp('template_start'):
            no_template_yet = False
        elif not grp('summary'):
            # Any other kind of line ends the summary section.
            after_template = False
            no_template_yet = False

        if grp('title'):
            mb.title = grp('title').strip()
        elif grp('subtitle'):
            mb.subtitle = grp('subtitle').strip()
        elif grp('chapter'):
            chapter_title = grp('chapter').strip()
            mb.items.append(metabook.chapter(title=chapter_title))
        elif grp('article'):
            mb.append_article(grp('article'), grp('displaytitle'))
        elif grp('oldarticle'):
            mb.append_article(title=grp('oldarticle'),
                              displaytitle=grp('olddisplaytitle'),
                              revision=grp('oldid'))
        elif grp('summary') and (no_template_yet or after_template):
            mb.summary += grp('summary') + " "

    return mb
def parse_args(self):
    """Parse sys.argv and post-process the resulting options.

    Command line arguments are decoded from UTF-8 before being handed to
    optparse.  Afterwards this method:

      - gives every entry of self.config_values a ``pages`` list if it has
        none,
      - starts logging when --logfile is given,
      - loads the metabook from the file named by --metabook,
      - converts --imagesize to a positive int, reporting anything else
        through self.error(),
      - appends every positional argument to the metabook as an article
        title, creating an empty collection first if necessary,
      - checks that --print-template-pattern contains ``$1``,
      - folds the deprecated --print-template-prefix into
        --print-template-pattern and removes ``print_template_prefix``
        from the options object.

    @returns: the (options, args) pair, also stored on self
    """
    self.options, self.args = optparse.OptionParser.parse_args(
        self, args=[unicode(x, "utf-8") for x in sys.argv[1:]])

    for c in self.config_values:
        if not hasattr(c, "pages"):
            c.pages = []

    if self.options.logfile:
        start_logging(self.options.logfile)

    if self.options.metabook:
        # Close the file explicitly instead of leaving it to the garbage
        # collector.  try/finally keeps this valid on interpreters that
        # lack the ``with`` statement.
        mb_file = open(self.options.metabook, 'rb')
        try:
            raw = mb_file.read()
        finally:
            mb_file.close()
        self.metabook = json.loads(unicode(raw, 'utf-8'))

    # Validate with an explicit comparison: an ``assert`` is compiled away
    # under ``python -O`` and would let non-positive sizes through.
    try:
        self.options.imagesize = int(self.options.imagesize)
        imagesize_ok = self.options.imagesize > 0
    except ValueError:
        imagesize_ok = False
    if not imagesize_ok:
        self.error('Argument for --imagesize must be an integer > 0.')

    for title in self.args:
        if self.metabook is None:
            self.metabook = metabook.collection()
        self.metabook.append_article(title)

    if self.options.print_template_pattern and "$1" not in self.options.print_template_pattern:
        self.error("bad --print-template-pattern argument [must contain $1, but %r does not]" % (self.options.print_template_pattern,))

    if self.options.print_template_prefix and self.options.print_template_pattern:
        log.warn('Both --print-template-pattern and --print-template-prefix (deprecated) specified. Using --print-template-pattern only.')
    elif self.options.print_template_prefix:
        # Translate the deprecated prefix into the equivalent pattern.
        self.options.print_template_pattern = '%s$1' % self.options.print_template_prefix

    # Only print_template_pattern remains on the options object.
    del self.options.print_template_prefix

    return self.options, self.args
def make_nuwiki(fsdir, metabook, options, podclient=None, status=None):
    """Start one fetcher per wiki referenced by metabook and run them all.

    Articles are grouped by their ``wikiident``.  With a single wiki the
    whole metabook is fetched directly into fsdir.  With several wikis each
    one gets its own sub-collection and its own subdirectory named after the
    wiki ident, and fsdir additionally receives ``metabook.json`` (the full
    metabook) and ``nfo.json`` (format marker ``multi-nuwiki``).

    @param fsdir: target directory
    @param metabook: collection providing ``wikis``, ``articles()`` and
        ``dumps()``
    @param options: passed through to start_fetcher()
    @param podclient: passed through to start_fetcher()
    @param status: passed through to start_fetcher() and, for multiple
        wikis, to fetch.shared_progress()
    @raises AssertionError: if an article references an unknown wiki ident,
        if an ident is None in a multi-wiki metabook, or if an ident
        contains "/"
    """
    # wiki ident -> (wikiconf, articles belonging to that wiki)
    id2wiki = {}
    for wikiconf in metabook.wikis:
        id2wiki[wikiconf.ident] = (wikiconf, [])

    for article in metabook.articles():
        assert article.wikiident in id2wiki, "no wikiconf for %r (%s)" % (
            article.wikiident, article)
        id2wiki[article.wikiident][1].append(article)

    is_multiwiki = len(id2wiki) > 1

    if is_multiwiki:
        progress = fetch.shared_progress(status=status)
    else:
        progress = None

    fetchers = []
    for wiki_id, (wikiconf, articles) in id2wiki.items():
        if wiki_id is None:
            wiki_id = ""
            assert not is_multiwiki, "id must be set in multiwiki"

        if not is_multiwiki:
            # A single wiki is stored directly in fsdir.
            wiki_id = ""

        # The ident becomes a directory name below fsdir.
        assert "/" not in wiki_id, "bad id: %r" % (wiki_id, )
        my_fsdir = os.path.join(fsdir, wiki_id)

        if is_multiwiki:
            my_mb = collection()
            my_mb.items = articles
        else:
            my_mb = metabook

        wikitrust(wikiconf.baseurl, my_mb)

        fetchers.append(
            start_fetcher(fsdir=my_fsdir,
                          progress=progress,
                          base_url=wikiconf.baseurl,
                          metabook=my_mb,
                          options=options,
                          podclient=podclient,
                          status=status))

    if is_multiwiki:
        if not os.path.exists(fsdir):
            os.makedirs(fsdir)

        # Close both files explicitly so their contents are flushed to disk
        # before the fetchers run, instead of relying on garbage collection.
        mb_file = open(os.path.join(fsdir, "metabook.json"), "wb")
        try:
            mb_file.write(metabook.dumps())
        finally:
            mb_file.close()

        nfo_file = open(os.path.join(fsdir, "nfo.json"), "wb")
        try:
            myjson.dump(dict(format="multi-nuwiki"), nfo_file)
        finally:
            nfo_file.close()

    pool = gevent.pool.Pool()
    for fetcher in fetchers:
        pool.spawn(fetcher.run)
    pool.join(raise_error=True)

    # Restore default SIGINT/SIGTERM handling once fetching is finished.
    # NOTE(review): presumably the fetch machinery installs its own
    # handlers -- confirm.
    import signal
    signal.signal(signal.SIGINT, signal.SIG_DFL)
    signal.signal(signal.SIGTERM, signal.SIG_DFL)
#! /usr/bin/env python

# Build a sample collection that contains the article "Mainz" from two
# wikis (idents "de" and "en") and print its serialized form.

from mwlib import metabook

c = metabook.collection()

# Articles are added first, then the wiki configurations they refer to.
for ident in ("de", "en"):
    c.append_article(title="Mainz", wikiident=ident)

for ident, baseurl in (("de", "http://de.wikipedia.org/w/"),
                       ("en", "http://en.wikipedia.org/w/")):
    c.wikis.append(metabook.wikiconf(ident=ident, baseurl=baseurl))

print(c.dumps())
def make_nuwiki(fsdir, metabook, options, podclient=None, status=None):
    """Start one fetcher per wiki referenced by metabook and run them all.

    Articles are grouped by their ``wikiident``.  With a single wiki the
    whole metabook is fetched directly into fsdir.  With several wikis each
    one gets its own sub-collection and its own subdirectory named after the
    wiki ident, and fsdir additionally receives ``metabook.json`` (the full
    metabook) and ``nfo.json`` (format marker ``multi-nuwiki``).

    The fetchers are driven by the twisted reactor, which is run until the
    combined DeferredList of all fetchers has fired.

    @param fsdir: target directory
    @param metabook: collection providing ``wikis``, ``articles()`` and
        ``dumps()``
    @param options: passed through to start_fetcher()
    @param podclient: passed through to start_fetcher()
    @param status: passed through to start_fetcher() and, for multiple
        wikis, to fetch.shared_progress()
    @raises AssertionError: if an article references an unknown wiki ident,
        if an ident is None in a multi-wiki metabook, or if an ident
        contains "/"
    @raises KeyboardInterrupt: if the reactor stopped without any result
        having been collected
    @raises RuntimeError: if any fetcher reported a failure
    """
    # wiki ident -> (wikiconf, articles belonging to that wiki)
    id2wiki = {}
    for wikiconf in metabook.wikis:
        id2wiki[wikiconf.ident] = (wikiconf, [])

    for article in metabook.articles():
        assert article.wikiident in id2wiki, "no wikiconf for %r (%s)" % (article.wikiident, article)
        id2wiki[article.wikiident][1].append(article)

    is_multiwiki = len(id2wiki) > 1

    if is_multiwiki:
        progress = fetch.shared_progress(status=status)
    else:
        progress = None

    fetchers = []
    for wiki_id, (wikiconf, articles) in id2wiki.items():
        if wiki_id is None:
            wiki_id = ""
            assert not is_multiwiki, "id must be set in multiwiki"

        if not is_multiwiki:
            # A single wiki is stored directly in fsdir.
            wiki_id = ""

        # The ident becomes a directory name below fsdir.
        assert "/" not in wiki_id, "bad id: %r" % (wiki_id,)
        my_fsdir = os.path.join(fsdir, wiki_id)

        if is_multiwiki:
            my_mb = collection()
            my_mb.items = articles
        else:
            my_mb = metabook

        wikitrust(wikiconf.baseurl, my_mb)

        fetchers.append(start_fetcher(fsdir=my_fsdir, progress=progress, base_url=wikiconf.baseurl,
                                      metabook=my_mb, options=options, podclient=podclient, status=status))

    if is_multiwiki:
        if not os.path.exists(fsdir):
            os.makedirs(fsdir)

        # Close both files explicitly so their contents are flushed to disk
        # before the fetchers run, instead of relying on garbage collection.
        mb_file = open(os.path.join(fsdir, "metabook.json"), "wb")
        try:
            mb_file.write(metabook.dumps())
        finally:
            mb_file.close()

        nfo_file = open(os.path.join(fsdir, "nfo.json"), "wb")
        try:
            myjson.dump(dict(format="multi-nuwiki"), nfo_file)
        finally:
            nfo_file.close()

    # Filled with (success, value) pairs once every fetcher has finished.
    retval = []

    def done(listres):
        retval.extend(listres)
        reactor.stop()

    def run():
        return defer.DeferredList([x.run() for x in fetchers])

    # Start the fetchers from inside the running reactor.
    reactor.callLater(0.0, lambda: run().addBoth(done))
    reactor.run()

    # Restore default SIGINT/SIGTERM handling after the reactor has stopped.
    import signal
    signal.signal(signal.SIGINT, signal.SIG_DFL)
    signal.signal(signal.SIGTERM, signal.SIG_DFL)

    # done() never ran, so the reactor was stopped some other way.
    if not retval:
        raise KeyboardInterrupt("interrupted")

    for success, val in retval:
        if not success:
            raise RuntimeError(str(val))