Beispiel #1
0
    def makewiki(self):
        kw = self.options.__dict__.copy()
        kw["metabook"] = self.metabook
        
        env = wiki.makewiki(**kw)
        
        if not env.metabook:
            self.metabook = env.metabook = metabook.collection()
            env.init_metabook()
            
        if self.options.noimages:
            env.images = None

        def setmb(name):
            n = getattr(self.options, name)
            if n:
                env.metabook[name] = n

        setmb("title")
        setmb("subtitle")
        setmb("editor")

        # add default licenses
        cfg = self.options.config or ""
        
        if cfg.startswith(":") and not env.metabook.licenses:
            mw_license_url = wiki.wpwikis.get(cfg[1:])['mw_license_url']
            env.metabook.licenses.append(dict(mw_license_url=mw_license_url,
                                              type="license"))

        return env
Beispiel #2
0
    def makewiki(self):
        kw = self.options.__dict__.copy()
        kw["metabook"] = self.metabook
        
        env = wiki.makewiki(**kw)
        
        if not env.metabook:
            self.metabook = env.metabook = metabook.collection()
            env.init_metabook()
            
        if self.options.noimages:
            env.images = None

        def setmb(name):
            n = getattr(self.options, name)
            if n:
                env.metabook[name] = n

        setmb("title")
        setmb("subtitle")
        setmb("editor")

        # add default licenses
        cfg = self.options.config or ""
        
        if cfg.startswith(":") and not env.metabook.licenses:
            mw_license_url = wiki.wpwikis.get(cfg[1:])['mw_license_url']
            env.metabook.licenses.append(dict(mw_license_url=mw_license_url,
                                              type="license"))

        return env
Beispiel #3
0
    def parse_args(self):
        self.options, self.args = optparse.OptionParser.parse_args(self, args=[unicode(x, "utf-8") for x in sys.argv[1:]])
        for c in self.config_values:
            if not hasattr(c, "pages"):
                c.pages = []
            
        if self.options.logfile:
            start_logging(self.options.logfile)
        
        if self.options.metabook:
            self.metabook = json.loads(unicode(open(self.options.metabook, 'rb').read(), 'utf-8'))
        
        try:
            self.options.imagesize = int(self.options.imagesize)
            assert self.options.imagesize > 0
        except (ValueError, AssertionError):
            self.error('Argument for --imagesize must be an integer > 0.')
        
        for title in self.args:
            if self.metabook is None:
                self.metabook = metabook.collection()
            
            self.metabook.append_article(title)

        return self.options, self.args
Beispiel #4
0
def make_nuwiki(fsdir, metabook, options, podclient=None, status=None):
    id2wiki = {}
    for x in metabook.wikis:
        id2wiki[x.ident] = (x, [])

    for x in metabook.articles():
        assert x.wikiident in id2wiki, "no wikiconf for %r (%s)" % (x.wikiident, x)
        id2wiki[x.wikiident][1].append(x)

    is_multiwiki = len(id2wiki) > 1

    if is_multiwiki:
        progress = fetch.shared_progress(status=status)
    else:
        progress = None

    fetchers = []
    for id, (wikiconf, articles) in id2wiki.items():
        if id is None:
            id = ""
            assert not is_multiwiki, "id must be set in multiwiki"

        if not is_multiwiki:
            id = ""

        assert "/" not in id, "bad id: %r" % (id,)
        my_fsdir = os.path.join(fsdir, id)

        if is_multiwiki:
            my_mb = collection()
            my_mb.items = articles
        else:
            my_mb = metabook

        wikitrust(wikiconf.baseurl, my_mb)

        fetchers.append(start_fetcher(fsdir=my_fsdir, progress=progress, base_url=wikiconf.baseurl,
                                      metabook=my_mb, options=options, podclient=podclient, status=status))

    if is_multiwiki:
        if not os.path.exists(fsdir):
            os.makedirs(fsdir)
        open(os.path.join(fsdir, "metabook.json"), "wb").write(metabook.dumps())
        myjson.dump(dict(format="multi-nuwiki"), open(os.path.join(fsdir, "nfo.json"), "wb"))

    pool = gevent.pool.Pool()
    for x in fetchers:
        pool.spawn(x.run)
    pool.join(raise_error=True)

    import signal
    signal.signal(signal.SIGINT, signal.SIG_DFL)
    signal.signal(signal.SIGTERM, signal.SIG_DFL)
def parse_collection_page(wikitext):
    """Parse wikitext of a MediaWiki collection page created by the Collection
    extension for MediaWiki.
    
    @param wikitext: wikitext of a MediaWiki collection page
    @type mwcollection: unicode
    
    @returns: metabook.collection
    @rtype: metabook.collection
    """
    mb = metabook.collection()

    
    summary = False
    noTemplate = True
    for line in wikitext.splitlines():
        line = line.strip()
        if not line:
            continue
        res = alltogether_rex.search(line)
        if not res:
            continue
        
        #look for initial templates and summaries
        #multilinetemplates need different handling to those that fit into one line
        if res.group('template_end') or res.group('template'):
            summary = True
            noTemplate = False
        elif res.group('template_start'):
            noTemplate = False
        elif res.group('summary'):
            pass
        else:
            summary = False
            noTemplate = False

        if res.group('title'):
            mb.title = res.group('title').strip()
        elif res.group('subtitle'):
            mb.subtitle = res.group('subtitle').strip()
        elif res.group('chapter'):
            mb.items.append(metabook.chapter(title=res.group('chapter').strip()))
        elif res.group('article'):
            mb.append_article(res.group('article'), res.group('displaytitle'))
        elif res.group('oldarticle'):
            mb.append_article(title=res.group('oldarticle'), displaytitle=res.group('olddisplaytitle'), revision=res.group('oldid'))
        elif res.group('summary') and (noTemplate or summary):
            mb.summary += res.group('summary') + " "

    return mb
Beispiel #6
0
    def parse_args(self):
        self.options, self.args = optparse.OptionParser.parse_args(self, args=[unicode(x, "utf-8") for x in sys.argv[1:]])
        for c in self.config_values:
            if not hasattr(c, "pages"):
                c.pages = []
            
        if self.options.logfile:
            start_logging(self.options.logfile)
        
        if self.options.metabook:
            self.metabook = json.loads(unicode(open(self.options.metabook, 'rb').read(), 'utf-8'))
        
        try:
            self.options.imagesize = int(self.options.imagesize)
            assert self.options.imagesize > 0
        except (ValueError, AssertionError):
            self.error('Argument for --imagesize must be an integer > 0.')
        
        for title in self.args:
            if self.metabook is None:
                self.metabook = metabook.collection()
            
            self.metabook.append_article(title)

        if self.options.print_template_pattern and "$1" not in self.options.print_template_pattern:
            self.error("bad --print-template-pattern argument [must contain $1, but %r does not]" % (self.options.print_template_pattern,))

        
        if self.options.print_template_prefix and self.options.print_template_pattern:
            log.warn('Both --print-template-pattern and --print-template-prefix (deprecated) specified. Using --print-template-pattern only.')
        elif self.options.print_template_prefix:
            self.options.print_template_pattern = '%s$1' % self.options.print_template_prefix

        del self.options.print_template_prefix
        
        return self.options, self.args
Beispiel #7
0
    def parse_args(self):
        self.options, self.args = optparse.OptionParser.parse_args(self, args=[unicode(x, "utf-8") for x in sys.argv[1:]])
        for c in self.config_values:
            if not hasattr(c, "pages"):
                c.pages = []
            
        if self.options.logfile:
            start_logging(self.options.logfile)
        
        if self.options.metabook:
            self.metabook = json.loads(unicode(open(self.options.metabook, 'rb').read(), 'utf-8'))
        
        try:
            self.options.imagesize = int(self.options.imagesize)
            assert self.options.imagesize > 0
        except (ValueError, AssertionError):
            self.error('Argument for --imagesize must be an integer > 0.')
        
        for title in self.args:
            if self.metabook is None:
                self.metabook = metabook.collection()
            
            self.metabook.append_article(title)

        if self.options.print_template_pattern and "$1" not in self.options.print_template_pattern:
            self.error("bad --print-template-pattern argument [must contain $1, but %r does not]" % (self.options.print_template_pattern,))

        
        if self.options.print_template_prefix and self.options.print_template_pattern:
            log.warn('Both --print-template-pattern and --print-template-prefix (deprecated) specified. Using --print-template-pattern only.')
        elif self.options.print_template_prefix:
            self.options.print_template_pattern = '%s$1' % self.options.print_template_prefix

        del self.options.print_template_prefix
        
        return self.options, self.args
Beispiel #8
0
def make_nuwiki(fsdir, metabook, options, podclient=None, status=None):
    id2wiki = {}
    for x in metabook.wikis:
        id2wiki[x.ident] = (x, [])

    for x in metabook.articles():
        assert x.wikiident in id2wiki, "no wikiconf for %r (%s)" % (
            x.wikiident, x)
        id2wiki[x.wikiident][1].append(x)

    is_multiwiki = len(id2wiki) > 1

    if is_multiwiki:
        progress = fetch.shared_progress(status=status)
    else:
        progress = None

    fetchers = []
    for id, (wikiconf, articles) in id2wiki.items():
        if id is None:
            id = ""
            assert not is_multiwiki, "id must be set in multiwiki"

        if not is_multiwiki:
            id = ""

        assert "/" not in id, "bad id: %r" % (id, )
        my_fsdir = os.path.join(fsdir, id)

        if is_multiwiki:
            my_mb = collection()
            my_mb.items = articles
        else:
            my_mb = metabook

        wikitrust(wikiconf.baseurl, my_mb)

        fetchers.append(
            start_fetcher(fsdir=my_fsdir,
                          progress=progress,
                          base_url=wikiconf.baseurl,
                          metabook=my_mb,
                          options=options,
                          podclient=podclient,
                          status=status))

    if is_multiwiki:
        if not os.path.exists(fsdir):
            os.makedirs(fsdir)
        open(os.path.join(fsdir, "metabook.json"),
             "wb").write(metabook.dumps())
        myjson.dump(dict(format="multi-nuwiki"),
                    open(os.path.join(fsdir, "nfo.json"), "wb"))

    pool = gevent.pool.Pool()
    for x in fetchers:
        pool.spawn(x.run)
    pool.join(raise_error=True)

    import signal
    signal.signal(signal.SIGINT, signal.SIG_DFL)
    signal.signal(signal.SIGTERM, signal.SIG_DFL)
Beispiel #9
0
#! /usr/bin/env python

from mwlib import metabook

c = metabook.collection()
c.append_article(title="Mainz", wikiident="de")
c.append_article(title="Mainz", wikiident="en")
c.wikis.append(metabook.wikiconf(ident="de", baseurl="http://de.wikipedia.org/w/"))
c.wikis.append(metabook.wikiconf(ident="en", baseurl="http://en.wikipedia.org/w/"))

print c.dumps()

                                 
Beispiel #10
0
def make_nuwiki(fsdir, metabook, options, podclient=None, status=None):
    id2wiki = {}
    for x in metabook.wikis:
        id2wiki[x.ident] = (x, [])

    for x in metabook.articles():
        assert x.wikiident in id2wiki, "no wikiconf for %r (%s)" % (x.wikiident,  x)
        id2wiki[x.wikiident][1].append(x)

    is_multiwiki = len(id2wiki)>1
    
    if is_multiwiki:
        progress = fetch.shared_progress(status=status)
    else:
        progress = None
        
    fetchers =[]
    for id, (wikiconf, articles) in id2wiki.items():
        if id is None:
            id = ""
            assert not is_multiwiki, "id must be set in multiwiki"

        if not is_multiwiki:
            id = ""
        
        assert "/" not in id, "bad id: %r" % (id,)
        my_fsdir = os.path.join(fsdir, id)
        
        if is_multiwiki:
            my_mb = collection()
            my_mb.items = articles
        else:
            my_mb = metabook

        wikitrust(wikiconf.baseurl, my_mb)

        fetchers.append(start_fetcher(fsdir=my_fsdir, progress=progress, base_url=wikiconf.baseurl, metabook=my_mb, options=options, podclient=podclient, status=status))

    if is_multiwiki:
        if not os.path.exists(fsdir):
            os.makedirs(fsdir)
        open(os.path.join(fsdir, "metabook.json"),  "wb").write(metabook.dumps())
        myjson.dump(dict(format="multi-nuwiki"), open(os.path.join(fsdir, "nfo.json"), "wb"))
        
        
    retval = []
    def done(listres):
        retval.extend(listres)
        reactor.stop()

    def run():
        return defer.DeferredList([x.run() for x in fetchers])
            
    reactor.callLater(0.0, lambda: run().addBoth(done))
    reactor.run()
    import signal
    signal.signal(signal.SIGINT,  signal.SIG_DFL)
    signal.signal(signal.SIGTERM,  signal.SIG_DFL)
    
    if not retval:
        raise KeyboardInterrupt("interrupted")

    for success, val in retval:
        if not success:
            raise RuntimeError(str(val))