コード例 #1
0
    def _read_revisions(self):
        """Populate ``self.revisions`` from the ``revisions-N.txt`` files.

        Files are read in sequence, starting with ``revisions-1.txt`` and
        stopping at the first number for which no file exists.  Each file is
        decoded as UTF-8 and split on a newline followed by `` --page-- ``;
        the text before the first separator is ignored.  Every remaining
        record consists of one line of JSON metadata followed by the raw page
        text.

        A page whose metadata has a ``revid`` is stored under that revision
        id, any other page under its title.  After all files are read, every
        stored page is additionally registered under its title unless that
        title is already a key.
        """
        count = 1
        while True:
            fn = self._pathjoin("revisions-%s.txt" % count)
            if not os.path.exists(fn):
                break
            count += 1
            print("reading %s" % fn)
            # NOTE(review): fn already went through _pathjoin above; the second
            # call is kept as-is because _pathjoin is not visible here --
            # confirm it is idempotent or open fn directly.
            # The context manager closes the handle instead of leaving that
            # to garbage collection.
            with open(self._pathjoin(fn), "rb") as revfile:
                data = unicode(revfile.read(), "utf-8")

            for record in data.split("\n --page-- ")[1:]:
                jmeta, rawtext = record.split("\n", 1)
                meta = json.loads(jmeta)
                pg = Page(meta, rawtext)
                if pg.title in self.excluded and pg.ns != 0:
                    # Excluded pages outside namespace 0 get their text
                    # replaced by the single private-use character U+EBAD.
                    pg.rawtext = unichr(0xebad)

                revid = meta.get("revid")
                key = pg.title if revid is None else revid
                self.revisions[key] = pg

        # Visit entries in descending key order so that, for each title not
        # yet present, the entry with the highest-sorting key is registered.
        for _, pg in sorted(self.revisions.items(), reverse=True):
            if pg.title not in self.revisions:
                self.revisions[pg.title] = pg
コード例 #2
0
ファイル: nuwiki.py プロジェクト: hexmode/mwlib
    def _read_revisions(self):
        """Load all ``revisions-N.txt`` files into ``self.revisions``.

        Reading starts at ``revisions-1.txt`` and ends at the first missing
        file number.  A file is UTF-8 text made of records separated by a
        newline followed by `` --page-- ``; whatever precedes the first
        separator is skipped.  Each record is a JSON metadata line followed
        by the raw page text.

        Pages with a ``revid`` in their metadata are keyed by that id, the
        others by title.  A final pass makes every page reachable by title
        as well, without overwriting keys that already exist.
        """
        count = 1
        while True:
            fn = self._pathjoin("revisions-%s.txt" % count)
            if not os.path.exists(fn):
                break
            count += 1
            print("reading %s" % fn)
            # NOTE(review): fn is already a _pathjoin result; the repeated
            # call is preserved because _pathjoin is defined elsewhere --
            # confirm whether opening fn directly is equivalent.
            # Close the file explicitly rather than relying on the garbage
            # collector to release the descriptor.
            with open(self._pathjoin(fn), "rb") as handle:
                decoded = unicode(handle.read(), "utf-8")

            records = decoded.split("\n --page-- ")
            for record in records[1:]:
                jmeta, rawtext = record.split("\n", 1)
                meta = json.loads(jmeta)
                pg = Page(meta, rawtext)
                if pg.title in self.excluded and pg.ns != 0:
                    # U+EBAD (a private-use code point) stands in for the
                    # text of excluded pages outside namespace 0.
                    pg.rawtext = unichr(0xEBAD)

                revid = meta.get("revid")
                if revid is None:
                    self.revisions[pg.title] = pg
                else:
                    self.revisions[revid] = pg

        # Descending key order: the first entry seen for a missing title is
        # the one that gets registered under it.
        by_key_desc = sorted(self.revisions.items(), reverse=True)
        for _, pg in by_key_desc:
            if pg.title not in self.revisions:
                self.revisions[pg.title] = pg
コード例 #3
0
ファイル: nserve.py プロジェクト: vprusa/mwlib
    def do_zip_post(self, collection_id, post_data, is_new=False):
        """Validate a zip-post request and enqueue it on the "post" channel.

        Args:
            collection_id: identifier passed to ``_get_params`` and logged.
            post_data: mapping of POST arguments; must contain ``metabook``
                and, when no pod API URL is configured, ``post_url``.
            is_new: accepted for interface compatibility; not used here.

        Returns:
            The result of ``self.error_response(...)`` when a required POST
            argument is missing.  Otherwise a dict with ``'state': 'ok'``,
            plus ``'redirect_url'`` when the pod API was queried.
        """
        params = self._get_params(post_data, collection_id=collection_id)

        # Only the presence of the key matters; the value is not used here.
        try:
            post_data['metabook']
        except KeyError as exc:
            return self.error_response('POST argument required: %s' % exc)

        pod_api_url = params.pod_api_url
        if pod_api_url:
            # Close the HTTP response explicitly so the connection is not
            # left for the garbage collector to clean up.
            pod_response = urllib2.urlopen(pod_api_url, data="any")
            try:
                raw = pod_response.read()
            finally:
                pod_response.close()
            result = json.loads(unicode(raw, 'utf-8'))
            post_url = result['post_url'].encode('utf-8')
            response = {
                'state': 'ok',
                'redirect_url': result['redirect_url'].encode('utf-8'),
            }
        else:
            try:
                post_url = post_data['post_url']
            except KeyError:
                return self.error_response('POST argument required: post_url')
            response = {'state': 'ok'}

        log.info('zip_post %s %s' % (collection_id, pod_api_url))
        params.post_url = post_url

        self.qserve.qadd(
            channel="post",  # jobid="%s:post" % collection_id,
            payload=dict(params=params.__dict__),
            timeout=20 * 60)
        return response
コード例 #4
0
    def parse_args(self):
        """Parse ``sys.argv`` and post-process the resulting options.

        Command line arguments are decoded from UTF-8 before being handed to
        ``optparse``.  Afterwards this method

        * gives every entry of ``self.config_values`` a ``pages`` list if it
          has none,
        * starts logging when ``--logfile`` was given,
        * loads the metabook JSON file when one was given,
        * converts ``imagesize`` to a positive ``int`` (calling
          ``self.error`` otherwise),
        * appends every positional argument to the metabook as an article,
          creating an empty collection first if necessary.

        Returns:
            The ``(options, args)`` pair, which is also stored on ``self``.
        """
        self.options, self.args = optparse.OptionParser.parse_args(self, args=[unicode(x, "utf-8") for x in sys.argv[1:]])
        for c in self.config_values:
            if not hasattr(c, "pages"):
                c.pages = []

        if self.options.logfile:
            start_logging(self.options.logfile)

        if self.options.metabook:
            # Close the file deterministically instead of leaking the handle.
            with open(self.options.metabook, 'rb') as mbfile:
                self.metabook = json.loads(unicode(mbfile.read(), 'utf-8'))

        try:
            self.options.imagesize = int(self.options.imagesize)
            # An explicit check instead of ``assert``: assertions are removed
            # when Python runs with -O, which would disable the validation.
            if self.options.imagesize <= 0:
                raise ValueError("imagesize must be positive")
        except ValueError:
            self.error('Argument for --imagesize must be an integer > 0.')

        for title in self.args:
            if self.metabook is None:
                self.metabook = metabook.collection()

            self.metabook.append_article(title)

        return self.options, self.args
コード例 #5
0
ファイル: nserve.py プロジェクト: pediapress/mwlib
    def do_zip_post(self, collection_id, post_data, is_new=False):
        """Check a zip-post request and add a job to the "post" queue channel.

        Args:
            collection_id: identifier forwarded to ``_get_params`` and logged.
            post_data: mapping of POST arguments.  ``metabook`` is required;
                ``post_url`` is required when ``params.pod_api_url`` is empty.
            is_new: unused in this method.

        Returns:
            ``self.error_response(...)`` if a required argument is missing,
            otherwise a dict containing ``'state': 'ok'`` and, when the pod
            API supplied one, ``'redirect_url'``.
        """
        params = self._get_params(post_data, collection_id=collection_id)

        # The lookup is only a presence check for the required argument.
        try:
            post_data['metabook']
        except KeyError as exc:
            return self.error_response('POST argument required: %s' % exc)

        pod_api_url = params.pod_api_url
        if pod_api_url:
            # Read the pod API answer and always close the response object,
            # even if reading fails.
            connection = urllib2.urlopen(pod_api_url, data="any")
            try:
                body = connection.read()
            finally:
                connection.close()
            result = json.loads(unicode(body, 'utf-8'))
            post_url = result['post_url'].encode('utf-8')
            response = {
                'state': 'ok',
                'redirect_url': result['redirect_url'].encode('utf-8'),
            }
        else:
            try:
                post_url = post_data['post_url']
            except KeyError:
                return self.error_response('POST argument required: post_url')
            response = {'state': 'ok'}

        log.info('zip_post %s %s' % (collection_id, pod_api_url))
        params.post_url = post_url

        self.qserve.qadd(channel="post",  # jobid="%s:post" % collection_id,
                         payload=dict(params=params.__dict__),
                         timeout=20 * 60)
        return response
コード例 #6
0
def make_collection_id(data):
    """Derive a 16-character collection id from the request data.

    The id is the first 16 hex digits of the MD5 digest over the library
    version, the ``repr`` of a fixed set of request settings (including
    ``login_credentials``) and, when a metabook is supplied, its checksum.

    As a side effect, a ``new-collection`` line with the article count,
    base URL and writer is written to stdout whenever a metabook is present.
    """
    hashed_settings = (
        'base_url',
        'script_extension',
        'template_blacklist',
        'template_exclusion_category',
        'print_template_prefix',
        'print_template_pattern',
        'login_credentials',
    )

    buf = StringIO.StringIO()
    buf.write(str(_version.version))
    buf.write("".join(repr(data.get(name)) for name in hashed_settings))

    raw_metabook = data.get('metabook')
    if raw_metabook:
        # Byte strings are decoded as UTF-8 before parsing.
        if isinstance(raw_metabook, str):
            raw_metabook = unicode(raw_metabook, 'utf-8')
        collection = json.loads(raw_metabook)
        buf.write(calc_checksum(collection))
        article_count = len(list(collection.articles()))
        sys.stdout.write(
            "new-collection %s\t%r\t%r\n" %
            (article_count, data.get("base_url"), data.get("writer")))

    digest = md5(buf.getvalue()).hexdigest()
    return digest[:16]
コード例 #7
0
ファイル: serve.py プロジェクト: aarddict/mwlib
 def read_status_file(self, collection_id, writer):
     """Return the parsed JSON status for a collection/writer pair.

     The status file location comes from ``self.get_path`` using
     ``self.status_filename``.  The file is read as UTF-8 encoded JSON.

     Returns:
         The decoded JSON value, or ``{'progress': 0}`` when the file cannot
         be opened or read (IOError) or does not contain valid UTF-8 JSON
         (ValueError).
     """
     status_path = self.get_path(collection_id, self.status_filename, writer)
     try:
         # The previous version called f.close() after the return statement,
         # so it never ran; the context manager closes the file on every path.
         with open(status_path, 'rb') as f:
             raw = f.read()
         return json.loads(raw.decode('utf-8'))
     except (IOError, ValueError):
         return {'progress': 0}
コード例 #8
0
ファイル: zipwiki.py プロジェクト: aarddict/mwlib
    def __init__(self, zipfile):
        """Load metabook and page content from a wiki zip archive.

        Reads ``metabook.json`` and ``content.json`` (both UTF-8 JSON) from
        the archive, stores images, sources, licenses and siteinfo, and
        builds ``self.pages`` from the ``templates`` (default namespace 10)
        and ``articles`` (default namespace 0) sections, in that order.

        @type zipfile: basestring or ZipFile
        """
        # Anything with a read() method is used as the archive itself;
        # everything else is handed to ZipFile to be opened.
        self.zf = zipfile if hasattr(zipfile, "read") else ZipFile(zipfile)

        self.metabook = json.loads(unicode(self.zf.read("metabook.json"), 'utf-8'))
        content = json.loads(unicode(self.zf.read('content.json'), 'utf-8'))

        self.images = content.get('images', {})
        self.sources = content.get('sources', {})
        self.licenses = content.get('licenses', None)
        self.siteinfo = content.get('siteinfo', None)
        self.nshandler = nshandling.nshandler(self.get_siteinfo())

        self.pages = {}

        def as_keyword(key):
            # Turn a JSON key into a keyword argument name for page():
            # dashes become underscores and "content" is passed as "rawtext".
            key = str(key).replace("-",  "_")
            if key == "content":
                return "rawtext"
            return key

        for section, defaultns in (('templates', 10), ('articles', 0)):
            for title, vals in content.get(section, {}).items():
                fqname = self.nshandler.get_fqname(title, defaultns)
                kwargs = dict((as_keyword(k), v) for k, v in vals.items())
                self.pages[fqname] = page(**kwargs)
コード例 #9
0
ファイル: nserve.py プロジェクト: BroBeurKids/mwlib
def make_collection_id(data):
    """Compute a 16-character id for the collection described by ``data``.

    The id is the truncated MD5 hex digest of the library version, the
    ``repr`` of ``base_url``, ``script_extension`` and ``login_credentials``
    and, if a metabook is given, its checksum.

    When a metabook is present, a ``new-collection`` line (article count,
    base URL, writer) is also written to stdout.
    """
    hashed_settings = ("base_url", "script_extension", "login_credentials")

    buf = StringIO.StringIO()
    buf.write(str(_version.version))
    buf.write("".join(repr(data.get(name)) for name in hashed_settings))

    raw_metabook = data.get("metabook")
    if raw_metabook:
        # Byte strings are decoded as UTF-8 before parsing.
        if isinstance(raw_metabook, str):
            raw_metabook = unicode(raw_metabook, "utf-8")
        collection = json.loads(raw_metabook)
        buf.write(calc_checksum(collection))
        article_count = len(list(collection.articles()))
        sys.stdout.write("new-collection %s\t%r\t%r\n" % (article_count, data.get("base_url"), data.get("writer")))

    digest = md5(buf.getvalue()).hexdigest()
    return digest[:16]
コード例 #10
0
ファイル: nslave.py プロジェクト: ASaifM/mwlib
def suggest_filename(metabook_data):
    """Pick a filename suggestion from serialized metabook data.

    Returns the stripped title of the metabook if it is non-blank, otherwise
    the stripped title of the first item with a non-blank title.  Returns
    None when ``metabook_data`` is empty or no usable title exists.
    """
    if not metabook_data:
        return None

    from mwlib import myjson
    collection = myjson.loads(metabook_data)

    # The collection title takes precedence; items are only consulted when
    # it is missing or blank.
    title = collection.title
    if title and title.strip():
        return title.strip()

    for item in collection.items:
        item_title = item.title
        if item_title and item_title.strip():
            return item_title.strip()

    return None
コード例 #11
0
def suggest_filename(metabook_data):
    """Suggest a filename based on the titles found in a metabook.

    The metabook's own title is tried first, then each item's title in
    order; the first one that is non-empty after stripping whitespace is
    returned in stripped form.  Returns None for empty input or when every
    title is blank.
    """
    if not metabook_data:
        return None

    from mwlib import myjson
    parsed = myjson.loads(metabook_data)

    def cleaned(text):
        # Blank or missing titles are reported as None.
        if text and text.strip():
            return text.strip()
        return None

    suggestion = cleaned(parsed.title)
    if suggestion is not None:
        return suggestion

    for entry in parsed.items:
        suggestion = cleaned(entry.title)
        if suggestion is not None:
            return suggestion

    return None
コード例 #12
0
ファイル: client.py プロジェクト: ASaifM/mwlib
 def request(self, command, args, is_json=True):
     """Send ``command`` with ``args`` to ``self.url`` and return the reply.

     ``args`` is copied, the ``command`` entry is added, and the result is
     sent URL-encoded as the request body.  The raw body and the HTTP status
     code are stored in ``self.response`` and ``self.response_code``.

     Args:
         command: value sent as the ``command`` POST field.
         args: mapping (or sequence of pairs) of further POST fields.
         is_json: when true, the body is parsed as JSON and replaces
             ``self.response``.

     Returns:
         The parsed JSON value if ``is_json`` is true, else the raw body.

     Raises:
         Error: if the status code is not 200, or the parsed JSON response
             contains an ``error`` entry (also stored in ``self.error``).
     """
     self.error = None
     post_data = dict(args)
     post_data['command'] = command
     f = urllib.urlopen(self.url, urllib.urlencode(post_data))
     try:
         self.response = f.read()
         self.response_code = f.getcode()
     finally:
         # Release the connection even if reading the body fails.
         f.close()

     if self.response_code != 200:
         raise Error(self.response)

     if is_json:
         self.response = json.loads(self.response)
         if 'error' in self.response:
             self.error = self.response['error']
             raise Error(self.error)

     return self.response
コード例 #13
0
    def request(self, command, args, is_json=True):
        """POST ``command`` and ``args`` to ``self.url`` and return the answer.

        A copy of ``args`` extended by the ``command`` field is URL-encoded
        and sent as the request body.  ``self.response`` receives the raw
        body and ``self.response_code`` the HTTP status code.

        Args:
            command: value of the ``command`` POST field.
            args: mapping (or sequence of pairs) with the remaining fields.
            is_json: parse the body as JSON and store the result in
                ``self.response`` when true.

        Returns:
            The decoded JSON value when ``is_json`` is true, otherwise the
            raw response body.

        Raises:
            Error: on any status code other than 200, or when the decoded
                JSON contains an ``error`` entry (kept in ``self.error``).
        """
        self.error = None
        post_data = dict(args)
        post_data['command'] = command
        connection = urllib.urlopen(self.url, urllib.urlencode(post_data))
        try:
            self.response = connection.read()
            self.response_code = connection.getcode()
        finally:
            # Always close the response so the socket is not leaked.
            connection.close()

        if self.response_code != 200:
            raise Error(self.response)

        if is_json:
            self.response = json.loads(self.response)
            if 'error' in self.response:
                self.error = self.response['error']
                raise Error(self.error)

        return self.response
コード例 #14
0
ファイル: nserve.py プロジェクト: alvarin32/mwlib
def make_collection_id(data):
    """Return a 16-character id identifying the collection request ``data``.

    The id consists of the leading 16 hex digits of an MD5 digest computed
    over the library version, the ``repr`` of selected request settings
    (``login_credentials`` among them) and the metabook checksum, if a
    metabook was supplied.

    Side effect: with a metabook present, one ``new-collection`` line
    containing article count, base URL and writer goes to stdout.
    """
    hashed_settings = (
        'base_url',
        'script_extension',
        'template_blacklist',
        'template_exclusion_category',
        'print_template_prefix',
        'print_template_pattern',
        'login_credentials',
    )

    buf = StringIO.StringIO()
    buf.write(str(_version.version))
    buf.write("".join(repr(data.get(name)) for name in hashed_settings))

    raw_metabook = data.get('metabook')
    if raw_metabook:
        # Byte strings are decoded as UTF-8 before parsing.
        if isinstance(raw_metabook, str):
            raw_metabook = unicode(raw_metabook, 'utf-8')
        collection = json.loads(raw_metabook)
        buf.write(calc_checksum(collection))
        article_count = len(list(collection.articles()))
        sys.stdout.write("new-collection %s\t%r\t%r\n" % (article_count, data.get("base_url"), data.get("writer")))

    digest = md5(buf.getvalue()).hexdigest()
    return digest[:16]
コード例 #15
0
ファイル: options.py プロジェクト: ingob/mwlib
    def parse_args(self):
        """Parse ``sys.argv`` and normalise the resulting options.

        Arguments are decoded from UTF-8 and parsed by ``optparse``.  The
        method then

        * ensures every entry of ``self.config_values`` has a ``pages`` list,
        * starts logging if ``--logfile`` was given,
        * loads the metabook JSON file if one was given,
        * converts ``imagesize`` to a positive ``int`` or reports an error,
        * appends each positional argument to the metabook as an article,
        * validates ``--print-template-pattern`` (it must contain ``$1``),
        * folds the deprecated ``--print-template-prefix`` into the pattern
          and removes the ``print_template_prefix`` option attribute.

        Returns:
            The ``(options, args)`` pair, which is also stored on ``self``.
        """
        self.options, self.args = optparse.OptionParser.parse_args(self, args=[unicode(x, "utf-8") for x in sys.argv[1:]])
        for c in self.config_values:
            if not hasattr(c, "pages"):
                c.pages = []

        if self.options.logfile:
            start_logging(self.options.logfile)

        if self.options.metabook:
            # Close the file deterministically instead of leaking the handle.
            with open(self.options.metabook, 'rb') as mbfile:
                self.metabook = json.loads(unicode(mbfile.read(), 'utf-8'))

        try:
            self.options.imagesize = int(self.options.imagesize)
            # An explicit check instead of ``assert``: assertions vanish under
            # ``python -O`` and would let non-positive sizes through.
            if self.options.imagesize <= 0:
                raise ValueError("imagesize must be positive")
        except ValueError:
            self.error('Argument for --imagesize must be an integer > 0.')

        for title in self.args:
            if self.metabook is None:
                self.metabook = metabook.collection()

            self.metabook.append_article(title)

        if self.options.print_template_pattern and "$1" not in self.options.print_template_pattern:
            self.error("bad --print-template-pattern argument [must contain $1, but %r does not]" % (self.options.print_template_pattern,))

        if self.options.print_template_prefix and self.options.print_template_pattern:
            log.warn('Both --print-template-pattern and --print-template-prefix (deprecated) specified. Using --print-template-pattern only.')
        elif self.options.print_template_prefix:
            # Express the deprecated prefix as an equivalent pattern.
            self.options.print_template_pattern = '%s$1' % self.options.print_template_prefix

        del self.options.print_template_prefix

        return self.options, self.args
コード例 #16
0
ファイル: options.py プロジェクト: hpschry/mwlib
    def parse_args(self):
        """Parse the command line and post-process the parsed options.

        ``sys.argv[1:]`` is decoded from UTF-8 and passed to ``optparse``.
        Post-processing steps, in order:

        * add an empty ``pages`` list to ``self.config_values`` entries that
          lack one,
        * start logging when ``--logfile`` is set,
        * load the metabook JSON file when ``--metabook`` is set,
        * coerce ``imagesize`` to a positive ``int``, reporting an error via
          ``self.error`` otherwise,
        * append positional arguments to the metabook as articles,
        * reject a ``--print-template-pattern`` without ``$1``,
        * translate the deprecated ``--print-template-prefix`` into a pattern
          and delete the ``print_template_prefix`` option attribute.

        Returns:
            The ``(options, args)`` pair, which is also stored on ``self``.
        """
        self.options, self.args = optparse.OptionParser.parse_args(self, args=[unicode(x, "utf-8") for x in sys.argv[1:]])
        for c in self.config_values:
            if not hasattr(c, "pages"):
                c.pages = []

        if self.options.logfile:
            start_logging(self.options.logfile)

        if self.options.metabook:
            # The context manager guarantees the metabook file is closed.
            with open(self.options.metabook, 'rb') as metabook_file:
                self.metabook = json.loads(unicode(metabook_file.read(), 'utf-8'))

        try:
            self.options.imagesize = int(self.options.imagesize)
            # Raise explicitly rather than assert: running with -O strips
            # assert statements, which would skip this validation entirely.
            if self.options.imagesize <= 0:
                raise ValueError("imagesize must be positive")
        except ValueError:
            self.error('Argument for --imagesize must be an integer > 0.')

        for title in self.args:
            if self.metabook is None:
                self.metabook = metabook.collection()

            self.metabook.append_article(title)

        if self.options.print_template_pattern and "$1" not in self.options.print_template_pattern:
            self.error("bad --print-template-pattern argument [must contain $1, but %r does not]" % (self.options.print_template_pattern,))

        if self.options.print_template_prefix and self.options.print_template_pattern:
            log.warn('Both --print-template-pattern and --print-template-prefix (deprecated) specified. Using --print-template-pattern only.')
        elif self.options.print_template_prefix:
            # Only the prefix was given: rewrite it as the equivalent pattern.
            self.options.print_template_pattern = '%s$1' % self.options.print_template_prefix

        del self.options.print_template_prefix

        return self.options, self.args
コード例 #17
0
ファイル: wiki.py プロジェクト: vprusa/mwlib
def _makewiki(conf, metabook=None, **kw):
    """Create the environment object described by ``conf``.

    ``conf`` is interpreted in this order:

    * ``":<name>"`` -- shorthand looked up in ``wpwikis``; unknown names are
      registered on the fly as ``http://<name>.wikipedia.org/w/``.
    * a string starting with ``http://`` or ``https://`` -- used as the base
      URL directly.
    * a directory containing ``nfo.json`` -- opened as nuwiki or
      multi-nuwiki according to the file's ``format`` entry.
    * a directory containing ``wikiconf.txt`` -- that file becomes ``conf``.
    * a path ending in ``.zip`` -- the archive's ``nfo.json`` selects nuwiki
      or multi-nuwiki handling (the latter is extracted to a temp dir).
    * anything else -- read as a config file whose ``images`` and ``wiki``
      sections are instantiated through ``dispatch``.

    Extra keyword arguments are forwarded to ``wikiconf`` in the URL cases
    only.

    Returns:
        The populated ``Environment``, or a ``MultiEnvironment`` for
        multi-nuwiki input.

    Raises:
        RuntimeError: for old-style zip wikis, unknown archive formats,
            unreadable config files, or config sections that lack a ``type``
            or name a type missing from ``dispatch``.
    """
    kw = ndict(**kw)
    res = Environment(metabook)

    url = None
    if conf.startswith(':'):
        wiki_name = conf[1:]
        if wiki_name not in wpwikis:
            wpwikis[wiki_name] = dict(
                baseurl="http://%s.wikipedia.org/w/" % wiki_name,
                mw_license_url=None)

        url = wpwikis.get(wiki_name)['baseurl']

    if conf.startswith("http://") or conf.startswith("https://"):
        url = conf

    if url:
        res.wiki = None
        res.wikiconf = wikiconf(baseurl=url, **kw)
        # NOTE(review): every other branch assigns ``res.images``; this sets
        # ``res.image`` -- possibly a typo, confirm against Environment.
        res.image = None
        return res

    nfo_fn = os.path.join(conf, 'nfo.json')
    if os.path.exists(nfo_fn):
        from mwlib import nuwiki
        from mwlib import myjson as json

        try:
            # Close the file explicitly instead of leaking the handle.
            with open(nfo_fn, 'rb') as nfo_file:
                fmt = json.load(nfo_file)['format']
        except KeyError:
            # No 'format' entry: fall through to the remaining checks.
            pass
        else:
            if fmt == 'nuwiki':
                res.images = res.wiki = nuwiki.adapt(conf)
                res.metabook = res.wiki.metabook
                return res
            elif fmt == 'multi-nuwiki':
                return MultiEnvironment(conf)

    if os.path.exists(os.path.join(conf, "content.json")):
        raise RuntimeError("old zip wikis are not supported anymore")

    # yes, I really don't want to type this everytime
    wc = os.path.join(conf, "wikiconf.txt")
    if os.path.exists(wc):
        conf = wc

    if conf.lower().endswith(".zip"):
        import zipfile
        from mwlib import myjson as json
        conf = os.path.abspath(conf)

        zf = zipfile.ZipFile(conf)
        try:
            # ZipFile.read raises KeyError for a missing member, so this
            # handler covers both a missing nfo.json and a missing 'format'.
            fmt = json.loads(zf.read("nfo.json"))["format"]
        except KeyError:
            raise RuntimeError("old zip wikis are not supported anymore")

        if fmt == "nuwiki":
            from mwlib import nuwiki
            res.images = res.wiki = nuwiki.adapt(zf)
            if metabook is None:
                res.metabook = res.wiki.metabook
            return res
        elif fmt == u'multi-nuwiki':
            from mwlib import nuwiki
            import tempfile
            tmpdir = tempfile.mkdtemp()
            nuwiki.extractall(zf, tmpdir)
            res = MultiEnvironment(tmpdir)
            return res
        else:
            raise RuntimeError("unknown format %r" % (fmt, ))

    cp = res.configparser

    if not cp.read(conf):
        raise RuntimeError("could not read config file %r" % (conf, ))

    for section in ['images', 'wiki']:
        if not cp.has_section(section):
            continue

        args = dict(cp.items(section))
        if "type" not in args:
            raise RuntimeError("section %r does not have key 'type'" % section)
        # The remaining entries become keyword arguments of the factory.
        type_name = args.pop('type')
        try:
            factory = dispatch[section][type_name]
        except KeyError:
            raise RuntimeError("cannot handle type %r in section %r" % (type_name, section))

        setattr(res, section, factory(**args))

    assert res.wiki is not None, '_makewiki should have set wiki attribute'
    return res
コード例 #18
0
                    break
                yield d

        return HTTPResponse(output=readdata(), header=header)

    def do_zip_post(self, collection_id, post_data, is_new=False):
        """Validate a zip-post request and determine the target post URL.

        NOTE(review): this snippet appears to be cut off after
        ``params.post_url`` is assigned; as shown, the success path falls
        off the end without returning ``response``.  Confirm against the
        full source.

        Args:
            collection_id: identifier passed to ``_get_params`` and logged.
            post_data: mapping of POST arguments; ``metabook`` is required,
                and ``post_url`` is required when no pod API URL is set.
            is_new: not used in the visible code.

        Returns:
            ``self.error_response(...)`` when a required POST argument is
            missing.
        """
        params = self._get_params(post_data, collection_id=collection_id)

        # Presence check only; the metabook value itself is not used here.
        try:
            post_data['metabook']
        except KeyError, exc:
            return self.error_response('POST argument required: %s' % exc)

        pod_api_url = params.pod_api_url
        if pod_api_url:
            # Ask the pod API for the URLs; its answer is UTF-8 encoded JSON.
            result = json.loads(unicode(urllib2.urlopen(pod_api_url, data="any").read(), 'utf-8'))
            post_url = result['post_url'].encode('utf-8')
            response = {
                'state': 'ok',
                'redirect_url': result['redirect_url'].encode('utf-8'),
            }
        else:
            # Without a pod API the caller has to supply the post URL.
            try:
                post_url = post_data['post_url']
            except KeyError:
                return self.error_response('POST argument required: post_url')
            response = {'state': 'ok'}

        log.info('zip_post %s %s' % (collection_id, pod_api_url))
        params.post_url = post_url
コード例 #19
0
ファイル: test_metabook.py プロジェクト: ASaifM/mwlib
        {
            'type': 'chapter',
            'title': 'Chapter 2',
            'items': [
                {
                    'type': 'article',
                    'title': 'Article 3',
                    'displaytitle': 'Display Title',
                    'content_type': 'text/x-wiki',
                },
            ],
        },
    ],
}

# Round-trip the literal through json.dumps/json.loads so that test_metabook
# holds the deserialised form rather than the plain dict literal.
test_metabook = json.loads(json.dumps(test_metabook))


def test_parse_collection_page():
    """Check the header fields and first item parsed from test_wikitext1."""
    parsed = metabook.parse_collection_page(test_wikitext1)
    print(parsed)

    expected_header = (
        ('type', 'collection'),
        ('version', 1),
        ('title', 'Title'),
        ('subtitle', 'Subtitle'),
        ('summary', 'Summary line 1 Summary line 2 '),
    )
    for field, value in expected_header:
        assert parsed[field] == value

    entries = parsed['items']
    assert len(entries) == 2
    first_entry = entries[0]
    assert first_entry['type'] == 'chapter'
コード例 #20
0
ファイル: wiki.py プロジェクト: ASaifM/mwlib
def _makewiki(conf, metabook=None, **kw):
    """Build the wiki environment selected by ``conf``.

    ``conf`` is checked against the following cases, in order:

    * ``":<name>"`` -- resolved through ``wpwikis``; a name not yet known is
      added with the base URL ``http://<name>.wikipedia.org/w/``.
    * a string beginning with ``http://`` or ``https://`` -- taken as the
      base URL.
    * a directory with an ``nfo.json`` file -- handled as nuwiki or
      multi-nuwiki depending on the ``format`` entry.
    * a directory with a ``wikiconf.txt`` file -- that file replaces
      ``conf``.
    * a path ending in ``.zip`` -- ``nfo.json`` inside the archive decides
      between nuwiki and multi-nuwiki (extracted into a temp directory).
    * otherwise -- parsed as a config file; its ``images`` and ``wiki``
      sections are instantiated via ``dispatch``.

    Keyword arguments are passed on to ``wikiconf`` in the URL cases only.

    Returns:
        An ``Environment`` instance, or a ``MultiEnvironment`` for
        multi-nuwiki input.

    Raises:
        RuntimeError: for old-style zip wikis, unknown archive formats,
            config files that cannot be read, and config sections without a
            ``type`` or with a type unknown to ``dispatch``.
    """
    kw = ndict(**kw)
    res = Environment(metabook)

    url = None
    if conf.startswith(':'):
        shortname = conf[1:]
        if shortname not in wpwikis:
            wpwikis[shortname] = dict(baseurl="http://%s.wikipedia.org/w/" % shortname,
                                      mw_license_url=None)

        url = wpwikis.get(shortname)['baseurl']

    if conf.startswith("http://") or conf.startswith("https://"):
        url = conf

    if url:
        res.wiki = None
        res.wikiconf = wikiconf(baseurl=url, **kw)
        # NOTE(review): the other branches set ``res.images``; ``res.image``
        # here may be a typo -- confirm against Environment.
        res.image = None
        return res

    nfo_fn = os.path.join(conf, 'nfo.json')
    if os.path.exists(nfo_fn):
        from mwlib import nuwiki
        from mwlib import myjson as json

        try:
            # The context manager closes nfo.json instead of leaking it.
            with open(nfo_fn, 'rb') as nfo_file:
                wiki_format = json.load(nfo_file)['format']
        except KeyError:
            # nfo.json without a 'format' entry: try the remaining cases.
            pass
        else:
            if wiki_format == 'nuwiki':
                res.images = res.wiki = nuwiki.adapt(conf)
                res.metabook = res.wiki.metabook
                return res
            elif wiki_format == 'multi-nuwiki':
                return MultiEnvironment(conf)

    if os.path.exists(os.path.join(conf, "content.json")):
        raise RuntimeError("old zip wikis are not supported anymore")

    # yes, I really don't want to type this everytime
    wc = os.path.join(conf, "wikiconf.txt")
    if os.path.exists(wc):
        conf = wc

    if conf.lower().endswith(".zip"):
        import zipfile
        from mwlib import myjson as json
        conf = os.path.abspath(conf)

        zf = zipfile.ZipFile(conf)
        try:
            # A missing nfo.json member makes ZipFile.read raise KeyError,
            # just like a missing 'format' entry does.
            wiki_format = json.loads(zf.read("nfo.json"))["format"]
        except KeyError:
            raise RuntimeError("old zip wikis are not supported anymore")

        if wiki_format == "nuwiki":
            from mwlib import nuwiki
            res.images = res.wiki = nuwiki.adapt(zf)
            if metabook is None:
                res.metabook = res.wiki.metabook
            return res
        elif wiki_format == u'multi-nuwiki':
            from mwlib import nuwiki
            import tempfile
            tmpdir = tempfile.mkdtemp()
            nuwiki.extractall(zf, tmpdir)
            res = MultiEnvironment(tmpdir)
            return res
        else:
            raise RuntimeError("unknown format %r" % (wiki_format,))

    cp = res.configparser

    if not cp.read(conf):
        raise RuntimeError("could not read config file %r" % (conf,))

    for section in ['images', 'wiki']:
        if not cp.has_section(section):
            continue

        args = dict(cp.items(section))
        if "type" not in args:
            raise RuntimeError("section %r does not have key 'type'" % section)
        # Everything except 'type' is passed to the factory as keywords.
        type_name = args.pop('type')
        try:
            factory = dispatch[section][type_name]
        except KeyError:
            raise RuntimeError("cannot handle type %r in section %r" % (type_name, section))

        setattr(res, section, factory(**args))

    assert res.wiki is not None, '_makewiki should have set wiki attribute'
    return res
コード例 #21
0
 def __getitem__(self, key):
     """Return the JSON-decoded value stored under ``key``.

     Returns None when ``self.db`` has no entry for ``key`` or the stored
     value is empty.
     """
     raw = self.db.get(key, '')
     return json.loads(raw) if raw else None
コード例 #22
0
ファイル: nserve.py プロジェクト: alvarin32/mwlib
                    break
                yield d

        return HTTPResponse(output=readdata(), header=header)

    def do_zip_post(self, collection_id, post_data, is_new=False):
        """Check a zip-post request and work out where the ZIP is posted.

        NOTE(review): the snippet seems to end early -- after setting
        ``params.post_url`` nothing is queued or returned in the visible
        code, so ``response`` is built but never used.  Confirm against the
        complete source.

        Args:
            collection_id: identifier forwarded to ``_get_params`` and logged.
            post_data: mapping of POST arguments.  ``metabook`` must be
                present; ``post_url`` must be present when
                ``params.pod_api_url`` is empty.
            is_new: unused in the visible code.

        Returns:
            ``self.error_response(...)`` if a required POST argument is
            missing.
        """
        params = self._get_params(post_data, collection_id=collection_id)

        # The lookup only verifies that the required argument exists.
        try:
            post_data['metabook']
        except KeyError, exc:
            return self.error_response('POST argument required: %s' % exc)

        pod_api_url = params.pod_api_url
        if pod_api_url:
            # Query the pod API; the body is decoded as UTF-8 JSON.
            result = json.loads(unicode(urllib2.urlopen(pod_api_url, data="any").read(), 'utf-8'))
            post_url = result['post_url'].encode('utf-8')
            response = {
                'state': 'ok',
                'redirect_url': result['redirect_url'].encode('utf-8'),
            }
        else:
            # No pod API configured: the post URL must come from the request.
            try:
                post_url = post_data['post_url']
            except KeyError:
                return self.error_response('POST argument required: post_url')
            response = {'state': 'ok'}

        log.info('zip_post %s %s' % (collection_id, pod_api_url))
        params.post_url = post_url
コード例 #23
0
ファイル: nuwiki.py プロジェクト: hexmode/mwlib
 def __getitem__(self, key):
     """Look up ``key`` in ``self.db`` and decode the stored JSON.

     A missing key or an empty stored value yields None.
     """
     stored = self.db.get(key, "")
     if not stored:
         return None
     return json.loads(stored)
コード例 #24
0
            'chapter',
            'title':
            'Chapter 2',
            'items': [
                {
                    'type': 'article',
                    'title': 'Article 3',
                    'displaytitle': 'Display Title',
                    'content_type': 'text/x-wiki',
                },
            ],
        },
    ],
}

# Serialise the literal and parse it back, so test_metabook refers to the
# result of json.loads instead of the hand-written dict.
test_metabook = json.loads(json.dumps(test_metabook))


def test_parse_collection_page():
    """Verify the collection header and first item parsed from test_wikitext1."""
    collection = metabook.parse_collection_page(test_wikitext1)
    print(collection)

    # Header fields with the values expected from the first wikitext sample.
    header_checks = [
        ('type', 'collection'),
        ('version', 1),
        ('title', 'Title'),
        ('subtitle', 'Subtitle'),
        ('summary', 'Summary line 1 Summary line 2 '),
    ]
    for name, expected in header_checks:
        assert collection[name] == expected

    children = collection['items']
    assert len(children) == 2
    assert children[0]['type'] == 'chapter'