Ejemplo n.º 1
0
 def addImage(self, name, imagedb=None, wikidb=None):
     """Write the image called ``name`` into the ZIP file and record its metadata.

     An image that is already registered in ``self.images`` is skipped.
     A new image gets its (initially empty) entry before any lookup is
     attempted, so that entry stays empty if no file can be obtained.

     @param name: image name
     @type name: unicode

     @param imagedb: ImageDB to use; it is dereferenced unconditionally for
         images that are not registered yet

     @param wikidb: handed to ``imagedb.getImageTemplates()``
     """

     if name in self.images:
         return
     entry = self.images[name] = {}

     disk_path = imagedb.getDiskPath(name, size=self.imagesize)
     if disk_path is None:
         log.warn('Could not get image %r' % name)
         return

     # Apostrophes are replaced by dashes in the archive member name.
     arcname = (u"images/%s" % name.replace("'", '-')).encode("utf-8")
     self.zf.write(disk_path, arcname)

     entry['url'] = imagedb.getURL(name, size=self.imagesize)

     # The two remaining keys are only stored when they carry a value.
     description_url = imagedb.getDescriptionURL(name)
     if description_url:
         entry['descriptionurl'] = description_url

     image_templates = imagedb.getImageTemplates(name, wikidb=wikidb)
     if image_templates:
         entry['templates'] = image_templates
Ejemplo n.º 2
0
    def addImage(self, name, imagedb=None, wikidb=None):
        """Write the image called ``name`` into the ZIP file and record its metadata.

        An image that is already registered in ``self.images`` is skipped.
        A new image gets its (initially empty) entry before any lookup is
        attempted, so that entry stays empty if no file can be obtained.

        @param name: image name
        @type name: unicode

        @param imagedb: ImageDB to use; it is dereferenced unconditionally for
            images that are not registered yet

        @param wikidb: handed to ``imagedb.getImageTemplates()``
        """

        if name in self.images:
            return
        entry = self.images[name] = {}

        disk_path = imagedb.getDiskPath(name, size=self.imagesize)
        if disk_path is None:
            log.warn('Could not get image %r' % name)
            return

        # Apostrophes are replaced by dashes in the archive member name.
        arcname = (u"images/%s" % name.replace("'", '-')).encode("utf-8")
        self.zf.write(disk_path, arcname)

        entry['url'] = imagedb.getURL(name, size=self.imagesize)

        # The two remaining keys are only stored when they carry a value.
        description_url = imagedb.getDescriptionURL(name)
        if description_url:
            entry['descriptionurl'] = description_url

        image_templates = imagedb.getImageTemplates(name, wikidb=wikidb)
        if image_templates:
            entry['templates'] = image_templates
 def fetch_image_job(name):
     """Fetch one image, add it to the ZIP file and record its metadata.

     ``self``, ``imagedb`` and ``wikidb`` are not parameters; they are taken
     from the surrounding scope.  If no disk path can be obtained a warning
     is logged and nothing else happens (the image counter is not advanced
     in that case).

     @param name: image name; ``self.images[name]`` must already exist
     """
     path = imagedb.getDiskPath(name, size=self.imagesize)
     if path is None:
         log.warn('Could not get image %r' % name)
         return

     # The write to the ZIP file is the only step done under the lock.
     self.zf_lock.acquire()
     try:
         zipname = u"images/%s" % name.replace("'", '-')
         self.zf.write(path, zipname.encode("utf-8"))
     finally:
         self.zf_lock.release()

     self.images[name]['url'] = imagedb.getURL(name, size=self.imagesize)
     descriptionurl = imagedb.getDescriptionURL(name)
     if descriptionurl:
         self.images[name]['descriptionurl'] = descriptionurl
     templates = imagedb.getImageTemplates(name, wikidb=wikidb)
     if templates:
         self.images[name]['templates'] = templates
     # Not every image database offers contributor information.
     if hasattr(imagedb, 'getContributors'):
         contribs = imagedb.getContributors(name, wikidb=wikidb)
         if contribs:
             self.images[name]['contributors'] = contribs
     if self.fetchimages_status:
         self.image_count += 1
         # Floor division keeps the percentage an integer even where "/"
         # means true division.
         self.fetchimages_status(progress=self.image_count * 100 // self.num_images)
Ejemplo n.º 4
0
    def do_download(self, collection_id, post_data, is_new=False):
        """Build a response that serves the output file of a collection.

        The content type and the download file name are taken from the
        status file when it provides ``content_type`` / ``file_extension``;
        a missing entry is only logged.

        @param collection_id: ID of the collection to download
        @param post_data: mapping of POST arguments; ``writer`` is optional
            and defaults to ``self.default_writer``
        @param is_new: if true, an error response is returned right away
        @returns: a Response streaming the output file, the result of
            ``self.error_response()`` if ``is_new`` is set, or a Response
            with status 500 if anything raised
        """
        if is_new:
            return self.error_response('POST argument required: collection_id')

        writer = post_data.get('writer', self.default_writer)

        try:
            log.info('download %s %s' % (collection_id, writer))

            output_path = self.get_path(collection_id, self.output_filename, writer)
            # Set the file's access and modification times to the current time.
            os.utime(output_path, None)
            status = self.read_status_file(collection_id, writer)
            response = Response()
            response.app_iter = FileIterable(output_path)
            response.content_length = os.path.getsize(output_path)
            if 'content_type' in status:
                response.content_type = status['content_type'].encode('utf-8', 'ignore')
            else:
                log.warn('no content type in status file')
            if 'file_extension' in status:
                response.headers['Content-Disposition'] = 'inline; filename=collection.%s' %  (
                    status['file_extension'].encode('utf-8', 'ignore'),
                )
            else:
                log.warn('no file extension in status file')
            return response
        except Exception as exc:
            # Request boundary: any failure is logged and turned into a 500.
            log.ERROR('exception in do_download(): %r' % exc)
            return Response(status=500)
Ejemplo n.º 5
0
    def getParsedTemplate(self, name):
        if name.startswith("[["):
            return None

        if name == '':
            return ''

        if name.startswith(":"):
            log.info("including article")
            raw = self.db.getRawArticle(name[1:])
        else:
            if len(name) > 1:
                name = name[0].capitalize() + name[1:]
                name = self.templateprefix + name

            # Check to see if this is a template in our blacklist --
            # one that we don't want to bother rendering.
            if name in self.templateblacklist:
                log.info("Skipping template " + name.encode('utf8'))
                raw = None
            else:
                raw = self.db.getTemplate(name, True)

        if raw is None:
            log.warn("no template", repr(name))
            res = None
        else:
            log.info("parsing template", repr(name))
            res = parse(raw)
            if DEBUG:
                print "TEMPLATE:", name, repr(raw)
                res.show()

        return res
Ejemplo n.º 6
0
    def getParsedTemplate(self, name):
        if name.startswith("[["):
            return None

        if name == '':
            return ''

        if name.startswith(":"):
            log.info("including article")
            raw = self.db.getRawArticle(name[1:])
        else:
            if len(name) > 1:
                name = name[0].capitalize() + name[1:]
                name = self.templateprefix + name

            # Check to see if this is a template in our blacklist --
            # one that we don't want to bother rendering.
            if name in self.templateblacklist:
                log.info("Skipping template " + name.encode('utf8'))
                raw = None
            else:
                raw = self.db.getTemplate(name, True)

        if raw is None:
            log.warn("no template", repr(name))
            res = None
        else:
            log.info("parsing template", repr(name))
            res = parse(raw)
            if DEBUG:
                print "TEMPLATE:", name, repr(raw)
                res.show()
                
        return res
def clean_up(cache_dir):
    """Look for PID files whose processes have not finished/erred but ceased
    to exist: remove each such PID file and, unless an error file for the
    same extension already exists, write one saying that the process died.

    @param cache_dir: directory handed to ``get_collection_dirs()``
    """
    import errno  # local import: only needed for the ESRCH check below

    for path in get_collection_dirs(cache_dir):
        for e in os.listdir(path):
            if '.' not in e:
                continue
            parts = e.split('.')
            if parts[0] != Application.pid_filename:
                continue
            ext = parts[1]
            if not ext:
                continue
            pid_file = os.path.join(path, e)
            try:
                # "with" closes the file even if int() fails.
                with open(pid_file, 'rb') as f:
                    pid = int(f.read())
            except ValueError:
                log.ERROR('PID file %r with invalid contents' % pid_file)
                continue
            except IOError as exc:
                log.ERROR('Could not read PID file %r: %s' % (pid_file, exc))
                continue

            try:
                # Signal 0 performs the error checking only; nothing is sent.
                os.kill(pid, 0)
            except OSError as exc:
                if exc.errno == errno.ESRCH:  # No such process
                    log.warn('Have dangling PID file %r' % pid_file)
                    os.unlink(pid_file)
                    error_file = os.path.join(path, '%s.%s' % (Application.error_filename, ext))
                    if not os.path.exists(error_file):
                        with open(error_file, 'wb') as f:
                            f.write(b'Process died.\n')
Ejemplo n.º 8
0
 def flatten(self, expander, variables, res):
     """Run ``self._flatten()`` and survive a RuntimeError raised by it.

     @returns: whatever ``self._flatten()`` returns, or None if it raised a
         RuntimeError
     """
     try:
         return self._flatten(expander, variables, res)
     except RuntimeError as err:
         # we expect a "RuntimeError: maximum recursion depth exceeded" here.
         # logging this error is rather hard...
         try:
             log.warn("error %s ignored" % (err, ))
         except Exception:
             # Best effort only: a failure to log must not replace the
             # error being ignored.  KeyboardInterrupt and SystemExit are
             # no longer swallowed.
             pass
     return None
Ejemplo n.º 9
0
 def flatten(self, expander, variables, res):
     """Run ``self._flatten()`` and survive a RuntimeError raised by it.

     @returns: whatever ``self._flatten()`` returns, or None if it raised a
         RuntimeError
     """
     try:
         return self._flatten(expander, variables, res)
     except RuntimeError as err:
         # we expect a "RuntimeError: maximum recursion depth exceeded" here.
         # logging this error is rather hard...
         try:
             log.warn("error %s ignored" % (err,))
         except Exception:
             # Best effort only: a failure to log must not replace the
             # error being ignored.  KeyboardInterrupt and SystemExit are
             # no longer swallowed.
             pass
     return None
Ejemplo n.º 10
0
def build_book(env, status_callback=None):
    """Parse every article of ``env.metabook`` and assemble a book from them.

    Chapters become ``parser.Chapter`` children of the book.  Each parsed
    article is appended to the most recent chapter, or directly to the book
    if no chapter has been seen yet.  Articles that cannot be parsed are
    logged and skipped.

    @param env: environment providing ``metabook`` and ``wiki``
    @param status_callback: optional callable invoked with the keyword
        arguments ``status``, ``progress`` and ``article``
    @returns: the assembled book
    """
    book = parser.Book()
    progress = 0
    if status_callback is None:
        status_callback = lambda **kwargs: None

    num_articles = float(len(env.metabook.articles()))
    # Start at 0 so that an article met in walk() although articles() was
    # empty cannot hit an unbound progress_step below.
    progress_step = 0
    if num_articles > 0:
        progress_step = 100 / num_articles

    lastChapter = None
    for item in env.metabook.walk():
        if item.type == 'chapter':
            chapter = parser.Chapter(item.title.strip())
            book.appendChild(chapter)
            lastChapter = chapter
        elif item.type == 'article':
            status_callback(status='parsing',
                            progress=progress,
                            article=item.title)
            progress += progress_step

            # An item may carry its own environment, which then wins.
            if item._env:
                wiki = item._env.wiki
            else:
                wiki = env.wiki

            a = wiki.getParsedArticle(title=item.title, revision=item.revision)

            if a is not None:
                if item.displaytitle is not None:
                    a.caption = item.displaytitle
                # Falsy lookups are normalized to None.
                a.url = wiki.getURL(item.title, item.revision) or None
                source = wiki.getSource(item.title, item.revision)
                if source:
                    a.wikiurl = source.url
                else:
                    a.wikiurl = None

                a.authors = wiki.getAuthors(item.title, revision=item.revision)
                if lastChapter:
                    lastChapter.appendChild(a)
                else:
                    book.appendChild(a)
            else:
                log.warn('No such article: %r' % item.title)

    status_callback(status='parsing', progress=progress, article='')
    return book
Ejemplo n.º 11
0
def build_book(env, status_callback=None):
    """Parse every article of ``env.metabook`` and assemble a book from them.

    Chapters become ``parser.Chapter`` children of the book.  Each parsed
    article is appended to the most recent chapter, or directly to the book
    if no chapter has been seen yet.  Articles that cannot be parsed are
    logged and skipped.

    @param env: environment providing ``metabook`` and ``wiki``
    @param status_callback: optional callable invoked with the keyword
        arguments ``status``, ``progress`` and ``article``
    @returns: the assembled book
    """
    book = parser.Book()
    progress = 0
    if status_callback is None:
        status_callback = lambda **kwargs: None

    num_articles = float(len(env.metabook.articles()))
    # Start at 0 so that an article met in walk() although articles() was
    # empty cannot hit an unbound progress_step below.
    progress_step = 0
    if num_articles > 0:
        progress_step = 100/num_articles

    lastChapter = None
    for item in env.metabook.walk():
        if item.type == 'chapter':
            chapter = parser.Chapter(item.title.strip())
            book.appendChild(chapter)
            lastChapter = chapter
        elif item.type == 'article':
            status_callback(status='parsing', progress=progress, article=item.title)
            progress += progress_step

            # An item may carry its own environment, which then wins.
            if item._env:
                wiki = item._env.wiki
            else:
                wiki = env.wiki

            a = wiki.getParsedArticle(title=item.title, revision=item.revision)

            if a is not None:
                if item.displaytitle is not None:
                    a.caption = item.displaytitle
                # Falsy lookups are normalized to None.
                a.url = wiki.getURL(item.title, item.revision) or None
                source = wiki.getSource(item.title, item.revision)
                if source:
                    a.wikiurl = source.url
                else:
                    a.wikiurl = None

                a.authors = wiki.getAuthors(item.title, revision=item.revision)
                if lastChapter:
                    lastChapter.appendChild(a)
                else:
                    book.appendChild(a)
            else:
                log.warn('No such article: %r' % item.title)

    status_callback(status='parsing', progress=progress, article='')
    return book
Ejemplo n.º 12
0
 def fetch_article_job(job_id):
     """Fetch the raw text of ``title`` and follow one redirect, if any.

     ``self``, ``wikidb``, ``title`` and ``revision`` are not parameters;
     they are taken from the surrounding scope.  A failed lookup is only
     logged.

     @param job_id: not used by this function
     """
     db = RecordDB(wikidb, self.articles, self.templates,
                   self.sources)
     text = db.getRawArticle(title, revision=revision)
     if text is None:
         log.warn('Could not get article %r' % title)
         return

     redirect = self.redirect_rex.search(text)
     if not redirect:
         return

     # The redirect target is fetched without a revision.
     target = redirect.group('redirect')
     if db.getRawArticle(target) is None:
         log.warn('Could not get redirected article %r (from %r)' %
                  (target, title))
Ejemplo n.º 13
0
 def fetch_article_job(job_id):
     """Fetch the raw text of ``title`` and follow one redirect, if any.

     ``self``, ``wikidb``, ``title`` and ``revision`` are not parameters;
     they are taken from the surrounding scope.  A failed lookup is only
     logged.

     @param job_id: not used by this function
     """
     db = RecordDB(wikidb, self.articles, self.templates, self.sources)
     text = db.getRawArticle(title, revision=revision)
     if text is None:
         log.warn('Could not get article %r' % title)
         return

     redirect = self.redirect_rex.search(text)
     if not redirect:
         return

     # The redirect target is fetched without a revision.
     target = redirect.group('redirect')
     if db.getRawArticle(target) is None:
         log.warn('Could not get redirected article %r (from %r)' % (
             target, title
         ))
Ejemplo n.º 14
0
    def addArticle(self, title, revision=None, wikidb=None, imagedb=None):
        """Fetch the article ``title`` and hand its text to ``parseArticle()``.

        An article already registered in ``self.articles`` is skipped.  The
        raw text is fetched through a RecordDB; if it matches
        ``self.redirect_rex`` the redirect target is fetched instead.  A
        failed lookup is logged and ends the call.

        @param title: article title
        @type title: unicode

        @param revision: article revision (optional)
        @type revision: int

        @param wikidb: WikiDB to use

        @param imagedb: ImageDB to use (optional)
        """

        if title in self.articles:
            return
        self.articles[title] = {}
        self.status(article=title)

        db = RecordDB(wikidb, self.articles, self.templates, self.sources)
        text = db.getRawArticle(title, revision=revision)
        if text is None:
            log.warn('Could not get article %r' % title)
            return

        redirect = self.redirect_rex.search(text)
        if redirect:
            # The redirect target is fetched without a revision.
            target = redirect.group('redirect')
            text = db.getRawArticle(target)
            if text is None:
                log.warn('Could not get redirected article %r (from %r)' %
                         (target, title))
                return

        self.parseArticle(title, revision=revision, raw=text,
                          wikidb=wikidb, imagedb=imagedb)

        self.article_count += 1
        # No progress is reported when the article total is zero/unset.
        if self.num_articles:
            percent = self.article_count * 100 // self.num_articles
            self.status(progress=percent)
Ejemplo n.º 15
0
 def addArticle(self, title, revision=None, wikidb=None, imagedb=None):
     """Fetch the article ``title`` and hand its text to ``parseArticle()``.

     An article already registered in ``self.articles`` is skipped.  The
     raw text is fetched through a RecordDB; if it matches
     ``self.redirect_rex`` the redirect target is fetched instead.  A
     failed lookup is logged and ends the call.

     @param title: article title
     @type title: unicode

     @param revision: article revision (optional)
     @type revision: int

     @param wikidb: WikiDB to use

     @param imagedb: ImageDB to use (optional)
     """

     if title in self.articles:
         return
     self.articles[title] = {}
     self.status(article=title)

     db = RecordDB(wikidb, self.articles, self.templates, self.sources)
     text = db.getRawArticle(title, revision=revision)
     if text is None:
         log.warn('Could not get article %r' % title)
         return

     redirect = self.redirect_rex.search(text)
     if redirect:
         # The redirect target is fetched without a revision.
         target = redirect.group('redirect')
         text = db.getRawArticle(target)
         if text is None:
             log.warn('Could not get redirected article %r (from %r)' % (
                 target, title
             ))
             return

     self.parseArticle(title, revision=revision, raw=text,
                       wikidb=wikidb, imagedb=imagedb)

     self.article_count += 1
     # No progress is reported when the article total is zero/unset.
     if self.num_articles:
         percent = self.article_count * 100 // self.num_articles
         self.status(progress=percent)
 def fetch_article_job(job_id):
     """Fetch the raw text of ``title``, follow one redirect and report progress.

     ``self``, ``wikidb``, ``title`` and ``revision`` are not parameters;
     they are taken from the surrounding scope.  Failed lookups are only
     logged; the article counter is advanced either way.

     @param job_id: not used by this function
     """
     if self.fetcharticle_status:
         self.fetcharticle_status(article=title)
     recorddb = RecordDB(wikidb, self.articles, self.templates, self.sources)
     raw = recorddb.getRawArticle(title, revision=revision)
     if raw is None:
         log.warn('Could not get article %r' % title)
     else:
         mo = self.redirect_rex.search(raw)
         if mo:
             # The redirect target is fetched without a revision.
             raw = recorddb.getRawArticle(mo.group('redirect'))
             if raw is None:
                 log.warn('Could not get redirected article %r (from %r)' % (
                     mo.group('redirect'), title
                 ))
     self.article_count += 1
     # Same rule as addArticle(): integer percentage, and no report when the
     # article total is zero/unset (which would otherwise divide by zero).
     if self.fetcharticle_status and self.num_articles:
         self.fetcharticle_status(progress=self.article_count * 100 // self.num_articles)
Ejemplo n.º 17
0
 def fetch_image_job(name):
     """Fetch one image, add it to the ZIP file and record its metadata.

     ``self``, ``imagedb`` and ``wikidb`` are not parameters; they are taken
     from the surrounding scope.  If no disk path can be obtained a warning
     is logged and nothing else happens.

     @param name: image name; ``self.images[name]`` must already exist
     """
     disk_path = imagedb.getDiskPath(name, size=self.imagesize)
     if disk_path is None:
         log.warn('Could not get image %r' % name)
         return

     # The write to the ZIP file is the only step done under the lock.
     self.zf_lock.acquire()
     try:
         arcname = (u"images/%s" % name.replace("'", '-')).encode("utf-8")
         self.zf.write(disk_path, arcname)
     finally:
         self.zf_lock.release()

     url = imagedb.getURL(name, size=self.imagesize)
     self.images[name]['url'] = url

     # The two remaining keys are only stored when they carry a value.
     description_url = imagedb.getDescriptionURL(name)
     if description_url:
         self.images[name]['descriptionurl'] = description_url

     image_templates = imagedb.getImageTemplates(name, wikidb=wikidb)
     if image_templates:
         self.images[name]['templates'] = image_templates
Ejemplo n.º 18
0
def checkservice(api, serviceurl, baseurl, writer, maxarticles,
                 from_email=None,
                 mail_recipients=None,
                 render_timeout=RENDER_TIMEOUT_DEFAULT  # seconds or None
                 ):
    """Render a random metabook through the service and check the result.

    The render status is polled once per second until it leaves the
    "progress" state or ``render_timeout`` is exceeded; on timeout the
    render process is killed and the run counts as failed.  Failures are
    passed to ``reportError()``.

    @param api: passed to ``getRandomMetabook()``
    @param serviceurl: URL of the render service
    @param baseurl: passed to ``postRenderCommand()`` and ``reportError()``
    @param writer: name of the writer to test
    @param maxarticles: upper bound for the size of the random metabook
    @param from_email: passed to ``reportError()``
    @param mail_recipients: passed to ``reportError()``
    @param render_timeout: seconds to wait for the render, or None for no limit
    @returns: True if a document was rendered, downloaded and checked,
        False otherwise
    """
    #    arts = getRandomArticles(api, min=1, max=maxarticles)
    #    log.info('random articles: %r' % arts)
    #    metabook = getMetabook(arts)
    metabook = getRandomMetabook(api, min=5, max=maxarticles)
    if not metabook:
        reportError('render', metabook, dict(reason="getRandomMetabook Failed"), baseurl, writer,
                    from_email=from_email,
                    mail_recipients=mail_recipients)
        time.sleep(60)
        # Without a metabook there is nothing to render; do not post an
        # empty one to the service.
        return False

    res = postRenderCommand(metabook, baseurl, serviceurl, writer)
    collection_id = res['collection_id']
    st = time.time()
    while True:
        time.sleep(1)
        res = getRenderStatus(res["collection_id"], serviceurl, writer)
        if res["state"] != "progress":
            break
        if render_timeout and (time.time() - st) > render_timeout:
            log.timeout('Killing render proc for collection ID %r' % collection_id)
            r = postRenderKillCommand(collection_id, serviceurl, writer)
            if r['killed']:
                log.info('Killed.')
            else:
                log.warn('Nothing to kill!?')
            # Mark the status as failed so the timeout is reported below.
            res["state"] = "failed"
            res["reason"] = "render_timeout (%ds)" % render_timeout
            break
    if res["state"] == "finished":
        d = download(res["collection_id"], serviceurl, writer).read()
        log.info("received %s document with %d bytes" % (writer, len(d)))
        checkDoc(d, writer)
        return True
    else:
        reportError('render', metabook, res, baseurl, writer,
                    from_email=from_email,
                    mail_recipients=mail_recipients,
                    )
    return False
Ejemplo n.º 19
0
 def fetch_image_job(name):
     """Fetch one image, add it to the ZIP file and record its metadata.

     ``self``, ``imagedb`` and ``wikidb`` are not parameters; they are taken
     from the surrounding scope.  If no disk path can be obtained a warning
     is logged and nothing else happens.

     @param name: image name; ``self.images[name]`` must already exist
     """
     disk_path = imagedb.getDiskPath(name, size=self.imagesize)
     if disk_path is None:
         log.warn('Could not get image %r' % name)
         return

     # The write to the ZIP file is the only step done under the lock.
     self.zf_lock.acquire()
     try:
         arcname = (u"images/%s" % name.replace("'", '-')).encode("utf-8")
         self.zf.write(disk_path, arcname)
     finally:
         self.zf_lock.release()

     url = imagedb.getURL(name, size=self.imagesize)
     self.images[name]['url'] = url

     # The two remaining keys are only stored when they carry a value.
     description_url = imagedb.getDescriptionURL(name)
     if description_url:
         self.images[name]['descriptionurl'] = description_url

     image_templates = imagedb.getImageTemplates(name, wikidb=wikidb)
     if image_templates:
         self.images[name]['templates'] = image_templates
Ejemplo n.º 20
0
    def getParsedTemplate(self, name):
        if name.startswith("[["):
            return None

        if name == '':
            return ''

        if name.startswith(":"):
            log.info("including article")
            raw = self.db.getRawArticle(name[1:])
        else:
            if len(name) > 1:
                name = name[0].capitalize() + name[1:]
                name = self.templateprefix + name

            # Check to see if this is a template in our blacklist --
            # one that we don't want to bother rendering.
            if name in self.templateblacklist:
                log.info("Skipping template " + name.encode('utf8'))
                raw = None
            else:
                raw = self.db.getTemplate(name, True)

        if raw is None:
            log.warn("no template", repr(name))
            res = None
        else:
            # add newline to templates starting with a (semi)colon, or tablemarkup
            # XXX what else? see test_implicit_newline in test_expander
            if raw.startswith(":") or raw.startswith(";") or raw.startswith(
                    "{|"):
                raw = '\n' + raw

            log.info("parsing template", repr(name))
            res = Parser(raw).parse()
            if DEBUG:
                print "TEMPLATE:", name, repr(raw)
                res.show()

        return res
Ejemplo n.º 21
0
    def makewiki(self):
        """Create the wiki environment described by ``self.options``.

        ``options.login`` is split at colons into user name, password and
        (if more than one colon is present) domain.  The remaining options
        may drop the images, set up template exclusion, replace the metabook
        by a parsed collection page and override title and subtitle.

        @returns: the environment created by ``wiki.makewiki()``
        @raises RuntimeError: if the collection page cannot be fetched
        """
        opts = self.options

        username = password = domain = None
        if opts.login:
            credentials = opts.login.split(':', 2)
            if len(credentials) == 2:
                username, password = credentials
            else:
                username, password, domain = credentials

        env = wiki.makewiki(
            opts.config,
            metabook=self.metabook,
            username=username,
            password=password,
            domain=domain,
            script_extension=opts.script_extension,
        )

        if opts.noimages:
            env.images = None

        if opts.template_blacklist or opts.template_exclusion_category:
            if not hasattr(env.wiki, 'setTemplateExclusion'):
                log.warn(
                    'WikiDB does not support setting a template blacklist')
            else:
                env.wiki.setTemplateExclusion(
                    blacklist=opts.template_blacklist,
                    category=opts.template_exclusion_category,
                )

        if opts.collectionpage:
            wikitext = env.wiki.getRawArticle(opts.collectionpage)
            if wikitext is None:
                raise RuntimeError('No such collection page: %r' %
                                   (opts.collectionpage, ))
            self.metabook = metabook.parse_collection_page(wikitext)
            env.metabook = self.metabook

        # Title and subtitle from the options override the metabook's values.
        for key in ('title', 'subtitle'):
            value = getattr(opts, key)
            if value:
                env.metabook[key] = value

        return env
Ejemplo n.º 22
0
 def makewiki(self):
     """Create the wiki environment described by ``self.options``.

     ``options.login`` is split at colons into user name, password and
     (if more than one colon is present) domain.  The remaining options
     may drop the images, set up template exclusion, replace the metabook
     by a parsed collection page and override title and subtitle.

     @returns: the environment created by ``wiki.makewiki()``
     @raises RuntimeError: if the collection page cannot be fetched
     """
     opts = self.options

     username = password = domain = None
     if opts.login:
         credentials = opts.login.split(':', 2)
         if len(credentials) == 2:
             username, password = credentials
         else:
             username, password, domain = credentials

     env = wiki.makewiki(
         opts.config,
         metabook=self.metabook,
         username=username,
         password=password,
         domain=domain,
         script_extension=opts.script_extension,
     )

     if opts.noimages:
         env.images = None

     if opts.template_blacklist or opts.template_exclusion_category:
         if not hasattr(env.wiki, 'setTemplateExclusion'):
             log.warn('WikiDB does not support setting a template blacklist')
         else:
             env.wiki.setTemplateExclusion(
                 blacklist=opts.template_blacklist,
                 category=opts.template_exclusion_category,
             )

     if opts.collectionpage:
         wikitext = env.wiki.getRawArticle(opts.collectionpage)
         if wikitext is None:
             raise RuntimeError('No such collection page: %r' % (
                 opts.collectionpage,
             ))
         self.metabook = metabook.parse_collection_page(wikitext)
         env.metabook = self.metabook

     # Title and subtitle from the options override the metabook's values.
     for key in ('title', 'subtitle'):
         value = getattr(opts, key)
         if value:
             env.metabook[key] = value

     return env
Ejemplo n.º 23
0
    def getParsedTemplate(self, name):
        if name.startswith("[["):
            return None

        if name == "":
            return ""

        if name.startswith(":"):
            log.info("including article")
            raw = self.db.getRawArticle(name[1:])
        else:
            if len(name) > 1:
                name = name[0].capitalize() + name[1:]
                name = self.templateprefix + name

            # Check to see if this is a template in our blacklist --
            # one that we don't want to bother rendering.
            if name in self.templateblacklist:
                log.info("Skipping template " + name.encode("utf8"))
                raw = None
            else:
                raw = self.db.getTemplate(name, True)

        if raw is None:
            log.warn("no template", repr(name))
            res = None
        else:
            # add newline to templates starting with a (semi)colon, or tablemarkup
            # XXX what else? see test_implicit_newline in test_expander
            if raw.startswith(":") or raw.startswith(";") or raw.startswith("{|"):
                raw = "\n" + raw

            log.info("parsing template", repr(name))
            res = Parser(raw).parse()
            if DEBUG:
                print "TEMPLATE:", name, repr(raw)
                res.show()

        return res
Ejemplo n.º 24
0
def build_book(env, status_callback=None, progress_range=None):
    """Parse every article of ``env.metabook`` and assemble a book from them.

    Chapters and parsed articles are appended to ``book.children`` in the
    order of the metabook's item list.  Articles that cannot be parsed are
    logged and skipped.

    @param env: environment providing ``metabook`` and ``wiki``
    @param status_callback: optional callable invoked with the keyword
        arguments ``status``, ``progress`` and ``article``
    @param progress_range: pair (start, end) of progress values; it is
        indexed whenever ``status_callback`` is given, so it must not be
        None in that case
    @returns: the assembled book
    """
    book = parser.Book()
    if status_callback is not None:
        progress = progress_range[0]
        # Start at 0 so that progress_step is always bound when the loop
        # below meets an article.
        progress_step = 0
        num_articles = float(len(metabook.get_item_list(env.metabook,
            filter_type='article',
        )))
        if num_articles > 0:
            progress_step = int(
                (progress_range[1] - progress_range[0])/num_articles
            )
    for item in metabook.get_item_list(env.metabook):
        if item['type'] == 'chapter':
            book.children.append(parser.Chapter(item['title'].strip()))
        elif item['type'] == 'article':
            if status_callback is not None:
                status_callback(
                    status='parsing',
                    progress=progress,
                    article=item['title'],
                )
                progress += progress_step
            a = env.wiki.getParsedArticle(
                title=item['title'],
                revision=item.get('revision'),
            )
            if a is not None:
                if "displaytitle" in item:
                    a.caption = item['displaytitle']
                url = env.wiki.getURL(item['title'], item.get('revision'))
                if url is not None:
                    # Store the URL with its percent-escapes decoded.
                    a.url = unicode(urllib.unquote(url.encode('utf-8')), 'utf-8')
                else:
                    # No URL available: do not crash on None.encode().
                    a.url = None
                a.authors = env.wiki.getAuthors(item['title'], revision=item.get('revision'))
                book.children.append(a)
            else:
                log.warn('No such article: %r' % item['title'])

    if status_callback is not None:
        status_callback(status='parsing', progress=progress, article='')
    return book
Ejemplo n.º 25
0
def clean_cache(max_age, cache_dir):
    """Clean all subdirectories of cache_dir whose mtime is before now-max_age

    Entries that are not directories, or whose name does not match
    ``collection_id_rex``, are logged and left alone.  A directory that
    cannot be removed is logged and skipped.

    @param max_age: max age of directories in seconds
    @type max_age: int

    @param cache_dir: cache directory
    @type cache_dir: basestring
    """

    now = time.time()
    for d in os.listdir(cache_dir):
        path = os.path.join(cache_dir, d)
        if not os.path.isdir(path) or not collection_id_rex.match(d):
            log.warn('unknown item in cache dir %r: %r' % (cache_dir, d))
            continue
        if now - os.stat(path).st_mtime < max_age:
            continue
        try:
            log.info('removing directory %r' % path)
            shutil.rmtree(path)
        except Exception as exc:
            # Best effort: one failing directory must not stop the sweep.
            log.ERROR('could not remove directory %r: %s' % (path, exc))
Ejemplo n.º 26
0
def clean_cache(max_age, cache_dir):
    """Clean all subdirectories of cache_dir whose mtime is before now-max_age

    Entries that are not directories, or whose name does not match
    collection_id_rex, are reported with a warning and left untouched.
    A failure to remove a directory is logged and the sweep continues with
    the next entry.

    @param max_age: max age of directories in seconds
    @type max_age: int

    @param cache_dir: cache directory
    @type cache_dir: basestring
    """

    now = time.time()
    for d in os.listdir(cache_dir):
        path = os.path.join(cache_dir, d)
        if not os.path.isdir(path) or not collection_id_rex.match(d):
            log.warn('unknown item in cache dir %r: %r' % (cache_dir, d))
            continue
        if now - os.stat(path).st_mtime < max_age:
            # modified within the last max_age seconds: keep it
            continue
        try:
            log.info('removing directory %r' % path)
            shutil.rmtree(path)
        except Exception as exc:
            # "except ... as" is accepted by Python 2.6+ and Python 3, unlike
            # the comma form which is a SyntaxError on Python 3.
            log.ERROR('could not remove directory %r: %s' % (path, exc))
Ejemplo n.º 27
0
    def parse_args(self):
        """Parse sys.argv, then validate and normalize the resulting options

        The command line arguments are decoded from UTF-8 before they are
        handed to optparse. Afterwards:

          - every entry of self.config_values gets a "pages" attribute
            (an empty list) if it does not have one yet,
          - start_logging() is called if options.logfile is set,
          - self.metabook is loaded from the UTF-8 encoded JSON file named by
            options.metabook, if set,
          - options.imagesize is converted to an int and must be > 0,
          - each positional argument is appended to self.metabook as an
            article (a new collection is created if there is none yet),
          - the deprecated print_template_prefix option is folded into
            print_template_pattern and then removed from the options.

        Invalid --imagesize or --print-template-pattern values are reported
        through self.error().

        @returns: tuple (self.options, self.args)
        """
        argv = [unicode(x, "utf-8") for x in sys.argv[1:]]
        self.options, self.args = optparse.OptionParser.parse_args(self, args=argv)
        for c in self.config_values:
            if not hasattr(c, "pages"):
                c.pages = []

        if self.options.logfile:
            start_logging(self.options.logfile)

        if self.options.metabook:
            # Close the file deterministically instead of leaving the handle
            # to the garbage collector.
            with open(self.options.metabook, 'rb') as f:
                self.metabook = json.loads(unicode(f.read(), 'utf-8'))

        # Explicit comparison instead of assert: assert statements are
        # stripped under "python -O", which would disable the > 0 check.
        try:
            self.options.imagesize = int(self.options.imagesize)
            imagesize_ok = self.options.imagesize > 0
        except ValueError:
            imagesize_ok = False
        if not imagesize_ok:
            self.error('Argument for --imagesize must be an integer > 0.')

        for title in self.args:
            if self.metabook is None:
                self.metabook = metabook.collection()

            self.metabook.append_article(title)

        if self.options.print_template_pattern and "$1" not in self.options.print_template_pattern:
            self.error("bad --print-template-pattern argument [must contain $1, but %r does not]" % (self.options.print_template_pattern,))

        if self.options.print_template_prefix and self.options.print_template_pattern:
            log.warn('Both --print-template-pattern and --print-template-prefix (deprecated) specified. Using --print-template-pattern only.')
        elif self.options.print_template_prefix:
            # translate the deprecated prefix into the equivalent pattern
            self.options.print_template_pattern = '%s$1' % self.options.print_template_prefix

        # from here on only print_template_pattern is meaningful
        del self.options.print_template_prefix

        return self.options, self.args
Ejemplo n.º 28
0
    def parse_args(self):
        """Parse sys.argv, then validate and normalize the resulting options

        The command line arguments are decoded from UTF-8 before they are
        handed to optparse. Afterwards:

          - every entry of self.config_values gets a "pages" attribute
            (an empty list) if it does not have one yet,
          - start_logging() is called if options.logfile is set,
          - self.metabook is loaded from the UTF-8 encoded JSON file named by
            options.metabook, if set,
          - options.imagesize is converted to an int and must be > 0,
          - each positional argument is appended to self.metabook as an
            article (a new collection is created if there is none yet),
          - the deprecated print_template_prefix option is folded into
            print_template_pattern and then removed from the options.

        Invalid --imagesize or --print-template-pattern values are reported
        through self.error().

        @returns: tuple (self.options, self.args)
        """
        argv = [unicode(x, "utf-8") for x in sys.argv[1:]]
        self.options, self.args = optparse.OptionParser.parse_args(self, args=argv)
        for c in self.config_values:
            if not hasattr(c, "pages"):
                c.pages = []

        if self.options.logfile:
            start_logging(self.options.logfile)

        if self.options.metabook:
            # Close the file deterministically instead of leaving the handle
            # to the garbage collector.
            with open(self.options.metabook, 'rb') as f:
                self.metabook = json.loads(unicode(f.read(), 'utf-8'))

        # Explicit comparison instead of assert: assert statements are
        # stripped under "python -O", which would disable the > 0 check.
        try:
            self.options.imagesize = int(self.options.imagesize)
            imagesize_ok = self.options.imagesize > 0
        except ValueError:
            imagesize_ok = False
        if not imagesize_ok:
            self.error('Argument for --imagesize must be an integer > 0.')

        for title in self.args:
            if self.metabook is None:
                self.metabook = metabook.collection()

            self.metabook.append_article(title)

        if self.options.print_template_pattern and "$1" not in self.options.print_template_pattern:
            self.error("bad --print-template-pattern argument [must contain $1, but %r does not]" % (self.options.print_template_pattern,))

        if self.options.print_template_prefix and self.options.print_template_pattern:
            log.warn('Both --print-template-pattern and --print-template-prefix (deprecated) specified. Using --print-template-pattern only.')
        elif self.options.print_template_prefix:
            # translate the deprecated prefix into the equivalent pattern
            self.options.print_template_pattern = '%s$1' % self.options.print_template_prefix

        # from here on only print_template_pattern is meaningful
        del self.options.print_template_prefix

        return self.options, self.args
    def makewiki(self):
        """Create the wiki environment described by the parsed options

        Calls wiki.makewiki() with options.config, self.metabook, the
        credentials taken from options.login and options.script_extension,
        then applies the remaining options to the result:

          - options.noimages sets env.images to None,
          - template blacklist / exclusion category / print template pattern
            are passed to env.wiki.setTemplateExclusion() if the wiki object
            provides it (a warning is logged otherwise),
          - options.collectionpage replaces self.metabook and env.metabook
            with the metabook parsed from that wiki page,
          - options.title, options.subtitle and options.editor are stored in
            env.metabook.

        Option values may be UTF-8 byte strings or unicode objects;
        parse_args() decodes sys.argv before parsing and deletes
        print_template_prefix, so both situations are tolerated here.

        @returns: the environment returned by wiki.makewiki()

        @raises ValueError: if options.login contains no ':' separator
        @raises RuntimeError: if options.collectionpage names a page for
            which getRawArticle() returns None
        """

        def to_unicode(value):
            # Return None for unset/empty values; decode byte strings only,
            # since unicode(u'...', 'utf-8') raises TypeError on Python 2.
            if not value:
                return None
            if isinstance(value, unicode):
                return value
            return unicode(value, 'utf-8')

        username, password, domain = None, None, None
        login = to_unicode(self.options.login)
        if login:
            # USERNAME:PASSWORD or USERNAME:PASSWORD:DOMAIN; any further ':'
            # stays part of the domain.
            parts = login.split(':', 2)
            if len(parts) == 2:
                username, password = parts
            elif len(parts) == 3:
                username, password, domain = parts
            else:
                raise ValueError(
                    'login must have the form USERNAME:PASSWORD[:DOMAIN]'
                )

        env = wiki.makewiki(self.options.config,
            metabook=self.metabook,
            username=username,
            password=password,
            domain=domain,
            script_extension=to_unicode(self.options.script_extension),
        )
        if self.options.noimages:
            env.images = None

        template_blacklist = to_unicode(self.options.template_blacklist)
        template_exclusion_category = to_unicode(self.options.template_exclusion_category)
        print_template_pattern = to_unicode(self.options.print_template_pattern)
        # parse_args() removes print_template_prefix after merging it into
        # print_template_pattern, so the attribute may no longer exist.
        print_template_prefix = to_unicode(
            getattr(self.options, 'print_template_prefix', None)
        )
        if print_template_prefix:
            if print_template_pattern is not None:
                log.warn('Both --print-template-pattern and --print-template-prefix (deprecated) specified. Using --print-template-pattern only.')
            else:
                print_template_pattern = '%s$1' % print_template_prefix

        if template_blacklist or template_exclusion_category or print_template_pattern:
            if hasattr(env.wiki, 'setTemplateExclusion'):
                env.wiki.setTemplateExclusion(
                    blacklist=template_blacklist,
                    category=template_exclusion_category,
                    pattern=print_template_pattern,
                )
            else:
                log.warn('WikiDB does not support setting a template blacklist')

        if self.options.collectionpage:
            wikitext = env.wiki.getRawArticle(to_unicode(self.options.collectionpage))
            if wikitext is None:
                raise RuntimeError('No such collection page: %r' % (
                    self.options.collectionpage,
                ))
            self.metabook = metabook.parse_collection_page(wikitext)
            env.metabook = self.metabook

        # optional overrides of the metabook's descriptive fields
        for key in ('title', 'subtitle', 'editor'):
            value = to_unicode(getattr(self.options, key))
            if value:
                env.metabook[key] = value

        return env
         writer = post_data.get('writer', self.default_writer)
     except KeyError, exc:
         log.ERROR('POST argument required: %s' % exc)
         return self.http500()
     
     try:
         log.info('download %s %s' % (collection_id, writer))
     
         output_path = self.get_path(collection_id, self.output_filename, writer)
         status = self.read_status_file(collection_id, writer)
         response = wsgi.Response(content=open(output_path, 'rb'))
         os.utime(output_path, None)
         if 'content_type' in status:
             response.headers['Content-Type'] = status['content_type'].encode('utf-8', 'ignore')
         else:
             log.warn('no content type in status file')
         if 'file_extension' in status:
             response.headers['Content-Disposition'] = 'inline; filename=collection.%s' %  (
                 status['file_extension'].encode('utf-8', 'ignore'),
             )
         else:
             log.warn('no file extension in status file')
         return response
     except Exception, exc:
         log.ERROR('exception in do_download(): %r' % exc)
         return self.http500()
 
 @json_response
 def do_zip_post(self, post_data):
     try:
         metabook_data = post_data['metabook']
Ejemplo n.º 31
0
         log.ERROR('POST argument required: %s' % exc)
         return self.http500()
     
     try:
         self.check_collection_id(collection_id)
     
         log.info('download %s %s' % (collection_id, writer))
     
         output_path = self.get_path(collection_id, self.output_filename, writer)
         status = self.read_status_file(collection_id, writer)
         response = wsgi.Response(content=open(output_path, 'rb'))
         os.utime(output_path, None)
         if 'content_type' in status:
             response.headers['Content-Type'] = status['content_type'].encode('utf-8', 'ignore')
         else:
             log.warn('no content type in status file')
         if 'file_extension' in status:
             response.headers['Content-Disposition'] = 'inline;filename="collection.%s"' %  (
                 status['file_extension'].encode('utf-8', 'ignore'),
             )
         else:
             log.warn('no file extension in status file')
         return response
     except Exception, exc:
         log.ERROR('exception in do_download(): %r' % exc)
         return self.http500()
 
 @json_response
 def do_zip_post(self, post_data):
     try:
         metabook_data = post_data['metabook']