Esempio n. 1
0
class FileJobPoller(object):
    """Keep count of forked job processes belonging to a file-based queue.

    ``num_jobs`` is the number of children believed to be running; it is
    decremented by _reap_children() as children exit.
    """

    def __init__(self,
                 queue_dir,
                 processing_dir=None,
                 sleep_time=1,
                 max_num_jobs=5):
        """Init instance

        @param queue_dir: queue directory; passed through utils.ensure_dir()

        @param processing_dir: accepted but not used by this constructor

        @param sleep_time: stored as self.sleep_time

        @param max_num_jobs: number of running jobs at which
            _reap_children() blocks until one of them exits
        """
        self.queue_dir = utils.ensure_dir(queue_dir)
        self.sleep_time = sleep_time
        self.max_num_jobs = max_num_jobs
        self.num_jobs = 0
        self.log = Log('FileJobPoller')
        self.files = []

    def _reap_children(self):
        """Collect exit statuses of finished children and update num_jobs.

        Blocks in waitpid() only while all max_num_jobs slots are taken;
        otherwise it returns as soon as no further child has exited.
        """
        # local import: the module's import block is not visible from here
        import errno

        while self.num_jobs > 0:
            try:
                if self.num_jobs == self.max_num_jobs:
                    flags = 0
                else:
                    flags = os.WNOHANG
                pid, rc = os.waitpid(-1, flags)
            except OSError as exc:
                self.log.ERROR('waitpid(-1) failed: %s' % exc)
                if exc.errno == errno.ECHILD:
                    # The OS has no unreaped children left, so the counter
                    # is stale; reset it instead of reporting busy forever.
                    self.num_jobs = 0
                break
            if (pid, rc) == (0, 0):
                # WNOHANG: children exist but none has exited yet
                break
            self.num_jobs -= 1
            self.log.info('child %s exited: %s. have %d jobs' %
                          (pid, rc, self.num_jobs))
Esempio n. 2
0
 def __init__(self, queue_dir, processing_dir=None, sleep_time=1,
              max_num_jobs=5):
     """Set up an idle poller: no running jobs and no pending files.

     queue_dir is passed through utils.ensure_dir(); processing_dir is
     accepted but not used here.
     """
     ensured_dir = utils.ensure_dir(queue_dir)
     self.queue_dir = ensured_dir
     # polling parameters
     self.sleep_time = sleep_time
     self.max_num_jobs = max_num_jobs
     # bookkeeping starts out empty
     self.num_jobs = 0
     self.log = Log('FileJobPoller')
     self.files = list()
class FileJobQueuer(object):
    """Write a file for each new job request"""

    def __init__(self, queue_dir):
        """Init instance

        @param queue_dir: directory that receives the job files; passed
            through utils.ensure_dir()
        """
        self.queue_dir = utils.ensure_dir(queue_dir)
        self.log = Log('FileJobQueuer')

    def __call__(self, job_type, job_id, args):
        """Queue a job by writing its arguments to <queue_dir>/<job_id>.job

        The arguments are written one per line to a ``.tmp`` file which is
        then renamed to its final name. If the job file already exists a
        warning is logged and nothing is written.

        @param job_type: not used by this implementation

        @param job_id: base name of the job file

        @param args: strings to join with newlines and write
        """
        job_file = '%s.job' % os.path.join(self.queue_dir, job_id)
        if os.path.exists(job_file):
            self.log.warn('Job file %r already exists' % job_file)
            return

        tmp_file = job_file + '.tmp'
        # close (and thereby flush) the file before it is renamed into place
        with open(tmp_file, 'wb') as f:
            f.write('\n'.join(args))
        os.rename(tmp_file, job_file)
Esempio n. 4
0
class FileJobQueuer(object):
    """Write a file for each new job request"""
    def __init__(self, queue_dir):
        """Init instance

        @param queue_dir: directory that receives the job files; passed
            through utils.ensure_dir()
        """
        self.queue_dir = utils.ensure_dir(queue_dir)
        self.log = Log('FileJobQueuer')

    def __call__(self, job_type, job_id, args):
        """Queue a job by pickling its arguments to <queue_dir>/<job_id>.job

        The pickle is written to a ``.tmp`` file which is then renamed to
        its final name. If the job file already exists a warning is logged
        and nothing is written.

        @param job_type: not used by this implementation

        @param job_id: base name of the job file

        @param args: object to serialize with cPickle.dumps()
        """
        job_file = '%s.job' % os.path.join(self.queue_dir, job_id)
        if os.path.exists(job_file):
            self.log.warn('Job file %r already exists' % job_file)
            return

        tmp_file = job_file + '.tmp'
        # close (and thereby flush) the file before it is renamed into place
        with open(tmp_file, 'wb') as f:
            f.write(cPickle.dumps(args))
        os.rename(tmp_file, job_file)
Esempio n. 5
0
 def __init__(self,
              queue_dir,
              processing_dir=None,
              sleep_time=1,
              max_num_jobs=5):
     """Store the polling settings and start with zero jobs and no files.

     queue_dir goes through utils.ensure_dir() first; processing_dir is
     accepted but not used here.
     """
     checked_queue_dir = utils.ensure_dir(queue_dir)
     self.queue_dir = checked_queue_dir
     self.sleep_time = sleep_time
     self.max_num_jobs = max_num_jobs
     self.num_jobs = 0
     poller_log = Log('FileJobPoller')
     self.log = poller_log
     self.files = list()
Esempio n. 6
0
class FileJobPoller(object):
    """Keep count of forked job processes belonging to a file-based queue."""

    def __init__(self, queue_dir, processing_dir=None, sleep_time=1, max_num_jobs=5):
        """Init instance

        @param queue_dir: queue directory; passed through utils.ensure_dir()

        @param processing_dir: accepted but not used by this constructor

        @param sleep_time: stored as self.sleep_time

        @param max_num_jobs: job count at which _reap_children() blocks
        """
        self.queue_dir = utils.ensure_dir(queue_dir)
        self.sleep_time = sleep_time
        self.max_num_jobs = max_num_jobs
        self.num_jobs = 0
        self.log = Log('FileJobPoller')
        self.files = []

    def _reap_children(self):
        """Wait for exited children and decrement num_jobs for each one.

        The wait is blocking only when num_jobs equals max_num_jobs, i.e.
        when no further job could be started anyway.
        """
        # local import: the module's import block is not visible from here
        import errno

        while self.num_jobs > 0:
            try:
                if self.num_jobs == self.max_num_jobs:
                    flags = 0
                else:
                    flags = os.WNOHANG
                pid, rc = os.waitpid(-1, flags)
            except OSError as exc:
                self.log.ERROR('waitpid(-1) failed: %s' % exc)
                if exc.errno == errno.ECHILD:
                    # no children exist any more: drop the stale count so
                    # the poller does not consider itself busy forever
                    self.num_jobs = 0
                break
            if (pid, rc) == (0, 0):
                # WNOHANG: nothing has exited yet
                break
            self.num_jobs -= 1
            self.log.info('child %s exited: %s. have %d jobs' % (pid, rc, self.num_jobs))
Esempio n. 7
0
#! /usr/bin/env python

# Copyright (c) 2007-2008 PediaPress GmbH
# See README.txt for additional licensing information.

import os
from ConfigParser import ConfigParser
import StringIO

from mwlib import utils, metabook
from mwlib.log import Log

log = Log('mwlib.utils')


def wiki_mwapi(base_url=None,
               template_blacklist=None,
               template_exclusion_category=None,
               username=None,
               password=None,
               domain=None,
               script_extension=None,
               **kwargs):
    from mwlib import mwapidb
    return mwapidb.WikiDB(
        base_url,
        template_blacklist=template_blacklist,
        template_exclusion_category=template_exclusion_category,
        username=username,
        password=password,
        domain=domain,
Esempio n. 8
0
# Copyright (c) 2007-2009 PediaPress GmbH
# See README.rst for additional licensing information.

import os
import sys
try:
    import simplejson as json
except ImportError:
    import json

from mwlib.log import Log

log = Log('mwlib.status')


class Status(object):
    qproxy = None
    stdout = sys.stdout

    def __init__(
            self,
            filename=None,
            podclient=None,
            progress_range=(0, 100),
            status=None,
    ):
        self.filename = filename
        self.podclient = podclient
        if status is not None:
            self.status = status
        else:
Esempio n. 9
0
http://en.wikipedia.org/wiki/Wikipedia:Don%27t_use_line_breaks
http://meta.wikimedia.org/wiki/Help:Advanced_editing
http://meta.wikimedia.org/wiki/Help:HTML_in_wikitext
"""
import re
import time
from mwlib.parser import Math, Ref, Link, URL, NamedURL # not used but imported
from mwlib.parser import CategoryLink, SpecialLink, Caption, LangLink # not used but imported
from mwlib.parser import ArticleLink, InterwikiLink, NamespaceLink
from mwlib.parser import Item, ItemList,  Node, Table, Row, Cell, Paragraph, PreFormatted
from mwlib.parser import Section, Style, TagNode, Text, Timeline
from mwlib.parser import  ImageLink, Article, Book, Chapter
import copy
from mwlib.log import Log

log = Log("advtree")


def _idIndex(lst, el):
    """Return index of first appeareance of element el in list lst"""
    
    for i, e in enumerate(lst):
        if e is el:
            return i
    raise ValueError('element %r not found' % el)

def debug(method): # use as decorator
    def f(self, *args, **kargs):
        log("\n%s called with %r %r" % (method.__name__, args, kargs))
        log("on %r attrs:%r style:%r" % (self, self.attributes, self.style) )
        p = self
Esempio n. 10
0
import urllib

from mwlib import parser, uparser, utils
from mwlib.log import Log

log = Log('wikidbbase')

class WikiDBBase(object):
    """Base class for WikiDBs"""
    
    def getLinkURL(self, link, title, revision=None):
        """Get a full HTTP URL for the given link object, parsed from an article
        in this WikiDB.
        
        @param link: link node from parser
        @type link: L{mwlib.parser.Link}
        
        @param title: title of containing article
        @type title: unicode
        
        @param revision: revision of containing article (optional)
        @type revision: unicode
        
        @returns: full HTTP URL or None if it could not be constructed
        @rtype: str or NoneType
        """
        
        if isinstance(link, parser.ArticleLink)\
            or isinstance(link, parser.CategoryLink)\
            or isinstance(link, parser.NamespaceLink):
            
Esempio n. 11
0
#!/usr/bin/env python
# ~ -*- coding:utf-8 -*-

import re

from lxml.builder import ElementMaker
from mwlib.log import Log

from mwlib.pdf.htmlfilters.misc import hash_anchor

log = Log("mwlib.pdf.generators.contributors")
E = ElementMaker()


def generate_article_contributors(articles):
    title = _("Article Sources and Contributors")
    node = E.article(
        {"class": "contributors", "data-pp-footer-text": _("Appendix")},
        E.h1(title, hash_anchor(title)),
    )
    contributors = E.div({"class": ""})
    for article in articles:
        contributors.append(
            E.p(
                E.strong({"class": "title"}, article.title, " "),
                E.span({"class": "label"}, _("Source:"), " "),
                E.span({"class": "url"}, article.url, " "),
                E.span({"class": "label"}, _("Contributors:"), " "),
                E.span({"class": "contributors"}, filter_anon_ip_edits(article.authors),),
            )
        )
Esempio n. 12
0
import os
import re
import shutil
import tempfile
import time
import urllib
import urllib2
import urlparse

import simplejson

from mwlib import uparser, utils
from mwlib.log import Log

log = Log("mwapidb")

try:
    from mwlib.licenses import lower2normal
except ImportError:
    log.warn('no licenses found')
    lower2normal = {}

# ==============================================================================


def fetch_url(url, ignore_errors=False):
    log.info("fetching %r" % (url,))
    opener = urllib2.build_opener()
    opener.addheaders = [('User-agent', 'mwlib')]
    try:
Esempio n. 13
0
    print "you need to install odfpy: http://opendocumentfellowship.com/projects/odfpy"
    print "currently only version 0.7 is supported"
    raise

from odf.opendocument import OpenDocumentText
from odf import text, dc, meta, table, draw, math, element
from mwlib import parser
from mwlib.log import Log
from mwlib import advtree 
from mwlib import odfstyles as style
from mwlib import writerbase
from mwlib.treecleaner import TreeCleaner
from mwlib import odfconf


log = Log("odfwriter")

# using alpha software is challenging as APIs change -------------------
# check for ODF version and monkey patch stuff
e = element.Element(qname = ("a","n"))
if hasattr(e, "elements"): # odfpy-0.7
    def _f(self, c):
        """appendChild replacement for odfpy-0.7 elements: append child c"""
        log("assumming odfpy-0.7x")
        # append the child that was passed in, not the module-level probe
        # element ``e`` that is only used for the API check
        self.elements.append(c)
    element.Element.appendChild = _f
    element.Element.lastChild = property(lambda s:s.elements[-1])
    element.Element.setAttribute = element.Element.addAttribute
else:
    # assume the odfpy-08 api is stable
    # but we don't support this now, as they changed their API
    # easy_install odfpy==0.7.0  might help
Esempio n. 14
0
 def __init__(self, queue_dir):
     """Store the queue directory (via utils.ensure_dir) and a logger."""
     checked_dir = utils.ensure_dir(queue_dir)
     self.queue_dir = checked_dir
     self.log = Log('FileJobQueuer')
Esempio n. 15
0
"""Simplistic threaded job scheduler"""

import Queue
import threading
import traceback

from mwlib.log import Log

log = Log('mwlib.jobsched')

# ==============================================================================


class JobScheduler(object):
    """Simple threaded job scheduler"""
    def __init__(self, num_threads):
        """Init instance with a number of worker threads and a job callable
        
        @param num_threads: number of threads to start
        @type num_threads: int
        """

        self.num_threads = num_threads
        self.job_queue = Queue.Queue()
        self.semaphore = None

    def add_job(self, job_id, do_job, **kwargs):
        """Schedule a job to be executed in a separate thread. The job_id and
        all additional kwargs are passed to the do_job callable given in the
        constructor.
        
Esempio n. 16
0
#! /usr/bin/env python

"""WSGI dispatcher base class"""

import cgi
import os
import StringIO
import time
import traceback

from mwlib.log import Log

# ==============================================================================

log = Log('mwlib.wsgi')

# ==============================================================================

class Request(object):
    max_post_data_size = 1024*1024
    
    def __init__(self, env):
        """Read method, path, query and POST data from the environ env.

        'REQUEST_METHOD' must be present in env; 'PATH_INFO' and
        'QUERY_STRING' are looked up with get().
        """
        self.env = env
        self.method = self.env['REQUEST_METHOD'].upper()
        self.path = self.env.get('PATH_INFO')
        query_string = self.env.get('QUERY_STRING', '')
        parsed_query = cgi.parse_qs(query_string)
        self.query = self.multi2single(parsed_query)
        # post data is only read for POST requests
        if self.method != 'POST':
            self.post_data = {}
        else:
            self.post_data = self.read_post_data()
    
Esempio n. 17
0
import sys
import json

from mwlib.log import Log

log = Log('mwlib.statusfile')

class Status(object):
    def __init__(self,
        filename=None,
        podclient=None,
        progress_range=(0, 100),
        auto_dump=True,
    ):
        self.filename = filename
        self.podclient = podclient
        self.status = {}
        self.progress_range = progress_range
    
    def __call__(self, status=None, progress=None, article=None, auto_dump=True,
        **kwargs):
        if status is not None and status != self.status.get('status'):
            print 'STATUS: %s' % status
            self.status['status'] = status
        
        if progress is not None:
            assert 0 <= progress and progress <= 100, 'progress not in range 0..100'
            progress = int(
                self.progress_range[0]
                + progress*(self.progress_range[1] - self.progress_range[0])/100
            )
Esempio n. 18
0
#! /usr/bin/env python

# Copyright (c) 2007-2008 PediaPress GmbH
# See README.txt for additional licensing information.
"""usable/user parser"""

from mwlib import parser, scanner, expander
from mwlib.log import Log

log = Log('uparser')


def simplify(node, **kwargs):
    """Merge runs of adjacent Text children into the first node of each run.

    Works in place and recurses into every child that is not a Text node.
    """
    Text = parser.Text

    run_head = None  # Text node that the current run is merged into
    merged = []  # indexes of children whose caption has been merged away
    for idx, child in enumerate(node.children):
        if child.__class__ == Text:  # would isinstance be safe?
            if run_head:
                run_head.caption += child.caption
                merged.append(idx)
            else:
                run_head = child
            continue
        simplify(child)
        run_head = None

    # delete from the back so that the remaining indexes stay valid
    for idx in reversed(merged):
        del node.children[idx]
Esempio n. 19
0
    from __builtin__ import all
except ImportError:
    def all(iterable):
        """all(iterable) -> bool

        Return False as soon as a value in the iterable is falsy, True
        otherwise (including for an empty iterable).
        """

        for value in iterable:
            if value:
                continue
            return False
        return True

# ==============================================================================

log = Log('mwlib.utils')

# ==============================================================================

def fsescape(s):
    """Escape string to be safely used in path names
    
    @param s: some string
    @type s: basestring
    
    @returns: escaped string
    @rtype: str
    """
    
    res = []
    for x in s:
Esempio n. 20
0
#! /usr/bin/env python

import locale
import os
import sys
import traceback

from mwlib.log import Log

import mwlib
from mwlib.pdf import html2pdf

reload(sys)
sys.setdefaultencoding("UTF8")
log = Log("mwlib.pdf.writer")


def patch_logging(output_filename):
    """Start logging to "render.log" in the directory of output_filename."""
    output_dir = os.path.dirname(output_filename)
    log_path = os.path.join(output_dir, "render.log")
    mwlib.utils.start_logging(log_path)


def writer(env, output, status_callback, debug=True, lang=None, x=False):
    if not lang:
        _locale = locale.getlocale(locale.LC_NUMERIC)
        if _locale:
            lang = _locale[0]
    crop_marks = False
    if not x:
        patch_logging(output)
    renderer = html2pdf.PrincePdfWriter(env,
Esempio n. 21
0
import sys
import tempfile
import time
import traceback
import urllib
import urllib2
import urlparse
import UserDict

from mwlib.log import Log

from hashlib import md5

# provide all() for python 2.4
all = all
log = Log('mwlib.utils')


def get_print_template_maker(pattern):
    """Return a function that maps a title to its print-template title.

    Every "$1" in pattern is replaced with the title. When the title has a
    prefix ending in ':', only the part after the first ':' is substituted
    and the prefix is kept in front of the result.
    """
    assert "$1" in pattern, 'pattern %r does not contain "$1"' % pattern

    def make_print_template(title):
        if ':' not in title:
            return pattern.replace("$1", title)
        prefix, _, rest = title.partition(":")
        return '%s:%s' % (prefix, pattern.replace("$1", rest))

    return make_print_template
Esempio n. 22
0
 def __init__(self, queue_dir):
     """Remember the queue directory and create the queuer's logger."""
     # utils.ensure_dir() supplies the value that is stored
     ensured = utils.ensure_dir(queue_dir)
     self.queue_dir = ensured
     self.log = Log('FileJobQueuer')
Esempio n. 23
0
import cgi
import StringIO
try:
    import xml.etree.ElementTree as ET
except:
    from elementtree import ElementTree as ET

from mwlib import parser
from mwlib import advtree
from mwlib import xmltreecleaner
from mwlib.log import Log
from mwlib import writerbase

version = "0.2"

log = Log("xmlwriter")


def showNode(obj):
    """Log the class name of obj and, if any, its truthy attributes.

    The "_parentref" and "children" attributes are never listed.
    """
    attrs = obj.__dict__.keys()
    log(obj.__class__.__name__)
    stuff = []
    for name in attrs:
        if name in ("_parentref", "children"):
            continue
        if getattr(obj, name):
            stuff.append("%s => %r" % (name, getattr(obj, name)))
    if stuff:
        log(repr(stuff))


def indent(elem, level=0):
    i = u"\n" + level * u"  "
class FileJobPoller(object):
    """Poll a queue directory and run each job file in a forked child.

    A job file holds one command-line argument per line. It is moved to
    the processing directory before a child runs it with
    subprocess.call(); the child removes the file afterwards.
    """

    def __init__(self, queue_dir, processing_dir, sleep_time=1, max_num_jobs=5):
        """Init instance

        @param queue_dir: directory to poll; passed through utils.ensure_dir()

        @param processing_dir: directory job files are moved to while they
            run; passed through utils.ensure_dir()

        @param sleep_time: seconds to sleep when no job can be started

        @param max_num_jobs: maximum number of concurrently running children
        """
        self.queue_dir = utils.ensure_dir(queue_dir)
        self.processing_dir = utils.ensure_dir(processing_dir)
        self.sleep_time = sleep_time
        self.max_num_jobs = max_num_jobs
        self.num_jobs = 0
        self.log = Log('FileJobPoller')

    def run_forever(self):
        """Start and reap jobs until interrupted by KeyboardInterrupt.

        On interrupt, wait for all running children before returning.
        """
        self.log.info('running with a max. of %d jobs' % self.max_num_jobs)
        try:
            while True:
                filename = self.poll()
                if self.num_jobs < self.max_num_jobs and filename:
                    self.num_jobs += 1
                    # An explicit False means no child was forked. Anything
                    # else (including None from an override) counts as
                    # started, which was the previous behavior.
                    if self.start_job(filename) is False:
                        self.num_jobs -= 1
                        # don't spin on a file that cannot be moved
                        time.sleep(self.sleep_time)
                    else:
                        self.log.info('child started: have %d jobs' % self.num_jobs)
                else:
                    time.sleep(self.sleep_time)
                while self.num_jobs > 0:
                    try:
                        pid, rc = os.waitpid(-1, os.WNOHANG)
                    except OSError as exc:
                        # e.g. no child left to wait for: keep polling
                        # instead of dying with a traceback
                        self.log.warn('waitpid(-1) failed: %s' % exc)
                        break
                    if (pid, rc) == (0, 0):
                        break
                    self.num_jobs -= 1
                    self.log.info('child killed: have %d jobs' % self.num_jobs)
        except KeyboardInterrupt:
            while self.num_jobs > 0:
                os.waitpid(-1, 0)
                self.num_jobs -= 1
        self.log.info('exit')

    def poll(self):
        """Return the name of the oldest regular file in the queue directory

        @returns: filename with the smallest mtime, or None if there is none
        @rtype: str or NoneType
        """
        files = []
        for filename in os.listdir(self.queue_dir):
            path = os.path.join(self.queue_dir, filename)
            if not os.path.isfile(path):
                continue
            try:
                mtime = os.stat(path).st_mtime
            except OSError:
                # file disappeared between isfile() and stat()
                continue
            files.append((mtime, filename))
        if files:
            return min(files)[1]
        return None

    def start_job(self, filename):
        """Move the job file to the processing directory and run it in a child

        @param filename: name of a file in the queue directory

        @returns: True in the parent after forking, False if the file could
            not be moved (no child is forked in that case)
        @rtype: bool
        """
        src = os.path.join(self.queue_dir, filename)
        path = os.path.join(self.processing_dir, filename)
        try:
            os.rename(src, path)
        except Exception as exc:
            self.log.warn('Could not rename %r to %r: %s' % (src, path, exc))
            traceback.print_exc()
            return False
        self.log.info('starting job %r' % filename)
        pid = os.fork()
        if pid != 0:
            return True

        # child process: never returns, always leaves via os._exit()
        try:
            with open(path, 'rb') as job_file:
                args = job_file.read().split('\n')
            self.log.info('executing: %r' % args)
            try:
                rc = subprocess.call(args)
                assert rc == 0, 'non-zero return code'
            except Exception as exc:
                self.log.warn('Error executing %r: %s' % (args, exc))
                traceback.print_exc()
        finally:
            try:
                os.unlink(path)
            except Exception as exc:
                self.log.warn('Could not remove file %r: %s' % (path, exc))
                traceback.print_exc()
            os._exit(0)
Esempio n. 25
0
import urllib
import urllib2
try:
    from hashlib import md5
except ImportError:
    from md5 import md5
import shutil
import sys
import time
import tempfile
import re

from mwlib import uparser, utils
from mwlib.log import Log

log = Log("netdb")

# ==============================================================================

def hashpath(name):
    """Compute hashpath for an image in the same way as MediaWiki does
    
    @param name: name of an image
    @type name: unicode
    
    @returns: hashpath to image
    @type: str
    """
    
    name = name.replace(' ', '_')
    name = name[:1].upper()+name[1:]
Esempio n. 26
0
# -*- coding: utf-8 -*-

# Copyright (c) 2007-2008 PediaPress GmbH
# See README.txt for additional licensing information.

import os
from mwlib import parser, rendermath, timeline

import urllib
import cgi

#from PIL import Image

from mwlib.log import Log

log = Log("htmlwriter")


class HTMLWriter(object):
    imglevel = 0
    namedLinkCount = 1

    def __init__(self, out, images=None, math_renderer=None):
        self.out = out
        self.level = 0
        self.images = images
        # self.images = imgdb.ImageDB(os.path.expanduser("~/images"))
        self.references = []
        if math_renderer is None:
            self.math_renderer = rendermath.Renderer()
        else:
Esempio n. 27
0
import os
import re
import urllib

from PIL import Image
from lxml import etree
from lxml.builder import ElementMaker
from mwlib.log import Log

from mwlib.pdf import utils
from mwlib.pdf.htmlfilters.sizetools import resize_node_width_to_columns
from .. import config
from ..config import column_width_pt

log = Log("mwlib.pdf.html2pdf")
E = ElementMaker()
number_re = re.compile(r"^(\d+)")


valid_image_extensions = [".png", ".jpg", ".gif", ".svg", ".jpeg"]


def fix_image_src(article):
    """
    replace img src with path on local disc
    """
    for img in article.dom.xpath("//img"):
        src = img.get("src")
        if os.path.splitext(src)[1] == ".gif":
            img_name = src.split("/")[-1]
Esempio n. 28
0
class FileJobPoller(object):
    """Poll a queue directory and run each job file in a forked child.

    Each job file is unpickled into an argument list which the child
    executes with os.execvp().
    """

    def __init__(self, queue_dir, processing_dir=None, sleep_time=1, max_num_jobs=5):
        """Init instance

        @param queue_dir: directory to poll; passed through utils.ensure_dir()

        @param processing_dir: accepted but not used by this class

        @param sleep_time: seconds to sleep when no job file is pending

        @param max_num_jobs: maximum number of concurrently running children
        """
        self.queue_dir = utils.ensure_dir(queue_dir)
        self.sleep_time = sleep_time
        self.max_num_jobs = max_num_jobs
        self.num_jobs = 0
        self.log = Log('FileJobPoller')
        self.files = []

    def _reap_children(self):
        """Collect exit statuses of finished children and update num_jobs.

        Blocks in waitpid() only while all max_num_jobs slots are taken;
        otherwise it returns as soon as no further child has exited.
        """
        # local import: the module's import block is not visible from here
        import errno

        while self.num_jobs > 0:
            try:
                if self.num_jobs == self.max_num_jobs:
                    flags = 0
                else:
                    flags = os.WNOHANG
                pid, rc = os.waitpid(-1, flags)
            except OSError as exc:
                self.log.ERROR('waitpid(-1) failed: %s' % exc)
                if exc.errno == errno.ECHILD:
                    # No unreaped children exist, so the counter is stale.
                    # Reset it; otherwise a count stuck at max_num_jobs
                    # would keep run_forever() from ever starting a job.
                    self.num_jobs = 0
                break
            if (pid, rc) == (0, 0):
                # WNOHANG: children exist but none has exited yet
                break
            self.num_jobs -= 1
            self.log.info('child %s exited: %s. have %d jobs' % (pid, rc, self.num_jobs))

    def run_forever(self):
        """Poll, start and reap jobs until interrupted by KeyboardInterrupt.

        Other exceptions are logged and the loop continues. On interrupt,
        wait for the running children before returning.
        """
        self.log.info('running with a max. of %d jobs' % self.max_num_jobs)
        while True:
            try:
                self.poll()
                if not self.files:
                    time.sleep(self.sleep_time)

                while self.num_jobs < self.max_num_jobs and self.files:
                    self.start_job(self.files.pop())

                self._reap_children()
            except KeyboardInterrupt:
                while self.num_jobs > 0:
                    try:
                        os.waitpid(-1, 0)
                    except OSError as exc:
                        # nothing left to wait for: finish the shutdown
                        # instead of dying with a traceback
                        self.log.ERROR('waitpid(-1) failed: %s' % exc)
                        break
                    self.num_jobs -= 1
                break
            except Exception as err:
                self.log.error("caught exception: %r" % (err, ))
                traceback.print_exc()

        self.log.info('exit')

    def poll(self):
        """Refill self.files from the queue directory if it is empty.

        Names ending in ".tmp" and non-regular files are skipped. The
        names are sorted newest first, so self.files.pop() yields the
        oldest job.
        """
        if self.files:
            return

        files = []
        for filename in os.listdir(self.queue_dir):
            if filename.endswith(".tmp"):
                continue

            path = os.path.join(self.queue_dir, filename)
            if not os.path.isfile(path):
                continue
            try:
                mtime = os.stat(path).st_mtime
            except Exception as exc:
                self.log.ERROR('Could not stat %r: %s' % (path, exc))
                continue
            files.append((mtime, filename))

        files.sort(reverse=True)
        self.files = [x[1] for x in files]

    def start_job(self, filename):
        """Fork, and execute job from given file

        The job file is removed from the queue directory whether or not it
        could be read.

        @returns: whether a new job has been started (parent process only;
            the child never returns)
        @rtype: bool
        """

        src = os.path.join(self.queue_dir, filename)
        try:
            # NOTE(review): unpickling runs arbitrary code from the file;
            # confirm that only trusted processes can write to queue_dir.
            with open(src, 'rb') as job_file:
                args = cPickle.loads(job_file.read())
        finally:
            os.unlink(src)

        self.log.info('starting job %r' % filename)

        pid = os.fork()
        self.num_jobs += 1

        if pid != 0:
            return True

        # child process:
        try:
            os.execvp(args[0], args)
        except BaseException:
            traceback.print_exc()
        finally:
            # only reached when execvp() did not replace the process
            self.log.warn('error running %r' % (args,))
            os._exit(10)
Esempio n. 29
0
# Copyright (c) 2007-2009 PediaPress GmbH
# See README.rst for additional licensing information.
"""
class for defining DTD-Like Rules for the tree
"""
from advtree import Article

from mwlib.log import Log
log = Log("sanitychecker")

# -----------------------------------------------------------
# Constraints
# -----------------------------------------------------------


class ConstraintBase(object):
    """Base constraint over a set of classes; its test() always passes."""

    def __init__(self, *klasses):
        # the classes this constraint applies to, kept as a tuple
        self.klasses = klasses

    def test(self, nodes):
        """Return a (passed, detail) pair; the base class returns (True, None)."""
        passed = True
        return passed, None

    def __repr__(self):
        own_name = self.__class__.__name__
        klass_names = [klass.__name__ for klass in self.klasses]
        return "%s(%s)" % (own_name, ", ".join(klass_names))


class Forbid(ConstraintBase):
    "forbid any of the classes"

    def test(self, nodes):
Esempio n. 30
0
#! /usr/bin/env python
"""WSGI dispatcher base class"""

import cgi
import os
import StringIO
import time
import traceback

from mwlib.log import Log

# ==============================================================================

log = Log('mwlib.wsgi')

# ==============================================================================


class Request(object):
    max_post_data_size = 1024 * 1024

    def __init__(self, env):
        """Extract method, path, query and POST data from the environ env.

        'REQUEST_METHOD' is required; 'PATH_INFO' and 'QUERY_STRING' are
        optional.
        """
        self.env = env
        self.method = self.env['REQUEST_METHOD'].upper()
        self.path = self.env.get('PATH_INFO')
        raw_query = self.env.get('QUERY_STRING', '')
        self.query = self.multi2single(cgi.parse_qs(raw_query))
        # the request body is only read for POST
        is_post = self.method == 'POST'
        self.post_data = self.read_post_data() if is_post else {}
Esempio n. 31
0
def check_service():
    import sys
    import time

    from mwlib.client import Client
    from mwlib.log import Log
    from mwlib import utils

    log = Log('mw-check-service')

    parser = optparse.OptionParser(usage="%prog [OPTIONS] BASEURL METABOOK")
    default_url = 'http://localhost:8899/'
    parser.add_option('-u', '--url',
        help='URL of HTTP interface to mw-serve (default: %r)' % default_url,
        default=default_url,
    )
    parser.add_option('-w', '--writer',
        help='writer to use for rendering (default: rl)',
        default='rl',
    )
    parser.add_option('--max-render-time',
        help='maximum number of seconds rendering may take (default: 120)',
        default='120',
        metavar='SECONDS',
    )
    parser.add_option('--save-output',
        help='if specified, save rendered file with given filename',
        metavar='FILENAME',
    )
    parser.add_option('-l', '--logfile',
        help='log output to LOGFILE',
    )
    parser.add_option('--report-from-mail',
        help='sender of error mails (--report-recipient also needed)',
        metavar='EMAIL',
    )
    parser.add_option('--report-recipient',
        help='recipient of error mails (--report-from-mail also needed)',
        metavar='EMAIL',
    )
    options, args = parser.parse_args()

    if len(args) != 2:
        parser.error('exactly 2 arguments required')

    base_url = args[0]
    metabook = open(args[1], 'rb').read()

    max_render_time = int(options.max_render_time)

    if options.report_recipient and options.report_from_mail:
        def report(msg):
            utils.report(
                system='mw-check-service',
                subject='mw-check-service error',
                from_email=options.report_from_mail.encode('utf-8'),
                mail_recipients=[options.report_recipient.encode('utf-8')],
                msg=msg,
            )
    else:
        report = log.ERROR

    writer = options.writer

    if options.logfile:
        utils.start_logging(options.logfile)

    client = Client(options.url)

    def check_req(command, **kwargs):
        """Send one request through client and return its response.

        Every failure is passed to report() and ends the process with
        exit status 1. The 'download' command is the only one not sent
        with is_json=True.
        """
        try:
            success = client.request(command, kwargs, is_json=(command != 'download'))
        except Exception as exc:
            report('request failed: %s' % exc)
            sys.exit(1)

        if success:
            return client.response
        if client.error is not None:
            report('request failed: %s' % client.error)
        else:
            report('request failed: got response code %d' % client.response_code)
        sys.exit(1)
Esempio n. 32
0
def check_service():
    """Render a metabook through a running mw-serve and check the result.

    Command line: ``[OPTIONS] BASEURL METABOOK``. Sends a render command,
    polls render_status once per second until the state is 'finished',
    then downloads the result.

    Exit status: 1 if a request fails, 2 if rendering takes longer than
    --max-render-time, 3 if the downloaded file is smaller than 100
    bytes. Failures are mailed via utils.report() when both --report-*
    options are given, and logged otherwise.
    """
    import sys
    import time

    from mwlib.client import Client
    from mwlib.log import Log
    from mwlib import utils

    log = Log('mw-check-service')

    parser = optparse.OptionParser(usage="%prog [OPTIONS] BASEURL METABOOK")
    default_url = 'http://localhost:8899/'
    parser.add_option(
        '-u',
        '--url',
        help='URL of HTTP interface to mw-serve (default: %r)' % default_url,
        default=default_url,
    )
    parser.add_option(
        '-w',
        '--writer',
        help='writer to use for rendering (default: rl)',
        default='rl',
    )
    parser.add_option(
        '--max-render-time',
        help='maximum number of seconds rendering may take (default: 120)',
        default='120',
        metavar='SECONDS',
    )
    parser.add_option(
        '--save-output',
        help='if specified, save rendered file with given filename',
        metavar='FILENAME',
    )
    parser.add_option(
        '-l',
        '--logfile',
        help='log output to LOGFILE',
    )
    parser.add_option(
        '--report-from-mail',
        help='sender of error mails (--report-recipient also needed)',
        metavar='EMAIL',
    )
    parser.add_option(
        '--report-recipient',
        help='recipient of error mails (--report-from-mail also needed)',
        metavar='EMAIL',
    )
    options, args = parser.parse_args()

    if len(args) != 2:
        parser.error('exactly 2 arguments required')

    base_url = args[0]
    # read the metabook with a context manager so the handle is closed
    with open(args[1], 'rb') as metabook_file:
        metabook = metabook_file.read()

    max_render_time = int(options.max_render_time)

    if options.report_recipient and options.report_from_mail:

        def report(msg):
            """Mail msg to the configured recipient."""
            utils.report(
                system='mw-check-service',
                subject='mw-check-service error',
                from_email=options.report_from_mail.encode('utf-8'),
                mail_recipients=[options.report_recipient.encode('utf-8')],
                msg=msg,
            )
    else:
        report = log.ERROR

    writer = options.writer

    if options.logfile:
        utils.start_logging(options.logfile)

    client = Client(options.url)

    def check_req(command, **kwargs):
        """Send one request; report the failure and exit(1) if it fails."""
        try:
            success = client.request(command,
                                     kwargs,
                                     is_json=(command != 'download'))
        except Exception as exc:
            report('request failed: %s' % exc)
            sys.exit(1)

        if success:
            return client.response
        if client.error is not None:
            report('request failed: %s' % client.error)
        else:
            report('request failed: got response code %d' %
                   client.response_code)
        sys.exit(1)

    start_time = time.time()

    log.info('sending render command')
    response = check_req(
        'render',
        base_url=base_url,
        metabook=metabook,
        writer=writer,
        force_render=True,
    )
    collection_id = response['collection_id']

    # poll once per second until finished or out of time
    while True:
        time.sleep(1)

        if time.time() - start_time > max_render_time:
            report('rendering exceeded allowed time of %d s' % max_render_time)
            sys.exit(2)

        log.info('checking status')
        response = check_req(
            'render_status',
            collection_id=collection_id,
            writer=writer,
        )
        if response['state'] == 'finished':
            break

    log.info('downloading')
    response = check_req(
        'download',
        collection_id=collection_id,
        writer=writer,
    )

    if len(response) < 100:
        report('got suspiciously small file from download: size is %d Bytes' %
               len(response))
        sys.exit(3)
    log.info('resulting file is %d Bytes' % len(response))

    if options.save_output:
        log.info('saving to %r' % options.save_output)
        # close the output file explicitly so the data is flushed to disk
        with open(options.save_output, 'wb') as output_file:
            output_file.write(response)

    render_time = time.time() - start_time
    log.info('rendering ok, took %fs' % render_time)
Esempio n. 33
0
# Copyright (c) 2007-2009 PediaPress GmbH
# See README.rst for additional licensing information.

from mwlib import expander, nshandling, metabook
from mwlib.log import Log
from mwlib.refine import core, compat

# Module-level Log instance for this module, created under the name
# 'refine.uparser'.
log = Log('refine.uparser')


def parseString(title=None,
                raw=None,
                wikidb=None,
                revision=None,
                lang=None,
                magicwords=None,
                expandTemplates=True):
    """Parse the article named *title* from raw MediaWiki text.

    If ``raw`` is not supplied, the text is looked up with
    ``wikidb.normalize_and_get_page(title, 0)`` and taken from the
    returned page's ``rawtext`` attribute.

    Raises AssertionError when ``title`` is None, or when ``raw`` was not
    given and no page text could be obtained.
    """
    assert title is not None, 'no title given'

    uniquifier = None
    siteinfo = None

    if raw is None:
        # No text was passed in: fetch it from the wiki database.
        page = wikidb.normalize_and_get_page(title, 0)
        raw = page.rawtext if page else None
        assert raw is not None, "cannot get article %r" % (title, )

    input = raw
Esempio n. 34
0
#! /usr/bin/env python

# Copyright (c) 2007-2008 PediaPress GmbH
# See README.txt for additional licensing information.
"""expand magic variables/colon functions
http://meta.wikimedia.org/wiki/Help:Colon_function
http://meta.wikimedia.org/wiki/Help:Magic_words
http://meta.wikimedia.org/wiki/ParserFunctions
"""

import datetime
import urllib
from mwlib.log import Log
from mwlib import expr

# Module-level Log instance for this module, created under the name "expander".
log = Log("expander")


def singlearg(fun):
    """Decorator adapting a one-argument method to the ``(self, args)`` form.

    The returned wrapper is called as ``wrap(self, args)`` and forwards only
    the first element of ``args`` to ``fun``. When ``args`` is empty (or
    otherwise falsy) an empty unicode string is passed instead.

    @param fun: callable invoked as ``fun(self, a)``
    @returns: wrapper that keeps ``fun``'s name and docstring
    """
    # Imported locally so the decorator does not depend on a module-level
    # import being present.
    import functools

    @functools.wraps(fun)  # keep __name__/__doc__ of the wrapped method
    def wrap(self, args):
        if args:
            a = args[0]
        else:
            a = u''

        return fun(self, a)

    return wrap

Esempio n. 35
0
import time
import urlparse
import urllib
import urllib2
import httplib

try:
    import simplejson as json
except ImportError:
    import json

from mwlib.log import Log
from mwlib.utils import get_multipart
from mwlib import conf

# Module-level Log instance; note that it is created under the name "mwapidb".
log = Log("mwapidb")


class PODClient(object):
    """Small HTTP client bound to a single target URL.

    Attributes:
        posturl: target URL, stored UTF-8 encoded.
        redirecturl: optional URL kept as given; not used by the methods
            defined here.
    """

    def __init__(self, posturl, redirecturl=None):
        """Remember the target URL (UTF-8 encoded) and the redirect URL."""
        self.posturl = posturl.encode('utf-8')
        self.redirecturl = redirecturl

    def _post(self, data, content_type=None):
        """Send ``data`` to ``self.posturl`` and return the response body.

        @param data: request body handed to ``urllib2.Request``
        @param content_type: value for the ``Content-Type`` header; no
            header is set when it is None
        @returns: the complete response body as read from the connection
        """
        headers = {}
        if content_type is not None:
            headers['Content-Type'] = content_type

        request = urllib2.Request(self.posturl, data, headers=headers)
        response = urllib2.urlopen(request)
        try:
            return response.read()
        finally:
            # Close explicitly instead of relying on garbage collection to
            # release the underlying socket.
            response.close()
Esempio n. 36
0
# Copyright (c) 2007-2009 PediaPress GmbH
# See README.rst for additional licensing information.

import os
import zipfile
import shutil
import tempfile
import urllib
import sqlite3dbm
from hashlib import sha1
from mwlib import myjson as json

from mwlib import nshandling, utils
from mwlib.log import Log

# Module-level Log instance for this module, created under the name 'nuwiki'.
log = Log('nuwiki')


class page(object):
    """Simple record for a wiki page: metadata attributes plus raw text.

    Every key of ``meta`` becomes an instance attribute. ``rawtext`` is
    assigned afterwards, so it overrides a ``rawtext`` key in ``meta``.
    """

    # Class-level default; a key named "expanded" in ``meta`` overrides it
    # per instance.
    expanded = 0

    def __init__(self, meta, rawtext):
        vars(self).update(meta)
        self.rawtext = rawtext


class DumbJsonDB(object):
    """Holds the settings for a JSON database file.

    Attributes:
        fn: the value passed as ``fn`` (presumably a file name).
        allow_pickle: flag stored as given; defaults to False.
    """

    def __init__(self, fn, allow_pickle=False):
        self.allow_pickle = allow_pickle
        self.fn = fn
Esempio n. 37
0
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2007-2008 PediaPress GmbH
# See README.txt for additional licensing information.

import sys
import os
import re

from mwlib.scanner import tokenize, TagToken, EndTagToken
from mwlib.log import Log

# Module-level Log instance for this module, created under the name "parser".
log = Log("parser")

# Module-level TagToken instances built once for the names "li" and "div"
# (presumably the HTML <li> and <div> tags -- confirm against mwlib.scanner).
tag_li = TagToken("li")
tag_div = TagToken("div")


class TokenSet(object):
    """Membership test that matches by value or by exact type.

    Items of ``lst`` that are classes are collected in ``self.types``; all
    other items are collected in ``self.values``. ``x in token_set`` is true
    when ``x`` equals one of the stored values or when ``type(x)`` is exactly
    one of the stored types (subclasses do not match).
    """

    def __init__(self, lst):
        self.types = set()
        self.values = set()

        # Single pass, so one-shot iterables work too.
        for item in lst:
            bucket = self.types if isinstance(item, type) else self.values
            bucket.add(item)

    def __contains__(self, x):
        # Value lookup first, then fall back to the exact-type lookup.
        if x in self.values:
            return True
        return type(x) in self.types
Esempio n. 38
0
import os

from lxml.builder import ElementMaker
from mwlib.log import Log
from pycountry import languages

# Module-level Log instance named after this module's dotted path.
log = Log("mwlib.pdf.generators.front_matter")
# Shared lxml ElementMaker; attribute access such as E.img(...) builds elements.
E = ElementMaker()


def generate_cover_page(env, lang):
    """Collect the texts and logo elements for the cover page.

    Args:
        env: object providing ``wiki.siteinfo`` (a mapping with a
            "general" entry) and ``metabook`` (``title``, ``subtitle``,
            ``editor`` and ``items`` attributes).
        lang: currently not referenced by this function.
    """
    site_name = env.wiki.siteinfo["general"].get("sitename")
    title = env.metabook.title or _("Wiki Articles")
    # Translate the template first and interpolate afterwards. Formatting
    # before the lookup produces a message id that never matches a catalog
    # entry.
    subtitle = env.metabook.subtitle or _(
        "A collection from {}").format(site_name)
    editor = env.metabook.editor or "ckepper"
    article_count = get_article_count(env.metabook.items)
    # Logo files live in the "images" directory next to this package dir.
    img_path = os.path.join(os.path.dirname(__file__), "..", "images")

    wikipedia_logo = E.img({
        "src": os.path.join(img_path, "Wikipedia_wordmark.svg"),
        "class": "wikipedia_logo",
    })
    pediapress_logo = E.img({
        "src": os.path.join(img_path, "pediapress_square_bw.svg"),
        "class": "pediapress_logo",
    })
Esempio n. 39
0
import sys
import tempfile
import time
import traceback
import urllib
import urllib2
import urlparse
import UserDict

from mwlib.log import Log

from hashlib import md5

# Rebind the builtin all() as a module-level name so it is also available as
# an attribute of this module.
# NOTE(review): the earlier remark said this provides all() for Python 2.4,
# but a plain self-assignment only works where the builtin already exists --
# presumably a leftover from a removed fallback implementation; confirm.
all = all
# Module-level Log instance for this module, created under the name 'mwlib.utils'.
log = Log('mwlib.utils')


def fsescape(s):
    """Escape string to be safely used in path names

    @param s: some string
    @type s: basestring

    @returns: escaped string
    @rtype: str
    """

    # NOTE(review): the body below only starts the per-character loop -- no
    # escaping is applied and nothing is returned yet, which does not match
    # the docstring; confirm against the complete implementation.
    res = []
    for x in s:
        # Numeric code point of the current character.
        c = ord(x)
Esempio n. 40
0
import sys
import tempfile
import time
import traceback
import urllib
import urllib2
import urlparse
import UserDict

from mwlib.log import Log

from hashlib import md5

# Rebind the builtin all() as a module-level name so it is also available as
# an attribute of this module.
# NOTE(review): the earlier remark said this provides all() for Python 2.4,
# but a plain self-assignment only works where the builtin already exists --
# presumably a leftover from a removed fallback implementation; confirm.
all = all
# Module-level Log instance for this module, created under the name "mwlib.utils".
log = Log("mwlib.utils")


def fsescape(s):
    """Escape string to be safely used in path names

    @param s: some string
    @type s: basestring

    @returns: escaped string
    @rtype: str
    """

    res = []
    for x in s:
        c = ord(x)