Exemple #1
0
 def __call__(self, content, arguments=None):
     """
     Highlight the given lines with pygments and wrap them in a page.

     content is an iterable of text lines; they are joined with newlines
     before being passed to pygments. arguments is accepted but unused.
     Returns a moin_page.page whose body holds a single blockcode element
     carrying the 'highlight' class.
     """
     source = u'\n'.join(content)
     highlighted = moin_page.blockcode(attrib={moin_page.class_: 'highlight'})
     # the blockcode element is handed to pygments as the output target
     pygments.highlight(source, self.lexer, TreeFormatter(), highlighted)
     page_body = moin_page.body(children=(highlighted, ))
     return moin_page.page(children=(page_body, ))
Exemple #2
0
    def __call__(self, data, contenttype=None, arguments=None):
        """
        Convert HTML input into a moin_page DOM tree.

        The data is decoded, split into lines and re-joined into a single
        string which is parsed with HTML(). If the parsed root is neither
        <html> nor <div>, the markup is wrapped in a <div> and parsed again.
        Note that an <html> root becomes a <div> after one roundtrip.

        Returns a moin_page.page containing one body whose children are the
        converted children of the root element.

        TODO: Add support for different arguments
        """
        lines = normalize_split_text(decode_data(data, contenttype))
        # Be sure we have empty string in the base url
        self.base_url = ''

        # The content is a list of lines; concatenate them into one string
        markup = ''.join(lines)
        tree = HTML(markup)

        # A root element is required; it will be converted to <page>
        if tree.tag.name not in ('html', 'div'):
            markup = '<div>' + markup + '</div>'
            tree = HTML(markup)

        # Converting the root recursively converts every descendant too
        converted = self.do_children(tree)

        page_body = moin_page.body(children=converted)
        return moin_page.page(children=[page_body])
Exemple #3
0
 def __call__(self, data, contenttype=None, arguments=None):
     """
     Convert CSV data into a page holding a single table.

     The dialect is sniffed from the first line. Empty rows are dropped.
     When csv.Sniffer reports a header, the first row becomes the table
     head and the table gets the 'moin-sortable' class. If header detection
     raises csv.Error, the error text is shown as the table head instead.
     """
     lines = normalize_split_text(decode_data(data, contenttype))
     # as of py 2.7.x (and in the year 2013), the csv module seems to still
     # have troubles with unicode, thus we feed it utf-8 encoded lines ...
     encoded_lines = [line.encode('utf-8') for line in lines]
     dialect = csv.Sniffer().sniff(encoded_lines[0])
     # ... and decode every cell back to unicode
     rows = []
     for encoded_row in csv.reader(encoded_lines, dialect):
         decoded_row = [cell.decode('utf-8') for cell in encoded_row]
         if decoded_row:
             rows.append(decoded_row)
     head = cls = None
     try:
         # fragile function throws errors when csv file is incorrectly formatted
         header_found = csv.Sniffer().has_header('\n'.join(encoded_lines))
     except csv.Error as e:
         head = [_('Error parsing CSV file:'), str(e)]
     else:
         if header_found:
             head, rows = rows[0], rows[1:]
             cls = 'moin-sortable'
     table = self.build_dom_table(rows, head=head, cls=cls)
     page_body = moin_page.body(children=(table, ))
     return moin_page.page(children=(page_body, ))
 def __init__(self):
     """
     Set up an empty page/body tree and the bookkeeping state.

     current_node starts at the body, path records the chain from the root
     page to the current node.
     """
     body = moin_page.body()
     page = moin_page.page(children=(body, ))
     self.current_node = body
     self.root = page
     self.path = [page, body]
     self.header_size = 1
     self.status = ['document']
     self.footnotes = {}
 def __call__(self, data, contenttype=None, arguments=None):
     """
     Build a table page from CSV input.

     The first line is used to sniff the CSV dialect; rows without cells
     are skipped. If csv.Sniffer detects a header row it is used as the
     table head and the table is marked 'moin-sortable'. A csv.Error from
     header detection is reported through the table head.
     """
     text = decode_data(data, contenttype)
     # as of py 2.7.x (and in the year 2013), the csv module seems to still
     # have troubles with unicode, thus we encode to utf-8 ...
     utf8_lines = [line.encode('utf-8') for line in normalize_split_text(text)]
     sniffed = csv.Sniffer().sniff(utf8_lines[0])
     # ... and decode back to unicode
     rows = []
     for raw_row in csv.reader(utf8_lines, sniffed):
         cells = [raw_cell.decode('utf-8') for raw_cell in raw_row]
         if cells:
             rows.append(cells)
     head = None
     cls = None
     try:
         # fragile function throws errors when csv file is incorrectly formatted
         has_header = csv.Sniffer().has_header('\n'.join(utf8_lines))
     except csv.Error as e:
         head = [_('Error parsing CSV file:'), str(e)]
     else:
         if has_header:
             head = rows[0]
             rows = rows[1:]
             cls = 'moin-sortable'
     table = self.build_dom_table(rows, head=head, cls=cls)
     return moin_page.page(children=(moin_page.body(children=(table, )), ))
    def parser(self, name, args, content):
        """
        Build a moin_page.part element describing a parser invocation.

        name is used as a full content type when it contains a '/',
        otherwise it becomes the 'name' parameter of an x-moin/format type.
        args, when non-empty, is a mapping whose items become argument
        children (keyed ones get a name attribute). content, when non-empty,
        becomes the children of a body element appended to the part.
        """
        if '/' in name:
            content_type = Type(name)
        else:
            content_type = Type(type='x-moin', subtype='format',
                                parameters={'name': name})
        logging.debug("parser type: %r" % (content_type, ))

        part = moin_page.part(attrib={moin_page.content_type: content_type})

        if args:
            arguments_elem = moin_page.arguments()
            part.append(arguments_elem)

            for key, value in args.items():
                # only keyed arguments carry a name attribute
                name_attrib = {moin_page.name: key} if key else {}
                arguments_elem.append(
                    moin_page.argument(attrib=name_attrib, children=(value, )))

        if content:
            part.append(moin_page.body(children=content))

        return part
Exemple #7
0
    def __call__(self, data, contenttype=None, arguments=None):
        """
        Entry point used by the converter framework for HTML input.

        Decodes the data, joins its lines into one string and parses it
        with HTML(). A root other than <html> or <div> is wrapped in a
        <div> and re-parsed. The converted children of the root are placed
        in a body inside the returned page.

        TODO: Add support for different arguments
        """
        text = decode_data(data, contenttype)
        pieces = normalize_split_text(text)
        # Be sure we have empty string in the base url
        self.base_url = ''

        # pieces is a list of strings, one per line; glue them together
        source = ''.join(pieces)
        root_elem = HTML(source)

        # The root will be converted to <page>; it must be <html> or <div>.
        # NB: an <html> root is converted back to <div> after one roundtrip
        root_name = root_elem.tag.name
        if root_name != 'html' and root_name != 'div':
            source = ''.join(('<div>', source, '</div>'))
            root_elem = HTML(source)

        # Children of every element are converted recursively
        children = self.do_children(root_elem)

        # Wrap the converted children in the global body/page elements
        return moin_page.page(children=[moin_page.body(children=children)])
Exemple #8
0
    def __call__(self, rev, contenttype=None, arguments=None):
        """
        Return a page embedding the revision as a moin_page.object_ element.

        The object links to the item with a 'do=get' query for this
        revision. If arguments carries an xinclude.href with a query part,
        those query parameters are merged in.

        NOTE: when arguments is given, arguments.keyword itself is used as
        the attribute mapping and is updated in place.
        """
        item_name = rev.item.name
        params = {'do': 'get', 'rev': rev.revid}
        attrib = {}
        if arguments:
            href = arguments.keyword.get(xinclude.href)
            if href and href.query:
                # href.query value is similar to  "w=75" given a transclusion "{{jpeg||&w=75 class="top"}}"
                params.update(url_decode(href.query))
            attrib = arguments.keyword

        encoded_query = url_encode(params, charset=CHARSET, encode_keys=True)

        object_attrib = {
            moin_page.type_: unicode(self.input_type),
            xlink.href: Iri(scheme='wiki', authority='', path='/' + item_name,
                            query=encoded_query),
        }
        attrib.update(object_attrib)

        obj = moin_page.object_(attrib=attrib, children=[item_name])
        page_body = moin_page.body(children=(obj, ))
        return moin_page.page(children=(page_body, ))
Exemple #9
0
 def __init__(self):
     """
     Create the initial DOM (a page holding one empty body) and reset state.

     The body is the current insertion node; path lists the elements from
     the root down to it.
     """
     self.current_node = moin_page.body()
     self.root = moin_page.page(children=(self.current_node, ))
     self.path = [self.root, self.current_node]
     self.status = ['document']
     self.header_size = 1
     self.footnotes = {}
Exemple #10
0
 def __call__(self, data, contenttype=None, arguments=None):
     """
     Decode the data, highlight it with pygments and return a DOM page.

     The decoded text is split into lines and re-joined with newlines
     before highlighting. The page body contains one blockcode element
     with the 'highlight' class.
     """
     lines = normalize_split_text(decode_data(data, contenttype))
     source = u'\n'.join(lines)
     target = moin_page.blockcode(attrib={moin_page.class_: 'highlight'})
     # the blockcode element is passed to pygments as the output target
     pygments.highlight(source, self.lexer, TreeFormatter(), target)
     page_body = moin_page.body(children=(target, ))
     return moin_page.page(children=(page_body, ))
Exemple #11
0
 def __call__(self, data, contenttype=None, arguments=None):
     """
     Produce a page with the pygments-highlighted form of the input data.

     data is decoded according to contenttype, split into lines and joined
     with newlines; the result is highlighted using self.lexer into a
     blockcode element carrying the 'highlight' class.
     """
     decoded = decode_data(data, contenttype)
     code = u'\n'.join(normalize_split_text(decoded))
     code_elem = moin_page.blockcode(attrib={moin_page.class_: 'highlight'})
     pygments.highlight(code, self.lexer, TreeFormatter(), code_elem)
     return moin_page.page(children=(moin_page.body(children=(code_elem, )), ))
 def __call__(self, rev, contenttype=None, arguments=None):
     """
     Return a page with a single link offering to create the missing item.

     The link targets the item's fully qualified name with the
     'do=modify' query.
     """
     item_name = rev.item.fqname.value
     target = Iri(scheme='wiki', authority='', path='/' + item_name, query='do=modify')
     label = _("%(item_name)s does not exist. Create it?", item_name=item_name)
     link = moin_page.a(attrib={xlink.href: target}, children=[label])
     page_body = moin_page.body(children=(link, ))
     return moin_page.page(children=(page_body, ))
Exemple #13
0
 def __call__(self, rev, contenttype=None, arguments=None):
     """
     Return a page containing a download link for the given revision.

     The link points at the item with a 'do=get' query selecting this
     revision id.
     """
     item_name = rev.item.name
     target = Iri(scheme='wiki', authority='', path='/' + item_name,
                  query='do=get&rev={0}'.format(rev.revid))
     label = u"Download {0}.".format(item_name)
     link = moin_page.a(attrib={xlink.href: target}, children=[label])
     return moin_page.page(children=(moin_page.body(children=(link, )), ))
Exemple #14
0
 def __call__(self, content, arguments=None):
     """
     Parse the text lines and return a DOM tree.

     Every line has its tabs expanded and is appended to one blockcode
     element; a newline is inserted before each line once the element
     already has children.
     """
     code = moin_page.blockcode()
     for text_line in content:
         if len(code) > 0:
             code.append('\n')
         code.append(text_line.expandtabs())
     page_body = moin_page.body(children=(code, ))
     return moin_page.page(children=(page_body, ))
Exemple #15
0
 def __call__(self, content, arguments=None):
     """
     Turn an iterable of text lines into a page with one blockcode.

     Lines are tab-expanded; a newline child separates a line from
     whatever the blockcode already contains.
     """
     pre = moin_page.blockcode()
     for raw_line in content:
         expanded = raw_line.expandtabs()
         if len(pre) != 0:
             pre.append('\n')
         pre.append(expanded)
     return moin_page.page(children=(moin_page.body(children=(pre, )), ))
Exemple #16
0
 def __call__(self, data, contenttype=None, arguments=None):
     """
     Decode plain text data and return it as a page with one blockcode.

     The decoded text is split into lines; each line is tab-expanded and
     appended, with a newline inserted before it once the blockcode
     already has children.
     """
     lines = normalize_split_text(decode_data(data, contenttype))
     code = moin_page.blockcode()
     for text_line in lines:
         if len(code) > 0:
             code.append('\n')
         code.append(text_line.expandtabs())
     page_body = moin_page.body(children=(code, ))
     return moin_page.page(children=(page_body, ))
Exemple #17
0
 def __call__(self, data, contenttype=None, arguments=None):
     """
     Render the decoded input as preformatted text inside a page.

     Each line of the decoded data has tabs expanded and is added to a
     single blockcode element, separated by newline children.
     """
     decoded = decode_data(data, contenttype)
     pre = moin_page.blockcode()
     for raw_line in normalize_split_text(decoded):
         expanded = raw_line.expandtabs()
         if len(pre) != 0:
             pre.append('\n')
         pre.append(expanded)
     return moin_page.page(children=(moin_page.body(children=(pre, )), ))
 def __call__(self, rev, contenttype=None, arguments=None):
     """
     Return a page embedding the revision as an audio/video object.

     The object carries this converter's input type and a 'do=get' link
     to the revision; its child text is the fallback message for browsers
     without HTML5 audio/video support.
     """
     item_name = rev.item.name
     media_type = unicode(self.input_type)
     target = Iri(scheme='wiki', authority='', path='/' + item_name,
                  query='do=get&rev={0}'.format(rev.revid))
     fallback = u'Your Browser does not support HTML5 audio/video element.'
     obj = moin_page.object_(
         attrib={moin_page.type_: media_type, xlink.href: target},
         children=[fallback])
     page_body = moin_page.body(children=(obj, ))
     return moin_page.page(children=(page_body, ))
Exemple #19
0
 def __call__(self, rev, contenttype=None, arguments=None):
     """
     Return a page embedding the revision as a moin_page.object_ element.

     The object is typed with this converter's input type, links to the
     revision via a 'do=get' query and uses the item name as its child.
     """
     item_name = rev.item.name
     media_type = unicode(self.input_type)
     target = Iri(scheme="wiki", authority="", path="/" + item_name,
                  query="do=get&rev={0}".format(rev.revid))
     obj = moin_page.object_(
         attrib={moin_page.type_: media_type, xlink.href: target},
         children=[item_name])
     return moin_page.page(children=(moin_page.body(children=(obj,)),))
Exemple #20
0
    def __call__(self, data, contenttype=None, arguments=None):
        """
        Convert HTML input into a moin_page DOM tree, tolerating bad markup.

        Tab sequences left by CKEditor are cleaned up first. If HTML()
        rejects the markup with an AssertionError, an error notice followed
        by the input inside <pre> is parsed instead; if that fails with a
        ValueError, only an error notice is shown. A parsed root other than
        <html> is wrapped in a <div> and parsed again.

        TODO: Add support for different arguments
        """
        raw = decode_data(data, contenttype)
        # data cleanup is not needed by html_out, but is needed by moinwiki_out; CKEditor adds unwanted \n\t
        while '\t\t' in raw:
            raw = raw.replace('\t\t', '\t')
        raw = raw.replace('\r\n\t', '')
        raw = raw.replace('\n\t', '')

        lines = normalize_split_text(raw)
        # Be sure we have empty string in the base url
        self.base_url = ''

        # lines is a list of strings, one per line; parse them as one string
        markup = u'\n'.join(lines)
        error_box = '<div class="error"><p><strong>%s</strong></p></div>'
        try:
            tree = HTML(markup)
        except AssertionError as reason:
            # we suspect user has created or uploaded malformed HTML, try to show input as preformatted code
            notice = _('Error: malformed HTML: {reason}.').format(reason=reason)
            notice = error_box % notice
            markup = ''.join(['<html>', notice, '<pre>', markup, '</pre></html>'])
            try:
                tree = HTML(markup)
            except ValueError:
                notice = _('Error: malformed HTML. Try viewing source with Highlight or Modify links.')
                notice = error_box % notice
                markup = ''.join(['<html>', notice, '</html>'])
                tree = HTML(markup)

        # A root element is required; it will be converted to <page>.
        # NB: an <html> root is converted back to <div> after one roundtrip
        if tree.tag.name != 'html':
            markup = ''.join(['<div>', markup, '</div>'])
            tree = HTML(markup)

        # Children of every element are converted recursively
        converted = self.do_children(tree)

        # Wrap the converted children in the global body/page elements
        page_body = moin_page.body(children=converted)
        return moin_page.page(children=[page_body])
Exemple #21
0
 def __call__(self, rev, contenttype=None, arguments=None):
     """
     Build a page whose only content is a download link for the revision.

     The link uses the wiki scheme, the item name as path and a 'do=get'
     query carrying the revision id.
     """
     item_name = rev.item.name
     query = 'do=get&rev={0}'.format(rev.revid)
     href = Iri(scheme='wiki', authority='', path='/' + item_name, query=query)
     text = u"Download {0}.".format(item_name)
     anchor = moin_page.a(attrib={xlink.href: href}, children=[text])
     page_body = moin_page.body(children=(anchor, ))
     return moin_page.page(children=(page_body, ))
Exemple #22
0
    def handle_macro(self, elem, page):
        """
        Execute the macro described by elem and attach its output to elem.

        elem is only processed when its content type starts with
        'x-moin/macro;name='; the remainder of the content type is the
        macro name. Elements that already contain a body or inline-body
        child are left untouched. An 'arguments' child, if present, is
        turned into an Arguments object (keyword and positional values).

        The macro result is appended to a body (block context, i.e. elem is
        a moin_page.part) or inline-body element. Any exception raised
        while loading or running the macro is logged and reported through a
        moin_page.error element instead of aborting page rendering.
        Non-empty result/error elements are appended to elem.

        Returns None.
        """
        content_type = elem.get(moin_page.content_type)
        alt = elem.get(moin_page.alt)

        # TODO
        prefix = 'x-moin/macro;name='
        if not content_type or not content_type.startswith(prefix):
            return
        name = content_type[len(prefix):]

        context_block = elem.tag == moin_page.part

        args_tree = None
        for item in elem:
            if item.tag.uri == moin_page.namespace:
                if item.tag.name in ('body', 'inline-body'):
                    # a body is already present; do not run the macro
                    return
                if item.tag.name == 'arguments':
                    args_tree = item

        args = None
        if args_tree:
            args = Arguments()
            for arg in args_tree:
                key = arg.get(moin_page.name)
                value = arg[0]
                if key:
                    args.keyword[key] = value
                else:
                    args.positional.append(value)

        # conditional expression instead of "cond and a or b": the latter
        # would pick the inline body whenever an empty element is falsy
        elem_body = moin_page.body() if context_block else moin_page.inline_body()
        elem_error = moin_page.error()

        try:
            # importing inside the try keeps a missing/broken macro plugin
            # from aborting the rendering of the whole page
            cls = plugins.importPlugin(app.cfg, 'macro', name, function='Macro')
            macro = cls()  # XXX refactor all macros so they are OK without "request"
            ret = macro((), args, page, alt, context_block)

            elem_body.append(ret)
        except Exception as e:
            # we do not want that a faulty macro aborts rendering of the page
            # and makes the wiki UI unusable (by emitting a Server Error),
            # thus, in case of exceptions, we just log the problem and return
            # some standard text.
            logger.exception("Macro %s raised an exception:" % name)
            elem_error.append(_('<<%(macro_name)s: execution failed [%(error_msg)s] (see also the log)>>',
                    macro_name=name,
                    error_msg=unicode(e),
                ))

        # attach whatever was produced; without this the results are lost
        if len(elem_body):
            elem.append(elem_body)
        if len(elem_error):
            elem.append(elem_error)
Exemple #23
0
    def parse_block(self, iter_content, arguments):
        """
        Parse the lines of iter_content into a new moin_page.body element.

        The 'style' and 'class' keyword arguments, if given, are copied to
        the body's attributes. Each line is matched with self.indent_re and
        its non-None named groups are passed as keyword arguments to
        self.indent_repl.
        """
        body_attrib = {}
        if arguments:
            for key, value in arguments.keyword.iteritems():
                if key == 'style' or key == 'class':
                    body_attrib[moin_page(key)] = value

        body = moin_page.body(attrib=body_attrib)
        stack = _Stack(body, iter_content=iter_content)

        for line in iter_content:
            groups = self.indent_re.match(line).groupdict()
            data = {}
            for group_name, group_value in groups.iteritems():
                if group_value is not None:
                    data[str(group_name)] = group_value
            self.indent_repl(iter_content, stack, line, **data)

        return body
Exemple #24
0
 def __call__(self, rev, contenttype=None, arguments=None):
     """
     List the contents of an archive revision as a table.

     On success a page with a 'zebra' table (size, timestamp, name) is
     returned. If an ArchiveException occurs it is logged and a
     one-cell table with the error text is returned directly (not wrapped
     in a page).
     """
     self.item_name = rev.item.name
     try:
         rows = []
         for size, dt, name in self.list_contents(rev.data):
             rows.append((self.process_size(size),
                          self.process_datetime(dt),
                          self.process_name(name)))
         headings = [_("Size"), _("Timestamp"), _("Name")]
         table = self.build_dom_table(rows, head=headings, cls='zebra')
         page_body = moin_page.body(children=(table, ))
         return moin_page.page(children=(page_body, ))
     except ArchiveException as err:
         logging.exception("An exception within archive file handling occurred:")
         # XXX we also use a table for error reporting, could be
         # something more adequate, though:
         error_rows = [[str(err)]]
         return self.build_dom_table(error_rows)
Exemple #25
0
    def parse_block(self, iter_content, arguments):
        """
        Parse block-level content into a new moin_page.body element.

        A 'style' keyword argument, if given, becomes a body attribute.
        Each line is matched against self.block_re and dispatched through
        self._apply with the 'block' prefix.
        """
        body_attrib = {}
        if arguments:
            for key, value in arguments.keyword.iteritems():
                if key == 'style':
                    body_attrib[moin_page(key)] = value

        body = moin_page.body(attrib=body_attrib)
        stack = _Stack(body, iter_content=iter_content)

        # Please note that the iterator can be modified by other functions
        for line in iter_content:
            self._apply(self.block_re.match(line), 'block', iter_content, stack)

        return body
Exemple #26
0
    def parse_block(self, iter_content, arguments):
        """
        Build a moin_page.body from the lines yielded by iter_content.

        Only the 'style' keyword argument is carried over to the body
        attributes. Every line is matched with self.block_re and the match
        is handed to self._apply using the "block" prefix.
        """
        attrib = {}
        keywords = arguments.keyword if arguments else {}
        for key, value in keywords.iteritems():
            if key == "style":
                attrib[moin_page(key)] = value

        body = moin_page.body(attrib=attrib)
        stack = _Stack(body, iter_content=iter_content)

        # Please note that the iterator can be modified by other functions
        for line in iter_content:
            block_match = self.block_re.match(line)
            self._apply(block_match, "block", iter_content, stack)

        return body
    def parse_block(self, iter_content, arguments):
        """
        Parse the lines of iter_content into a new moin_page.body element.

        Keyword arguments handled:
        - 'style' is copied to the body attributes,
        - '_old' becomes the body class, with '/' replaced by spaces.

        Each line is matched with self.indent_re; the non-None named groups
        are passed as keyword arguments to self.indent_repl. A line that
        indent_re does not match is passed on with an empty indent and the
        whole line as text instead of raising AttributeError on the missing
        match.
        """
        attrib = {}
        if arguments:
            for key, value in arguments.keyword.iteritems():
                if key in ('style', ):
                    attrib[moin_page(key)] = value
                elif key == '_old':
                    attrib[moin_page.class_] = value.replace('/', ' ')

        body = moin_page.body(attrib=attrib)

        stack = _Stack(body, iter_content=iter_content)

        for line in iter_content:
            match = self.indent_re.match(line)
            if match:
                data = dict(((str(k), v) for k, v in match.groupdict().iteritems() if v is not None))
                self.indent_repl(iter_content, stack, line, **data)
            else:
                # no match: treat the whole line as unindented text
                self.indent_repl(iter_content, stack, line, '', line)

        return body
 def __call__(self, rev, contenttype=None, arguments=None):
     """
     Wrap the revision in an audio/video object element inside a page.

     The object is typed with this converter's input type and links to
     the revision through a 'do=get' query. Its text child is the message
     shown when the browser lacks HTML5 audio/video support.
     """
     item_name = rev.item.name
     object_type = unicode(self.input_type)
     href = Iri(scheme='wiki', authority='', path='/' + item_name,
                query='do=get&rev={0}'.format(rev.revid))
     attrib = {moin_page.type_: object_type, xlink.href: href}
     message = u'Your Browser does not support HTML5 audio/video element.'
     obj = moin_page.object_(attrib=attrib, children=[message])
     return moin_page.page(children=(moin_page.body(children=(obj, )), ))
 def __call__(self, data, contenttype=None, arguments=None):
     """
     Convert CSV data into a page holding a single table.

     The dialect is sniffed from the first line; rows without cells are
     dropped. No header detection is performed.
     """
     lines = normalize_split_text(decode_data(data, contenttype))
     # as of py 2.7.x (and in the year 2013), the csv module seems to still
     # have troubles with unicode, thus we feed it utf-8 encoded lines ...
     encoded_lines = [line.encode('utf-8') for line in lines]
     dialect = csv.Sniffer().sniff(encoded_lines[0])
     # ... and decode every cell back to unicode
     rows = []
     for encoded_row in csv.reader(encoded_lines, dialect):
         decoded_row = [cell.decode('utf-8') for cell in encoded_row]
         if decoded_row:
             rows.append(decoded_row)
     table = self.build_dom_table(rows)
     page_body = moin_page.body(children=(table, ))
     return moin_page.page(children=(page_body, ))
Exemple #30
0
    def handle_macro(self, elem, page):
        """
        Execute the macro described by elem and attach its output to elem.

        elem is skipped when it has no content type or when the type is not
        x-moin/macro. The macro name is read from the type's 'name'
        parameter and the first child of elem (if any) is passed to the
        macro as its arguments.

        The macro result goes into a body (block context, i.e. elem is a
        moin_page.part) or inline-body element. An unknown macro name or
        any exception raised by the macro is reported through a
        moin_page.error element instead of aborting page rendering.
        Non-empty result/error elements are appended to elem.

        Returns None.
        """
        logging.debug("handle_macro elem: %r" % elem)
        type = elem.get(moin_page.content_type)
        alt = elem.get(moin_page.alt)

        if not type:
            return

        type = Type(type)
        if not (type.type == 'x-moin' and type.subtype == 'macro'):
            logging.debug("not a macro, skipping: %r" % (type, ))
            return

        name = type.parameters['name']
        context_block = elem.tag == moin_page.part
        args = elem[0] if len(elem) else None
        # conditional expression instead of "cond and a or b": the latter
        # would pick the inline body whenever an empty element is falsy
        elem_body = moin_page.body() if context_block else moin_page.inline_body()
        elem_error = moin_page.error()

        try:
            cls = plugins.importPlugin(app.cfg, 'macro', name, function='Macro')
            macro = cls()
            ret = macro((), args, page, alt, context_block)
            elem_body.append(ret)

        except PluginMissingError:
            elem_error.append('<<%s>> %s' % (name, _('Error: invalid macro name.')))

        except Exception as e:
            # we do not want that a faulty macro aborts rendering of the page
            # and makes the wiki UI unusable (by emitting a Server Error),
            # thus, in case of exceptions, we just log the problem and return
            # some standard text.
            logging.exception("Macro {0} raised an exception:".format(name))
            elem_error.append(_('<<%(macro_name)s: execution failed [%(error_msg)s] (see also the log)>>',
                              macro_name=name, error_msg=unicode(e), ))

        # only attach the elements that actually received content
        if len(elem_body):
            elem.append(elem_body)
        if len(elem_error):
            elem.append(elem_error)
Exemple #31
0
    def __call__(self, rev, contenttype=None, arguments=None):
        """
        Return a page embedding the revision as a moin_page.object_ element.

        The object links to the item with a 'do=get' query for this
        revision. If arguments carries an xinclude.href that has a query
        part, those query parameters are merged in; a missing xinclude.href
        is tolerated.

        NOTE: when arguments is given, arguments.keyword itself is used as
        the attribute mapping and is updated in place.
        """
        item_name = rev.item.name
        query_keys = {'do': 'get', 'rev': rev.revid}
        attrib = {}
        if arguments:
            # .get() returns None when no xinclude.href was passed, so the
            # .query attribute must not be read unconditionally
            href = arguments.keyword.get(xinclude.href)
            if href and href.query:
                query_keys.update(url_decode(href.query))
            attrib = arguments.keyword

        query = url_encode(query_keys, charset=CHARSET, encode_keys=True)

        attrib.update({
            moin_page.type_: unicode(self.input_type),
            xlink.href: Iri(scheme='wiki', authority='', path='/' + item_name,
                            query=query),
        })

        obj = moin_page.object_(attrib=attrib, children=[item_name, ])
        body = moin_page.body(children=(obj, ))
        return moin_page.page(children=(body, ))
Exemple #32
0
    def parse_block(self, iter_content, arguments):
        """
        Parse the lines of iter_content into a new moin_page.body element.

        The "style" keyword argument is copied to the body attributes and
        "_old" becomes the body class with "/" replaced by spaces. Lines
        matching self.indent_re are passed to self.indent_repl with their
        non-None named groups as keyword arguments; other lines are passed
        with an empty indent and the whole line as text.
        """
        body_attrib = {}
        if arguments:
            for key, value in arguments.keyword.iteritems():
                if key == "style":
                    body_attrib[moin_page(key)] = value
                elif key == "_old":
                    body_attrib[moin_page.class_] = value.replace("/", " ")

        body = moin_page.body(attrib=body_attrib)
        stack = _Stack(body)

        for line in iter_content:
            indent_match = self.indent_re.match(line)
            if not indent_match:
                self.indent_repl(iter_content, stack, line, "", line)
                continue
            data = {}
            for group_name, group_value in indent_match.groupdict().iteritems():
                if group_value is not None:
                    data[str(group_name)] = group_value
            self.indent_repl(iter_content, stack, line, **data)

        return body
Exemple #33
0
 def __call__(self, rev, contenttype=None, arguments=None):
     """
     Show the member list of an archive revision as a table.

     Each member is rendered as (size, timestamp, name) in a 'zebra'
     table wrapped in a page. An ArchiveException is logged and reported
     by returning a bare one-cell table containing the error text.
     """
     self.item_name = rev.item.name
     try:
         members = self.list_contents(rev.data)
         rows = [
             (self.process_size(size), self.process_datetime(dt), self.process_name(name))
             for size, dt, name in members
         ]
         head = [_("Size"), _("Timestamp"), _("Name")]
         table = self.build_dom_table(rows, head=head, cls='zebra')
         return moin_page.page(children=(moin_page.body(children=(table, )), ))
     except ArchiveException as err:
         logging.exception("An exception within archive file handling occurred:")
         # XXX we also use a table for error reporting, could be
         # something more adequate, though:
         message = str(err)
         return self.build_dom_table([[message]])
Exemple #34
0
    def parse_block(self, iter_content, arguments):
        """
        Build a moin_page.body from the lines yielded by iter_content.

        Keyword arguments: 'style' is stored as a body attribute, '_old'
        is stored as the body class after replacing '/' with spaces.
        For every line, the non-None named groups of self.indent_re are
        forwarded to self.indent_repl; a line without a match is forwarded
        with '' as indent and the line itself as text.
        """
        attrib = {}
        keywords = arguments.keyword if arguments else {}
        for key, value in keywords.iteritems():
            if key == 'style':
                attrib[moin_page(key)] = value
            elif key == '_old':
                attrib[moin_page.class_] = value.replace('/', ' ')

        body = moin_page.body(attrib=attrib)
        stack = _Stack(body, iter_content=iter_content)

        for line in iter_content:
            found = self.indent_re.match(line)
            if found:
                named = found.groupdict()
                data = dict((str(k), named[k]) for k in named if named[k] is not None)
                self.indent_repl(iter_content, stack, line, **data)
            else:
                self.indent_repl(iter_content, stack, line, '', line)

        return body
    def parser(self, name, args, content):
        """
        Create the moin_page.part element for a parser section.

        A name containing '/' is taken as a complete content type; any
        other name is wrapped as x-moin/format with the name as parameter.
        Items of a non-empty args mapping are added as argument elements
        below an arguments element, and non-empty content is added as the
        children of a body element.
        """
        if '/' not in name:
            parser_type = Type(type='x-moin', subtype='format', parameters={'name': name})
        else:
            parser_type = Type(name)
        logging.debug("parser type: %r" % (parser_type, ))

        elem = moin_page.part(attrib={moin_page.content_type: parser_type})

        if args:
            args_elem = moin_page.arguments()
            elem.append(args_elem)

            for key, value in args.items():
                arg_attrib = {}
                if key:
                    # unnamed (positional) arguments get no name attribute
                    arg_attrib[moin_page.name] = key
                args_elem.append(moin_page.argument(attrib=arg_attrib, children=(value, )))

        if content:
            elem.append(moin_page.body(children=content))

        return elem
class Converter(object):
    """
    Convert markdown text into a moin_page DOM tree.

    The input is parsed with a ``Markdown`` instance (extensions 'extra' and
    'toc'); the resulting ElementTree is walked by the ``visit_*`` methods,
    which build the moin_page elements.  Element tags coming from the
    markdown parser are plain strings.
    """
    # {{{ html conversion

    # HTML tags which can be converted directly to the moin_page namespace
    symmetric_tags = set(['div', 'p', 'strong', 'code', 'quote', 'blockquote'])

    # HTML tags to define a list, except dl which is a little bit different
    list_tags = set(['ul', 'ol'])

    # HTML tags which can be converted without attributes into a different DOM tag
    simple_tags = {  # Emphasis
        'em': moin_page.emphasis,
        'i': moin_page.emphasis,
        # Strong
        'b': moin_page.strong,
        'strong': moin_page.strong,
        # Code and Blockcode
        'pre': moin_page.blockcode,
        'tt': moin_page.code,
        'samp': moin_page.code,
        # Lists
        'dl': moin_page.list_item,
        'dt': moin_page.list_item_label,
        'dd': moin_page.list_item_body,
        # Table - th and td require special processing for alignment of cell contents
        'table': moin_page.table,
        'thead': moin_page.table_header,
        'tbody': moin_page.table_body,
        'tr': moin_page.table_row,
    }

    # HTML tags which do not have an equivalent in the DOM tree,
    # but we keep the information using a <span> element
    inline_tags = set(['abbr', 'acronym', 'address', 'dfn', 'kbd'])

    # HTML tags which are completely ignored by our converter.
    # We do not even process children of these elements.
    ignored_tags = set([
        'applet',
        'area',
        'button',
        'caption',
        'center',
        'fieldset',
        'form',
        'frame',
        'frameset',
        'head',
        'iframe',
        'input',
        'isindex',
        'label',
        'legend',
        'link',
        'map',
        'menu',
        'noframes',
        'noscript',
        'optgroup',
        'option',
        'param',
        'script',
        'select',
        'style',
        'textarea',
        'title',
        'var',
    ])

    # standard_attributes are html attributes which are used
    # directly in the DOM tree, without any conversion
    standard_attributes = set(['title', 'class', 'style'])

    # Regular expression to detect an html heading tag
    heading_re = re.compile('h[1-6]')

    def new(self, tag, attrib, children):
        """
        Return a new element for the DOM Tree
        """
        return ET.Element(tag, attrib=attrib, children=children)

    def new_copy(self, tag, element, attrib):
        """
        Copy one element into the DOM tree under a new tag.

        The converted attributes of ``element`` are merged into ``attrib``
        (the passed dict is updated in place), the children of ``element``
        are converted and a new element is built from both.
        """
        attrib.update(self.convert_attributes(element))
        children = self.do_children(element)
        return self.new(tag, attrib, children)

    def new_copy_symmetric(self, element, attrib):
        """
        Create a new QName, with the same tag of the element,
        but with a different namespace.

        Then, we handle the copy normally.
        """
        tag = ET.QName(element.tag, moin_page)
        return self.new_copy(tag, element, attrib)

    def convert_attributes(self, element):
        """
        Return the attributes of ``element`` that are kept in the DOM tree.

        Attributes listed in ``standard_attributes`` are stored under the
        html namespace, 'id' is stored under the xml namespace, everything
        else is dropped.
        """
        result = {}
        for key, value in element.attrib.iteritems():
            if key in self.standard_attributes:
                result[html(key)] = value
            if key == 'id':
                result[xml('id')] = value
        return result

    def _new_span(self, element, name, value):
        """
        Copy ``element`` as a moin_page span carrying the single
        moin_page attribute ``name`` set to ``value``.
        """
        return self.new_copy(moin_page.span, element, {moin_page(name): value})

    def visit_heading(self, element):
        """
        Function to convert an heading tag into a proper
        element in our moin_page namespace
        """
        # the tag is 'h1' ... 'h6', the second character is the level
        heading_level = element.tag[1]
        attrib = {moin_page('outline-level'): heading_level}
        return self.new_copy(moin_page.h, element, attrib)

    def visit_br(self, element):
        return moin_page.line_break()

    def visit_big(self, element):
        return self._new_span(element, 'font-size', '120%')

    def visit_small(self, element):
        return self._new_span(element, 'font-size', '85%')

    def visit_sub(self, element):
        return self._new_span(element, 'baseline-shift', 'sub')

    def visit_sup(self, element):
        return self._new_span(element, 'baseline-shift', 'super')

    def visit_u(self, element):
        return self._new_span(element, 'text-decoration', 'underline')

    def visit_ins(self, element):
        return self._new_span(element, 'text-decoration', 'underline')

    def visit_del(self, element):
        return self._new_span(element, 'text-decoration', 'line-through')

    def visit_s(self, element):
        return self._new_span(element, 'text-decoration', 'line-through')

    def visit_strike(self, element):
        return self._new_span(element, 'text-decoration', 'line-through')

    def visit_hr(self, element, default_class=u'moin-hr3'):
        return self.new_copy(moin_page.separator, element,
                             {moin_page.class_: default_class})

    def visit_img(self, element):
        """
        <img src="URI" /> --> <object xlink:href="URI />

        A source without URI scheme is turned into an xinclude of a
        'wiki.local' target instead.
        """
        attrib = {}
        url = Iri(element.attrib.get('src'))
        if element.attrib.get('alt'):
            attrib[html.alt] = element.attrib.get('alt')
        if url.scheme is None:
            # img tag
            target = Iri(scheme='wiki.local',
                         path=element.attrib.get("src"),
                         fragment=None)
            attrib[xinclude.href] = target
            new_node = xinclude.include(attrib=attrib)
        else:
            # object tag
            attrib[xlink.href] = url
            new_node = moin_page.object(attrib)
        return new_node

    def visit_object(self, element):
        """
        <object data="href"></object> --> <object xlink="href" />
        """
        key = xlink('href')
        attrib = {}
        if self.base_url:
            attrib[key] = ''.join([self.base_url, element.get(html.data)])
        else:
            attrib[key] = element.get(html.data)

        # Convert the href attribute into unicode
        attrib[key] = unicode(attrib[key])
        return moin_page.object(attrib)

    def visit_inline(self, element):
        """
        For some specific inline tags (defined in inline_tags)
        we just return a <span> whose html class is 'html-' + tag name.
        """
        # tags delivered by the markdown parser are plain strings (see
        # visit_heading / visit), they have no ``name`` attribute
        attrib = {html.class_: ''.join(['html-', element.tag])}
        return self.new_copy(moin_page.span, element, attrib)

    def visit_li(self, element):
        """
        NB : A list item (<li>) is like the following snippet::

            <list-item>
                <list-item-label>label</list-item-label>
                <list-item-body>Body</list-item-body>
            </list-item>

        For <li> element, there is no label
        """
        list_item_body = ET.Element(moin_page.list_item_body,
                                    attrib={},
                                    children=self.do_children(element))
        return ET.Element(moin_page.list_item,
                          attrib={},
                          children=[list_item_body])

    def visit_list(self, element):
        """
        Convert a list of item (whatever the type : ordered or unordered)
        So we have html code like::

            <ul>
                <li>Item 1</li>
                <li>Item 2</li>
            </ul>

        Which will be converted to::

            <list>
                <list-item>
                    <list-item-body>Item 1</list-item-body>
                </list-item>
                <list-item>
                    <list-item-body>Item 2</list-item-body>
                </list-item>
            </list>
        """
        # We will define the appropriate attribute
        # according to the type of the list
        attrib = {}
        if element.tag in ("ul", "dir"):
            attrib[moin_page('item-label-generate')] = 'unordered'
        elif element.tag == "ol":
            attrib[moin_page('item-label-generate')] = 'ordered'

        return ET.Element(moin_page.list,
                          attrib=attrib,
                          children=self.do_children(element))

    def visit_a(self, element):
        """
        Convert a link.

        When the scheme of the href is not allowed, the post-processed href
        text is returned instead of a link element.
        """
        href = postproc_text(self.markdown, element.attrib.get("href"))
        if not allowed_uri_scheme(href):
            return href
        return self.new_copy(moin_page.a, element, {xlink('href'): href})

    def convert_align_to_class(self, attrib):
        """
        Return a dict holding a moin_page class attribute when ``attrib``
        has an 'align' value of right, center or left; else an empty dict.
        """
        attr = {}
        alignment = attrib.get('align')
        if alignment in (u'right', u'center', u'left'):
            attr[moin_page.class_] = alignment
        return attr

    def visit_th(self, element):
        attrib = self.convert_align_to_class(element.attrib)
        return self.new_copy(html.th, element, attrib=attrib)

    def visit_td(self, element):
        attrib = self.convert_align_to_class(element.attrib)
        return self.new_copy(html.td, element, attrib=attrib)

    def visit(self, element):
        """
        Convert one element by dispatching on its tag.

        :returns: the converted node (or whatever the specific visit method
                  returns), or None for ignored and unhandled tags
        """
        tag = element.tag

        # Our element can be converted directly, just by changing the namespace
        if tag in self.symmetric_tags:
            return self.new_copy_symmetric(element, attrib={})

        # Our element is simple enough to just change the tag name
        if tag in self.simple_tags:
            return self.new_copy(self.simple_tags[tag], element, attrib={})

        # Our element defines a list
        if tag in self.list_tags:
            return self.visit_list(element)

        # We convert our element as a span tag with element attribute
        if tag in self.inline_tags:
            return self.visit_inline(element)

        # We have a heading tag
        if self.heading_re.match(tag):
            return self.visit_heading(element)

        # Otherwise we need a specific procedure to handle it
        method = getattr(self, 'visit_' + tag, None)
        if method:
            return method(element)

        # We should ignore this tag
        if tag in self.ignored_tags:
            logging.info("INFO : Ignored tag : {0}".format(tag))
            return None

        logging.info("INFO : Unhandled tag : {0}".format(tag))
        return None

    def do_children(self, element, add_lineno=False):
        """
        Convert the text and all children of ``element``.

        :param add_lineno: when true and line numbers are left in
                           ``self.line_numbers``, each converted child node
                           gets the next line number as data_lineno attribute
        :returns: list of converted nodes and text strings
        """
        new = []
        # markdown parser surrounds child nodes with unwanted u"\n" children, here we remove leading \n
        if hasattr(
                element,
                "text") and element.text is not None and element.text != u'\n':
            new.append(postproc_text(self.markdown, element.text))

        for child in element:
            r = self.visit(child)
            if r is None:
                r = ()
            elif not isinstance(r, (list, tuple)):
                if add_lineno and self.line_numbers:
                    r.attrib[html.data_lineno] = self.line_numbers.popleft()
                r = (r, )
            new.extend(r)
            # markdown parser surrounds child nodes with unwanted u"\n" children, here we drop trailing \n
            if hasattr(
                    child,
                    "tail") and child.tail is not None and child.tail != u'\n':
                new.append(postproc_text(self.markdown, child.tail))
        return new

    # }}}

    def count_lines(self, text):
        """
        Create a list of line numbers corresponding to the first line of each markdown block.

        The markdown parser does not provide text line numbers nor is there an easy way to
        add line numbers. As an alternative, we try to split the input text into the same blocks
        as the parser does, then calculate the starting line number of each block.  The list will be
        processed by the do_children method above.

        This method has unresolved problems caused by splitting the text into blocks based upon
        the presence of 2 adjacent line end characters, including:

            * blank lines within lists create separate blocks
            * omitting a blank line after a heading combines 2 elements into one block
            * using more than one blank line between blocks

        The net result is we either have too few or too many line numbers in the generated list which
        will cause the double-click-to-edit autoscroll textarea to sometimes be off by several lines.

        The result is stored as a deque in ``self.line_numbers``.

        TODO: revisit this when the parsing errors documented in contrib/serialized/items.moin
        (markdown item) are fixed.
        """
        line_numbers = deque()
        lineno = 1
        in_blockquote = False
        for block in text.split(u'\n\n'):
            if not block:
                # bump count because empty blocks will be discarded
                lineno += 2
                continue
            line_count = block.count(u'\n')

            # detect and fix the problem of interspersed blank lines within blockquotes
            if block.startswith((u'    ', u'\n    ')):
                if in_blockquote:
                    lineno += line_count + 2
                    continue
                in_blockquote = True
            else:
                in_blockquote = False

            if block.startswith(u'\n'):
                lineno += 1
                line_numbers.append(lineno)
                lineno += line_count + 2 - 1  # -1 is already in count
            else:
                line_numbers.append(lineno)
                lineno += line_count + 2
        self.line_numbers = line_numbers

    def embedded_markup(self, text):
        """
        Per http://meta.stackexchange.com/questions/1777/what-html-tags-are-allowed-on-stack-exchange-sites
        markdown markup allows users to specify several "safe" HTML tags within a document. These tags include:

            a b blockquote code del dd dl dt em h1 h2 h3 i img kbd li ol p pre s sup sub strong strike ul br hr

        In addition, some markdown extensions output raw HTML tags (e.g. fenced outputs "<pre><code>...").
        To prevent the <, > characters from being escaped, the embedded tags are converted to nodes by using
        the converter in html_in.py.

        :returns: the converted P node when the conversion yields the
                  page/body/p structure, else the conversion result itself
                  (the unchanged text if the html converter failed)
        """
        try:
            # work around a possible bug - there is a traceback if HTML document has no tags
            p_text = html_in_converter(u'<p>%s</p>' % text)
        except AssertionError:
            # html_in converter (EmeraldTree) throws exceptions on markup style links: "Some text <http://moinmo.in> more text"
            p_text = text

        if isinstance(p_text, unicode):
            return p_text
        if (p_text.tag == moin_page.page and
                p_text[0].tag == moin_page.body and
                p_text[0][0].tag == moin_page.p):
            # will fix possible problem of P node having block children later
            return p_text[0][0]
        return p_text

    def convert_embedded_markup(self, node):
        """
        Recurse through tree looking for embedded markup.

        :param node: a tree node
        """
        for idx, child in enumerate(node):
            if isinstance(child, unicode):
                if u'<' in child:
                    # child is immutable string, so must do node[idx]
                    node[idx] = self.embedded_markup(child)
            elif child.tag != moin_page.blockcode:
                # do not convert markup within a <pre> tag
                self.convert_embedded_markup(child)

    def convert_invalid_p_nodes(self, node):
        """
        Processing embedded HTML tags within markup or output from extensions with embedded markup can
        result in invalid HTML output caused by <p> tags enclosing a block element.

        The solution is to search for these occurrences and change the <p> tag to a <div>.

        :param node: a tree node
        """
        for child in node:
            if isinstance(child, unicode):
                continue
            if child.tag == moin_page.p and len(child):
                for grandchild in child:
                    if not isinstance(
                            grandchild,
                            unicode) and grandchild.tag in BLOCK_ELEMENTS:
                        child.tag = moin_page.div
                        # one block level child is enough to decide
                        break
            self.convert_invalid_p_nodes(child)

    def __init__(self):
        self.markdown = Markdown(extensions=[
            'extra',
            'toc',
        ])
        # read by visit_object; nothing else in this class sets it
        self.base_url = ''
        # replaced by count_lines, consumed by do_children
        self.line_numbers = deque()

    @classmethod
    def _factory(cls, input, output, **kw):
        return cls()

    def __call__(self, data, contenttype=None, arguments=None):
        """
        Convert markdown input into a moin_page page element.

        :param data: input handed to decode_data together with contenttype
        :param contenttype: content type of the input
        :param arguments: not used
        :returns: moin_page page element holding one body element
        :raises UnicodeDecodeError: if the decoded text can not be
                                    converted to unicode
        """
        text = decode_data(data, contenttype)

        # {{{ stolen from Markdown.convert

        # Fixup the source text
        try:
            text = unicode(text)
        except UnicodeDecodeError as e:
            # Customise error message while maintaining original traceback
            e.reason += '. -- Note: Markdown only accepts unicode input!'
            raise

        text = text.replace(md_util.STX, "").replace(md_util.ETX, "")
        text = text.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"
        text = text.expandtabs(self.markdown.tab_length)
        text = re.sub(r'(?<=\n) +\n', '\n', text)
        self.count_lines(text)

        # Split into lines and run the line preprocessors.
        lines = text.split("\n")
        for prep in self.markdown.preprocessors.values():
            lines = prep.run(lines)

        # Parse the high-level elements.
        md_root = self.markdown.parser.parseDocument(lines).getroot()

        # Run the tree-processors
        for treeprocessor in self.markdown.treeprocessors.values():
            new_md_root = treeprocessor.run(md_root)
            # compare with None: an ElementTree element without children
            # is false in a boolean context
            if new_md_root is not None:
                md_root = new_md_root

        # }}}

        # md_root is a list of plain old Python ElementTree objects.

        add_lineno = bool(flaskg and flaskg.add_lineno_attr)
        converted = self.do_children(md_root, add_lineno=add_lineno)
        body = moin_page.body(children=converted)
        root = moin_page.page(children=[body])
        self.convert_embedded_markup(root)
        self.convert_invalid_p_nodes(root)

        return root
Exemple #37
0
    def __call__(self, data, contenttype=None, arguments=None):
        """
        Convert markdown to moin DOM.

        data is a pointer to an open file (ProtectedRevision object)
        contenttype is likely == u'text/x-markdown;charset=utf-8'
        arguments is not used

        Markdown processing takes place in five steps:

        1. A bunch of "preprocessors" munge the input text.
        2. BlockParser() parses the high-level structural elements of the
           pre-processed text into an ElementTree.
        3. A bunch of "treeprocessors" are run against the ElementTree. One
           such treeprocessor runs InlinePatterns against the ElementTree,
           detecting inline markup.
        4. Some post-processors are run against the ElementTree nodes containing text
            and the ElementTree is converted to an EmeraldTree.
        5. The root of the EmeraldTree is returned.

        :returns: moin_page page element holding one body element
        """
        # read the data from wiki storage and convert to unicode
        text = decode_data(data, contenttype)

        # Normalize whitespace for consistent parsing. - copied from NormalizeWhitespace in markdown/preprocessors.py
        text = text.replace(md_util.STX, "").replace(md_util.ETX, "")
        text = text.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"
        text = text.expandtabs(self.markdown.tab_length)
        text = re.sub(r'(?<=\n) +\n', '\n', text)

        # save line counts for start of each block, used later for edit autoscroll
        self.count_lines(text)

        # {{{ stolen from Markdown.convert

        # Split into lines and run the line preprocessors.
        lines = text.split("\n")
        for prep in self.markdown.preprocessors.values():
            lines = prep.run(lines)

        # Parse the high-level elements, md_root is an ElementTree object
        md_root = self.markdown.parser.parseDocument(lines).getroot()

        # Run the tree-processors
        for treeprocessor in self.markdown.treeprocessors.values():
            new_md_root = treeprocessor.run(md_root)
            # compare with None: an ElementTree element without children
            # is false in a boolean context, so a plain truth test would
            # silently drop a replacement root that has no children
            if new_md_root is not None:
                md_root = new_md_root

        # }}} end stolen from Markdown.convert

        add_lineno = bool(flaskg and flaskg.add_lineno_attr)

        # run markdown post processors and convert from ElementTree to an EmeraldTree object
        converted = self.do_children(md_root, add_lineno=add_lineno)

        # convert html embedded in text strings to EmeraldTree nodes
        self.convert_embedded_markup(converted)
        # convert P-tags containing block elements to DIV-tags
        self.convert_invalid_p_nodes(converted)

        body = moin_page.body(children=converted)
        root = moin_page.page(children=[body])

        return root
Exemple #38
0
    def handle_macro(self, elem, page):
        """
        Run the macro described by ``elem`` and append its output to ``elem``.

        Nothing happens when ``elem`` has no content type, when the content
        type is not x-moin/macro, or when ``elem`` already has a body or
        inline-body child.  Otherwise the macro plugin named by the content
        type's 'name' parameter is called; its result is appended to ``elem``
        inside a body (if ``elem`` is a part) or an inline-body element, and
        error messages are appended inside an error element.

        :param elem: the element to process; it is modified in place
        :param page: passed through to the macro
        :returns: None
        """
        logging.debug("handle_macro elem: %r" % elem)
        macro_type = elem.get(moin_page.content_type)
        alt = elem.get(moin_page.alt)

        if not macro_type:
            return

        macro_type = Type(macro_type)
        if not (macro_type.type == 'x-moin' and macro_type.subtype == 'macro'):
            logging.debug("not a macro, skipping: %r" % macro_type)
            return

        name = macro_type.parameters['name']

        # a part element is a block level macro, anything else is inline
        context_block = elem.tag == moin_page.part

        args_tree = None
        for item in elem:
            if item.tag.uri == moin_page.namespace:
                if item.tag.name in ('body', 'inline-body'):
                    # output is already there, do not run the macro again
                    return
                if item.tag.name == 'arguments':
                    args_tree = item

        args = None
        # NOTE(review): truth test, so presumably an arguments element
        # without children leaves args as None - confirm this is intended
        if args_tree:
            args = Arguments()
            for arg in args_tree:
                key = arg.get(moin_page.name)
                value = arg[0]
                if key:
                    args.keyword[key] = value
                else:
                    args.positional.append(value)

        # conditional expression instead of "and ... or ...": the latter
        # picks the inline body whenever the fresh body element is false
        # in a boolean context
        elem_body = moin_page.body() if context_block else moin_page.inline_body()
        elem_error = moin_page.error()

        try:
            cls = plugins.importPlugin(app.cfg, 'macro', name, function='Macro')
            macro = cls()
            ret = macro((), args, page, alt, context_block)
            elem_body.append(ret)

        except PluginMissingError:
            elem_error.append('<<%s>> %s' % (name, _('Error: invalid macro name.')))

        except Exception as e:
            # we do not want that a faulty macro aborts rendering of the page
            # and makes the wiki UI unusable (by emitting a Server Error),
            # thus, in case of exceptions, we just log the problem and return
            # some standard text.
            logger.exception("Macro {0} raised an exception:".format(name))
            elem_error.append(_('<<%(macro_name)s: execution failed [%(error_msg)s] (see also the log)>>',
                    macro_name=name,
                    error_msg=unicode(e),
                ))

        # only attach the containers that actually received content
        if len(elem_body):
            elem.append(elem_body)
        if len(elem_error):
            elem.append(elem_error)