Beispiel #1
0
    def visit_data_element(self, element, depth, object_data, text_object, caption):
        """
        We will try to return an object element based on the
        object_data. If it is not possible, we return a paragraph
        with the content of text_object.
        """
        attrib = {}
        preferred_format, data_tag, mimetype = self.media_tags[element.tag.name]
        if not object_data:
            if not text_object:
                return
            else:
                children = self.do_children(element, depth + 1)
                return self.new(moin_page.p, attrib={}, children=children)
        # We try to determine the best object to show
        for obj in object_data:
            format = obj.get('format')  # format is optional: <imagedata format="jpeg" fileref="jpeg.jpg"/>
            if format:
                format = format.lower()
                if format in preferred_format:
                    object_to_show = obj
                    break
                else:
                    # unsupported format
                    object_to_show = None
            else:
                # XXX: Maybe we could add some verification over the extension of the file
                object_to_show = obj

        if object_to_show is None:
            # we could not find any suitable object, return the text_object replacement.
            children = self.do_children(text_object, depth + 1)
            return self.new(moin_page.p, attrib={}, children=children)

        href = object_to_show.get('fileref')
        if not href:
            # We could probably try to use entityref,
            # but at this time we won't support it.
            return
        attrib[html.alt] = href
        attrib[xlink.href] = '+get/' + href
        format = object_to_show.get('format')
        if format:
            format = format.lower()
            attrib[moin_page('type')] = ''.join([mimetype, format])
        else:
            attrib[moin_page('type')] = mimetype
        align = object_to_show.get('align')
        if align and align in set(['left', 'center', 'right', 'top', 'middle', 'bottom']):
            attrib[html.class_] = align

        # return object tag, html_out.py will convert to img, audio, or video based on type attr
        ret = ET.Element(moin_page.object, attrib=attrib)
        ret = mark_item_as_transclusion(ret, href)
        if caption:
            caption = self.new(moin_page.span, attrib={moin_page.class_: 'db-caption'}, children=[caption])
            return self.new(moin_page.span, attrib={}, children=[ret, caption])
        else:
            return ret
Beispiel #2
0
    def recurse(self, elem, page_href):
        # on first call, elem.tag.name=='page'.
        # Descendants (body, div, p, include, page, etc.) are processed by recursing through DOM

        # stack is used to detect transclusion loops
        page_href_new = elem.get(self.tag_page_href)
        if page_href_new:
            page_href_new = Iri(page_href_new)
            if page_href_new != page_href:
                page_href = page_href_new
                self.stack.append(page_href)
            else:
                self.stack.append(None)
        else:
            self.stack.append(None)

        try:
            if elem.tag == self.tag_xi_include:
                # we have already recursed several levels and found a transclusion: "{{SomePage}}" or similar
                # process the transclusion and add it to the DOM.  Subsequent recursions will traverse through
                # the transclusion's elements.
                href = elem.get(self.tag_xi_href)
                xpointer = elem.get(self.tag_xi_xpointer)

                xp_include_pages = None
                xp_include_sort = None
                xp_include_items = None
                xp_include_skipitems = None
                xp_include_heading = None
                xp_include_level = None

                if xpointer:
                    xp = XPointer(xpointer)
                    xp_include = None
                    xp_namespaces = {}
                    for entry in xp:
                        uri = None
                        name = entry.name.split(':', 1)
                        if len(name) > 1:
                            prefix, name = name
                            uri = xp_namespaces.get(prefix, False)
                        else:
                            name = name[0]

                        if uri is None and name == 'xmlns':
                            d_prefix, d_uri = entry.data.split('=', 1)
                            xp_namespaces[d_prefix] = d_uri
                        elif uri == moin_page.namespace and name == 'include':
                            xp_include = XPointer(entry.data)

                    if xp_include:
                        for entry in xp_include:
                            name, data = entry.name, entry.data_unescape
                            if name == 'pages':
                                xp_include_pages = data
                            elif name == 'sort':
                                xp_include_sort = data
                            elif name == 'items':
                                xp_include_items = int(data)
                            elif name == 'skipitems':
                                xp_include_skipitems = int(data)
                            elif name == 'heading':
                                xp_include_heading = data
                            elif name == 'level':
                                xp_include_level = data

                if href:
                    # We have a single page to transclude
                    href = Iri(href)
                    link = Iri(scheme='wiki', authority='')
                    if href.scheme == 'wiki':
                        if href.authority:
                            raise ValueError(
                                "can't handle xinclude for non-local authority"
                            )
                        else:
                            path = href.path[1:]
                    elif href.scheme == 'wiki.local':
                        page = page_href
                        path = href.path
                        if path[0] == '':
                            # /subitem
                            tmp = page.path[1:]
                            tmp.extend(path[1:])
                            path = tmp
                        elif path[0] == '..':
                            # ../sisteritem
                            path = page.path[1:] + path[1:]
                    else:
                        raise ValueError(
                            "can't handle xinclude for schemes other than wiki or wiki.local"
                        )

                    link.path = path

                    if flaskg.user.may.read(unicode(path)):
                        page = Item.create(unicode(path))
                        pages = ((page, link), )
                    else:
                        # ACLs prevent user from viewing a transclusion - show message
                        message = moin_page.p(children=(_(
                            'Access Denied, transcluded content suppressed.')))
                        attrib = {html.class_: 'warning'}
                        div = ET.Element(moin_page.div,
                                         attrib,
                                         children=(message, ))
                        container = ET.Element(moin_page.body,
                                               children=(div, ))
                        return [
                            container, 0
                        ]  # replace transclusion with container's child

                elif xp_include_pages:
                    # XXX we currently interpret xp_include_pages as wildcard, but it should be regex
                    # for compatibility with moin 1.9. whoosh has upcoming regex support, but it is not
                    # released yet.
                    if xp_include_pages.startswith('^'):
                        # get rid of the leading ^ the Include macro needed to get into "regex mode"
                        xp_include_pages = xp_include_pages[1:]
                    query = And([
                        Term(WIKINAME, app.cfg.interwikiname),
                        Wildcard(NAME_EXACT, xp_include_pages)
                    ])
                    reverse = xp_include_sort == 'descending'
                    results = flaskg.storage.search(query,
                                                    sortedby=NAME_EXACT,
                                                    reverse=reverse,
                                                    limit=None)
                    pagelist = [result[NAME] for result in results]
                    if xp_include_skipitems is not None:
                        pagelist = pagelist[xp_include_skipitems:]
                    if xp_include_items is not None:
                        pagelist = pagelist[xp_include_items + 1:]

                    pages = ((Item.create(p),
                              Iri(scheme='wiki', authority='', path='/' + p))
                             for p in pagelist)

                included_elements = []
                for page, p_href in pages:
                    if p_href.path[0] != '/':
                        p_href.path = IriPath('/' + '/'.join(p_href.path))
                    if p_href in self.stack:
                        # we have a transclusion loop, create an error message showing list of pages forming loop
                        loop = self.stack[self.stack.index(p_href):]
                        loop = [
                            u'{0}'.format(ref.path[1:])
                            for ref in loop if ref is not None
                        ] + [page.name]
                        msg = u'Error: Transclusion loop via: ' + u', '.join(
                            loop)
                        attrib = {html.class_: 'moin-error'}
                        strong = ET.Element(moin_page.strong, attrib, (msg, ))
                        included_elements.append(strong)
                        continue
                    # TODO: Is this correct?
                    if not flaskg.user.may.read(page.name):
                        continue

                    if xp_include_heading is not None:
                        attrib = {self.tag_href: p_href}
                        children = (xp_include_heading or page.name, )
                        elem_a = ET.Element(self.tag_a,
                                            attrib,
                                            children=children)
                        attrib = {
                            self.tag_outline_level: xp_include_level or '1'
                        }
                        elem_h = ET.Element(self.tag_h,
                                            attrib,
                                            children=(elem_a, ))
                        included_elements.append(elem_h)

                    page_doc = page.content.internal_representation(
                        attributes=Arguments(keyword=elem.attrib))

                    self.recurse(page_doc, page_href)

                    # The href needs to be an absolute URI, without the prefix "wiki://"
                    page_doc = mark_item_as_transclusion(page_doc, p_href.path)
                    included_elements.append(page_doc)

                if len(included_elements) > 1:
                    # use a div as container
                    result = ET.Element(self.tag_div)
                    result.extend(included_elements)
                elif included_elements:
                    result = included_elements[0]
                else:
                    result = None
                #  end of processing for transclusion; the "result" will get inserted into the DOM below
                return result

            # Traverse the DOM by calling self.recurse with each child of the current elem.
            # Starting elem.tag.name=='page'.
            container = []
            i = 0
            while i < len(elem):
                child = elem[i]
                if isinstance(child, ET.Node):

                    ret = self.recurse(child, page_href)

                    if ret:
                        # Either child or a descendant of child is a transclusion.
                        # See top of this script for notes on why these DOM adjustments are required.
                        if isinstance(ret, ET.Node
                                      ) and elem.tag.name in NO_BLOCK_CHILDREN:
                            body = ret[0]
                            if len(body) == 0:
                                # the transcluded item is empty, insert an empty span into DOM
                                attrib = Attributes(ret).convert()
                                elem[i] = ET.Element(moin_page.span,
                                                     attrib=attrib)
                            elif (isinstance(body[0], ET.Node)
                                  and (len(body) > 1 or body[0].tag.name
                                       not in ('p', 'object', 'a'))):
                                # Complex case: "some text {{BlockItem}} more text" or "\n{{BlockItem}}\n" where
                                # the BlockItem body contains multiple p's, a table, preformatted text, etc.
                                # These block elements cannot be made a child of the current elem, so we create
                                # a container to replace elem.
                                # Create nodes to hold any siblings before and after current child (elem[i])
                                before = copy.deepcopy(elem)
                                after = copy.deepcopy(elem)
                                before[:] = elem[0:i]
                                after[:] = elem[i + 1:]
                                if len(before):
                                    # there are siblings before transclude, save them in container
                                    container.append(before)
                                new_trans_ptr = len(container)
                                # get attributes from page node;
                                # we expect {class: "moin-transclusion"; data-href: "http://some.org/somepage"}
                                attrib = Attributes(ret).convert()
                                # current elem will likely be replaced by container so we need to copy data-lineno attr
                                if html.data_lineno in elem.attrib:
                                    attrib[html.data_lineno] = elem.attrib[
                                        html.data_lineno]
                                # make new div node to hold transclusion, copy children, and save in container
                                div = ET.Element(moin_page.div,
                                                 attrib=attrib,
                                                 children=body[:])
                                container.append(
                                    div)  # new_trans_ptr is index to this
                                if len(after):
                                    container.append(after)
                                if elem.tag.name == 'a':
                                    # invalid input [[MyPage|{{BlockItem}}]],
                                    # best option is to retain A-tag and fail html validation
                                    # TODO: error may not be obvious to user - add error message
                                    elem[i] = div
                                else:
                                    # move up 1 level in recursion where elem becomes the child and
                                    # is usually replaced by container
                                    return [container, new_trans_ptr]
                            else:
                                # default action for inline transclusions or odd things like circular transclusion error messages
                                classes = child.attrib.get(html.class_,
                                                           '').split()
                                classes += ret.attrib.get(html.class_,
                                                          '').split()
                                ret.attrib[html.class_] = ' '.join(classes)
                                elem[i] = ret
                        elif isinstance(ret, types.ListType):
                            # a container has been returned.
                            # Note: there are multiple places where a container may be constructed
                            ret_container, trans_ptr = ret
                            # trans_ptr points to the transclusion within ret_container.
                            # Here the transclusion will always contain a block level element
                            if elem.tag.name in NO_BLOCK_CHILDREN:
                                # Complex case, transclusion effects grand-parent, great-grand-parent, e.g.:
                                # "/* comment {{BlockItem}} */" or  "text ''italic {{BlockItem}} italic'' text"
                                # elem is an inline element, build a bigger container to replace elem's parent,
                                before = copy.deepcopy(elem)
                                after = copy.deepcopy(elem)
                                before[:] = elem[0:i] + ret_container[
                                    0:trans_ptr]
                                after[:] = ret_container[trans_ptr +
                                                         1:] + elem[i + 1:]
                                if len(before):
                                    container.append(before)
                                new_trans_ptr = len(container)
                                # child may have classes like "comment" that must be added to transcluded element
                                classes = child.attrib.get(
                                    moin_page.class_, '').split()
                                # must use moin_page.class_ above, but use html.class below per html_out.py code
                                classes += ret_container[trans_ptr].attrib.get(
                                    html.class_, '').split()
                                ret_container[trans_ptr].attrib[
                                    html.class_] = ' '.join(classes)
                                container.append(ret_container[trans_ptr]
                                                 )  # the transclusion
                                if len(after):
                                    container.append(after)
                                return [container, new_trans_ptr]
                            else:
                                # elem is a block element
                                for grandchild in child:
                                    if isinstance(
                                            grandchild, ET.Node
                                    ) and grandchild.tag.name == u'include':
                                        # the include may have classes that must be added to transcluded element
                                        classes = grandchild.attrib.get(
                                            html.class_, '').split()
                                        classes += ret_container[
                                            trans_ptr].attrib.get(
                                                html.class_, '').split()
                                        ret_container[trans_ptr].attrib[
                                            html.class_] = ' '.join(classes)
                                # replace child element with the container generated in lower recursion
                                elem[i:i +
                                     1] = ret_container  # elem[i] is the child
                        else:
                            # default action for any ret not fitting special cases above,
                            # e.g. tranclusion is within a table cell
                            elem[i] = ret
                # we are finished with this child, advance to next sibling
                i += 1

        finally:
            self.stack.pop()
Beispiel #3
0
    def recurse(self, elem, page_href):
        # on first call, elem.tag.name=='page'.
        # Descendants (body, div, p, include, page, etc.) are processed by recursing through DOM

        # stack is used to detect transclusion loops
        page_href_new = elem.get(self.tag_page_href)
        if page_href_new:
            page_href_new = Iri(page_href_new)
            if page_href_new != page_href:
                page_href = page_href_new
                self.stack.append(page_href)
            else:
                self.stack.append(None)
        else:
            self.stack.append(None)

        try:
            if elem.tag == self.tag_xi_include:
                # we have already recursed several levels and found a transclusion: "{{SomePage}}" or similar
                # process the transclusion and add it to the DOM.  Subsequent recursions will traverse through
                # the transclusion's elements.
                href = elem.get(self.tag_xi_href)
                xpointer = elem.get(self.tag_xi_xpointer)

                xp_include_pages = None
                xp_include_sort = None
                xp_include_items = None
                xp_include_skipitems = None
                xp_include_heading = None
                xp_include_level = None

                if xpointer:
                    xp = XPointer(xpointer)
                    xp_include = None
                    xp_namespaces = {}
                    for entry in xp:
                        uri = None
                        name = entry.name.split(':', 1)
                        if len(name) > 1:
                            prefix, name = name
                            uri = xp_namespaces.get(prefix, False)
                        else:
                            name = name[0]

                        if uri is None and name == 'xmlns':
                            d_prefix, d_uri = entry.data.split('=', 1)
                            xp_namespaces[d_prefix] = d_uri
                        elif uri == moin_page.namespace and name == 'include':
                            xp_include = XPointer(entry.data)

                    if xp_include:
                        for entry in xp_include:
                            name, data = entry.name, entry.data_unescape
                            if name == 'pages':
                                xp_include_pages = data
                            elif name == 'sort':
                                xp_include_sort = data
                            elif name == 'items':
                                xp_include_items = int(data)
                            elif name == 'skipitems':
                                xp_include_skipitems = int(data)
                            elif name == 'heading':
                                xp_include_heading = data
                            elif name == 'level':
                                xp_include_level = data

                if href:
                    # We have a single page to transclude
                    href = Iri(href)
                    link = Iri(scheme='wiki', authority='')
                    if href.scheme == 'wiki':
                        if href.authority:
                            raise ValueError("can't handle xinclude for non-local authority")
                        else:
                            path = href.path[1:]
                    elif href.scheme == 'wiki.local':
                        page = page_href
                        path = href.path
                        if path[0] == '':
                            # /subitem
                            tmp = page.path[1:]
                            tmp.extend(path[1:])
                            path = tmp
                        elif path[0] == '..':
                            # ../sisteritem
                            path = page.path[1:] + path[1:]
                    else:
                        raise ValueError("can't handle xinclude for schemes other than wiki or wiki.local")

                    link.path = path

                    if flaskg.user.may.read(unicode(path)):
                        page = Item.create(unicode(path))
                        pages = ((page, link), )
                    else:
                        # ACLs prevent user from viewing a transclusion - show message
                        message = moin_page.p(children=(_('Access Denied, transcluded content suppressed.')))
                        attrib = {html.class_: 'warning'}
                        div = ET.Element(moin_page.div, attrib, children=(message, ))
                        container = ET.Element(moin_page.body, children=(div, ))
                        return [container, 0]  # replace transclusion with container's child

                elif xp_include_pages:
                    # XXX we currently interpret xp_include_pages as wildcard, but it should be regex
                    # for compatibility with moin 1.9. whoosh has upcoming regex support, but it is not
                    # released yet.
                    if xp_include_pages.startswith('^'):
                        # get rid of the leading ^ the Include macro needed to get into "regex mode"
                        xp_include_pages = xp_include_pages[1:]
                    query = And([Term(WIKINAME, app.cfg.interwikiname), Wildcard(NAME_EXACT, xp_include_pages)])
                    reverse = xp_include_sort == 'descending'
                    results = flaskg.storage.search(query, sortedby=NAME_EXACT, reverse=reverse, limit=None)
                    pagelist = [result[NAME] for result in results]
                    if xp_include_skipitems is not None:
                        pagelist = pagelist[xp_include_skipitems:]
                    if xp_include_items is not None:
                        pagelist = pagelist[xp_include_items + 1:]

                    pages = ((Item.create(p), Iri(scheme='wiki', authority='', path='/' + p)) for p in pagelist)

                included_elements = []
                for page, p_href in pages:
                    if p_href.path[0] != '/':
                        p_href.path = IriPath('/' + '/'.join(p_href.path))
                    if p_href in self.stack:
                        # we have a transclusion loop, create an error message showing list of pages forming loop
                        loop = self.stack[self.stack.index(p_href):]
                        loop = [u'{0}'.format(ref.path[1:]) for ref in loop if ref is not None] + [page.name]
                        msg = u'Error: Transclusion loop via: ' + u', '.join(loop)
                        attrib = {html.class_: 'moin-error'}
                        strong = ET.Element(moin_page.strong, attrib, (msg, ))
                        included_elements.append(strong)
                        continue
                    # TODO: Is this correct?
                    if not flaskg.user.may.read(page.name):
                        continue

                    if xp_include_heading is not None:
                        attrib = {self.tag_href: p_href}
                        children = (xp_include_heading or page.name, )
                        elem_a = ET.Element(self.tag_a, attrib, children=children)
                        attrib = {self.tag_outline_level: xp_include_level or '1'}
                        elem_h = ET.Element(self.tag_h, attrib, children=(elem_a, ))
                        included_elements.append(elem_h)

                    page_doc = page.content.internal_representation(attributes=Arguments(keyword=elem.attrib))

                    self.recurse(page_doc, page_href)

                    # The href needs to be an absolute URI, without the prefix "wiki://"
                    page_doc = mark_item_as_transclusion(page_doc, p_href.path)
                    included_elements.append(page_doc)

                if len(included_elements) > 1:
                    # use a div as container
                    result = ET.Element(self.tag_div)
                    result.extend(included_elements)
                elif included_elements:
                    result = included_elements[0]
                else:
                    result = None
                #  end of processing for transclusion; the "result" will get inserted into the DOM below
                return result

            # Traverse the DOM by calling self.recurse with each child of the current elem.
            # Starting elem.tag.name=='page'.
            container = []
            i = 0
            while i < len(elem):
                child = elem[i]
                if isinstance(child, ET.Node):

                    ret = self.recurse(child, page_href)

                    if ret:
                        # Either child or a descendant of child is a transclusion.
                        # See top of this script for notes on why these DOM adjustments are required.
                        if isinstance(ret, ET.Node) and elem.tag.name in NO_BLOCK_CHILDREN:
                            body = ret[0]
                            if len(body) == 0:
                                # the transcluded item is empty, insert an empty span into DOM
                                attrib = Attributes(ret).convert()
                                elem[i] = ET.Element(moin_page.span, attrib=attrib)
                            elif (isinstance(body[0], ET.Node) and
                                  (len(body) > 1 or body[0].tag.name not in ('p', 'object', 'a'))):
                                # Complex case: "some text {{BlockItem}} more text" or "\n{{BlockItem}}\n" where
                                # the BlockItem body contains multiple p's, a table, preformatted text, etc.
                                # These block elements cannot be made a child of the current elem, so we create
                                # a container to replace elem.
                                # Create nodes to hold any siblings before and after current child (elem[i])
                                before = copy.deepcopy(elem)
                                after = copy.deepcopy(elem)
                                before[:] = elem[0:i]
                                after[:] = elem[i + 1:]
                                if len(before):
                                    # there are siblings before transclude, save them in container
                                    container.append(before)
                                new_trans_ptr = len(container)
                                # get attributes from page node;
                                # we expect {class: "moin-transclusion"; data-href: "http://some.org/somepage"}
                                attrib = Attributes(ret).convert()
                                # current elem will likely be replaced by container so we need to copy data-lineno attr
                                if html.data_lineno in elem.attrib:
                                    attrib[html.data_lineno] = elem.attrib[html.data_lineno]
                                # make new div node to hold transclusion, copy children, and save in container
                                div = ET.Element(moin_page.div, attrib=attrib, children=body[:])
                                container.append(div)  # new_trans_ptr is index to this
                                if len(after):
                                    container.append(after)
                                if elem.tag.name == 'a':
                                    # invalid input [[MyPage|{{BlockItem}}]],
                                    # best option is to retain A-tag and fail html validation
                                    # TODO: error may not be obvious to user - add error message
                                    elem[i] = div
                                else:
                                    # move up 1 level in recursion where elem becomes the child and
                                    # is usually replaced by container
                                    return [container, new_trans_ptr]
                            else:
                                # default action for inline transclusions or odd things like circular transclusion error messages
                                classes = child.attrib.get(html.class_, '').split()
                                classes += ret.attrib.get(html.class_, '').split()
                                ret.attrib[html.class_] = ' '.join(classes)
                                elem[i] = ret
                        elif isinstance(ret, types.ListType):
                            # a container has been returned.
                            # Note: there are multiple places where a container may be constructed
                            ret_container, trans_ptr = ret
                            # trans_ptr points to the transclusion within ret_container.
                            # Here the transclusion will always contain a block level element
                            if elem.tag.name in NO_BLOCK_CHILDREN:
                                # Complex case, transclusion effects grand-parent, great-grand-parent, e.g.:
                                # "/* comment {{BlockItem}} */" or  "text ''italic {{BlockItem}} italic'' text"
                                # elem is an inline element, build a bigger container to replace elem's parent,
                                before = copy.deepcopy(elem)
                                after = copy.deepcopy(elem)
                                before[:] = elem[0:i] + ret_container[0:trans_ptr]
                                after[:] = ret_container[trans_ptr + 1:] + elem[i + 1:]
                                if len(before):
                                    container.append(before)
                                new_trans_ptr = len(container)
                                # child may have classes like "comment" that must be added to transcluded element
                                classes = child.attrib.get(moin_page.class_, '').split()
                                # must use moin_page.class_ above, but use html.class below per html_out.py code
                                classes += ret_container[trans_ptr].attrib.get(html.class_, '').split()
                                ret_container[trans_ptr].attrib[html.class_] = ' '.join(classes)
                                container.append(ret_container[trans_ptr])  # the transclusion
                                if len(after):
                                    container.append(after)
                                return [container, new_trans_ptr]
                            else:
                                # elem is a block element
                                for grandchild in child:
                                    if isinstance(grandchild, ET.Node) and grandchild.tag.name == u'include':
                                        # the include may have classes that must be added to transcluded element
                                        classes = grandchild.attrib.get(html.class_, '').split()
                                        classes += ret_container[trans_ptr].attrib.get(html.class_, '').split()
                                        ret_container[trans_ptr].attrib[html.class_] = ' '.join(classes)
                                # replace child element with the container generated in lower recursion
                                elem[i:i + 1] = ret_container  # elem[i] is the child
                        else:
                            # default action for any ret not fitting special cases above,
                            # e.g. tranclusion is within a table cell
                            elem[i] = ret
                # we are finished with this child, advance to next sibling
                i += 1

        finally:
            self.stack.pop()
Beispiel #4
0
    def recurse(self, elem, page_href):
        # on first call, elem.tag.name=='page'. Decendants (body, div, p, include, page, etc.) are processed by recursing through DOM

        # stack is used to detect transclusion loops
        page_href_new = elem.get(self.tag_page_href)
        if page_href_new:
            page_href_new = Iri(page_href_new)
            if page_href_new != page_href:
                page_href = page_href_new
                self.stack.append(page_href)
            else:
                self.stack.append(None)
        else:
            self.stack.append(None)

        try:
            if elem.tag == self.tag_xi_include:
                # we have already recursed several levels and found a transclusion: "{{SomePage}}" or similar
                # process the transclusion and add it to the DOM.  Subsequent recursions will traverse through the transclusion's elements.
                href = elem.get(self.tag_xi_href)
                xpointer = elem.get(self.tag_xi_xpointer)

                xp_include_pages = None
                xp_include_sort = None
                xp_include_items = None
                xp_include_skipitems = None
                xp_include_heading = None
                xp_include_level = None

                if xpointer:
                    xp = XPointer(xpointer)
                    xp_include = None
                    xp_namespaces = {}
                    for entry in xp:
                        uri = None
                        name = entry.name.split(':', 1)
                        if len(name) > 1:
                            prefix, name = name
                            uri = xp_namespaces.get(prefix, False)
                        else:
                            name = name[0]

                        if uri is None and name == 'xmlns':
                            d_prefix, d_uri = entry.data.split('=', 1)
                            xp_namespaces[d_prefix] = d_uri
                        elif uri == moin_page.namespace and name == 'include':
                            xp_include = XPointer(entry.data)

                    if xp_include:
                        for entry in xp_include:
                            name, data = entry.name, entry.data_unescape
                            if name == 'pages':
                                xp_include_pages = data
                            elif name == 'sort':
                                xp_include_sort = data
                            elif name == 'items':
                                xp_include_items = int(data)
                            elif name == 'skipitems':
                                xp_include_skipitems = int(data)
                            elif name == 'heading':
                                xp_include_heading = data
                            elif name == 'level':
                                xp_include_level = data

                if href:
                    # We have a single page to transclude
                    href = Iri(href)
                    link = Iri(scheme='wiki', authority='')
                    if href.scheme == 'wiki':
                        if href.authority:
                            raise ValueError("can't handle xinclude for non-local authority")
                        else:
                            path = href.path[1:]
                    elif href.scheme == 'wiki.local':
                        page = page_href
                        path = href.path
                        if path[0] == '':
                            # /subitem
                            tmp = page.path[1:]
                            tmp.extend(path[1:])
                            path = tmp
                        elif path[0] == '..':
                            # ../sisteritem
                            path = page.path[1:] + path[1:]
                    else:
                        raise ValueError("can't handle xinclude for schemes other than wiki or wiki.local")

                    link.path = path

                    page = Item.create(unicode(path))
                    pages = ((page, link), )

                elif xp_include_pages:
                    # XXX we currently interpret xp_include_pages as wildcard, but it should be regex
                    # for compatibility with moin 1.9. whoosh has upcoming regex support, but it is not
                    # released yet.
                    if xp_include_pages.startswith('^'):
                        # get rid of the leading ^ the Include macro needed to get into "regex mode"
                        xp_include_pages = xp_include_pages[1:]
                    query = And([Term(WIKINAME, app.cfg.interwikiname), Wildcard(NAME_EXACT, xp_include_pages)])
                    reverse = xp_include_sort == 'descending'
                    results = flaskg.storage.search(query, sortedby=NAME_EXACT, reverse=reverse, limit=None)
                    pagelist = [result[NAME] for result in results]
                    if xp_include_skipitems is not None:
                        pagelist = pagelist[xp_include_skipitems:]
                    if xp_include_items is not None:
                        pagelist = pagelist[xp_include_items + 1:]

                    pages = ((Item.create(p), Iri(scheme='wiki', authority='', path='/' + p)) for p in pagelist)

                included_elements = []
                for page, p_href in pages:
                    if p_href.path[0] != '/':
                        p_href.path = IriPath('/' + '/'.join(p_href.path))
                    if p_href in self.stack:
                        # we have a transclusion loop, create an error message showing list of pages forming loop
                        loop = self.stack[self.stack.index(p_href):]
                        loop = [u'{0}'.format(ref.path[1:]) for ref in loop if ref is not None] + [page.name]
                        msg = u'Error: Transclusion loop via: ' + u', '.join(loop)
                        attrib = {getattr(moin_page, 'class'): 'moin-error'}
                        strong = ET.Element(moin_page.strong, attrib, (msg, ))
                        included_elements.append(strong)
                        continue
                    # TODO: Is this correct?
                    if not flaskg.user.may.read(page.name):
                        continue

                    if xp_include_heading is not None:
                        attrib = {self.tag_href: p_href}
                        children = (xp_include_heading or page.name, )
                        elem_a = ET.Element(self.tag_a, attrib, children=children)
                        attrib = {self.tag_outline_level: xp_include_level or '1'}
                        elem_h = ET.Element(self.tag_h, attrib, children=(elem_a, ))
                        included_elements.append(elem_h)

                    page_doc = page.internal_representation()
                    # page_doc.tag = self.tag_div # XXX why did we have this?

                    self.recurse(page_doc, page_href)

                    # if this is an existing item, mark it as a transclusion.  non-existent items are not marked (page_doc.tag.name == u'a')
                    # The href needs to be an absolute URI, without the prefix "wiki://"
                    if page_doc.tag.name == u'page':
                        page_doc = mark_item_as_transclusion(page_doc, p_href.path)
                    included_elements.append(page_doc)

                if len(included_elements) > 1:
                    # use a div as container
                    result = ET.Element(self.tag_div)
                    result.extend(included_elements)
                elif included_elements:
                    result = included_elements[0]
                else:
                    result = None
                #  end of processing for transclusion; the "result" will get inserted into the DOM below
                return result

            # Traverse the DOM by calling self.recurse with each child of the current elem.  Starting elem.tag.name=='page'.
            container = []
            i = 0
            while i < len(elem):
                child = elem[i]
                if isinstance(child, ET.Node):
                    # almost everything in the DOM will be an ET.Node, exceptions are unicode nodes under p nodes

                    ret = self.recurse(child, page_href)

                    if ret:
                        # "Normally" we are here because child.tag.name==include and ret is a transcluded item (ret.tag.name=page, image, or object, etc.)
                        # that must be inserted into the DOM replacing elem[i].
                        # This is complicated by the DOM having many inclusions, such as "\n{{SomePage}}\n" that are a child of a "p".
                        # To prevent generation of invalid HTML5 (e.g. "<p>text<p>text</p></p>"), the DOM must be adjusted.
                        if isinstance(ret, types.ListType):
                            # the transclusion may be a return of the container variable from below, add to DOM replacing the current node
                            elem[i:i+1] = ret
                        elif elem.tag.name == 'p':
                            # ancestor P nodes with tranclusions  have special case issues, we may need to mangle the ret
                            body = ret[0]
                            # check for instance where ret is a page, ret[0] a body, ret[0][0] a P
                            if not isinstance(body, unicode) and ret.tag.name == 'page' and body.tag.name == 'body' and \
                                len(body) == 1 and body[0].tag.name == 'p':
                                # special case:  "some text {{SomePage}} more text" or "\n{{SomePage}}\n" where SomePage contains a single p.
                                # the content of the transcluded P will be inserted directly into ancestor P.
                                p = body[0]
                                # get attributes from page node; we expect {class: "moin-transclusion"; data-href: "http://some.org/somepage"}
                                attrib = Attributes(ret).convert()
                                # make new span node and "convert" p to span by copying all of p's children
                                span = ET.Element(moin_page.span, attrib=attrib, children=p[:])
                                # insert the new span into the DOM replacing old include, page, body, and p elements
                                elem[i] = span
                            elif not isinstance(body, unicode) and ret.tag.name == 'page' and body.tag.name == 'body':
                                # special case: "some text {{SomePage}} more text" or "\n{{SomePage}}\n" and SomePage body contains multiple p's, a table, preformatted text, etc.
                                # note: ancestor P may have text before or after include
                                if i > 0:
                                    # there is text before transclude, make new p node to hold text before include and save in container
                                    pa = ET.Element(moin_page.p)
                                    pa[:] = elem[0:i]
                                    container.append(pa)
                                # get attributes from page node; we expect {class: "moin-transclusion"; data-href: "http://some.org/somepage"}
                                attrib = Attributes(ret).convert()
                                # make new div node, copy all of body's children, and save in container
                                div = ET.Element(moin_page.div, attrib=attrib, children=body[:])
                                container.append(div)
                                 # empty elem of siblings that were just placed in container
                                elem[0:i+1] = []
                                if len(elem) > 0:
                                    # there is text after transclude, make new p node to hold text, copy siblings, save in container
                                    pa = ET.Element(moin_page.p)
                                    pa[:] = elem[:]
                                    container.append(pa)
                                    elem[:] = []
                                # elem is now empty so while loop will terminate and container will be returned up one level in recursion
                            else:
                                # ret may be a unicode string: take default action
                                elem[i] = ret
                        else:
                            # default action for any ret not fitting special cases above
                            elem[i] = ret
                i += 1
            if len(container) > 0:
                return container

        finally:
            self.stack.pop()