def visit_moinpage_h(self, element):
    """
    Track a heading as the start of a DocBook section.

    There is not really heading in DocBook, but rather section with
    title. The section is a root tag for all the elements which in
    the dom tree will be between two heading tags, so the children of
    each section are collected manually in ``self.section_children``
    (keyed by section level).

    Nothing is returned; only ``self.parent_section``,
    ``self.current_section`` and ``self.section_children`` are updated.

    NOTE(review): a heading whose level equals the current section level
    matches neither branch below and therefore changes nothing.
    """
    # Normalise the level to an integer once, so that the comparisons
    # below are numeric. Comparing the raw attribute (a string) with the
    # integer section level raises TypeError on Python 3 and is always
    # true on Python 2. A missing attribute is treated as level 1.
    depth = int(element.get(moin_page('outline-level'), 1))

    if depth > self.current_section:
        # We will have a new section under another section
        self.parent_section = self.current_section
        self.current_section = depth
        self.section_children[depth] = []
        # NB : Error with docbook.title
        title = ET.Element(docbook('title'), attrib={}, children=element[0])
        self.section_children[depth].append(title)
    elif depth < self.current_section:
        # We will close a section before starting a new one
        # Need more test
        if self.parent_section != 0:
            section_tag = 'sect{0}'.format(self.parent_section)
            section = ET.Element(docbook(section_tag), attrib={},
                                 children=self.section_children[self.current_section])
            self.section_children[self.parent_section].append(section)
            self.current_section = depth
def visit_simple_list(self, moin_page_tag, attrib, element, depth):
    """
    Convert a DocBook list-like element into a moin ``list`` element.

    DocBook has several list elements with different semantic meaning
    that all map to the same structure in the DOM tree. Children named
    ``listitem``, ``step``, ``stepalternatives`` or ``member`` become
    ``list-item`` / ``list-item-body`` pairs; every other element is
    converted through ``self.visit``; non-element children are kept as is.

    ``attrib`` is used as the attributes of the returned list element.
    ``moin_page_tag`` is not used by this method.
    """
    item_names = frozenset(('listitem', 'step', 'stepalternatives', 'member'))
    collected = []
    for node in element:
        if not isinstance(node, ET.Element):
            collected.append(node)
            continue

        converted = self.visit(node, depth)
        if node.tag.name in item_names:
            # The conversion result is used directly as the body content.
            body = ET.Element(moin_page('list-item-body'), attrib={}, children=converted)
            collected.append(ET.Element(moin_page('list-item'), attrib={}, children=[body]))
        elif converted is None:
            # Nothing was produced for this child.
            pass
        elif isinstance(converted, (list, tuple)):
            collected.extend(converted)
        else:
            collected.append(converted)
    return ET.Element(moin_page.list, attrib=attrib, children=collected)
def visit_qandaentry_number(self, element, depth):
    """
    Convert one question/answer entry into a single list item.

    Convert::

        <question>Q</question><answer>A</answer>

    to::

        <list-item>
            <list-item-body><p>Q</p><p>A</p></list-item-body>
        </list-item>

    Only ``question`` and ``answer`` elements are converted; other
    elements are left out, and non-element children are kept as is.
    """
    body_nodes = []
    for node in element:
        if not isinstance(node, ET.Element):
            body_nodes.append(node)
            continue
        if node.tag.name != 'question' and node.tag.name != 'answer':
            continue
        converted = self.visit(node, depth)
        if converted is None:
            continue
        if isinstance(converted, (list, tuple)):
            body_nodes.extend(converted)
        else:
            body_nodes.append(converted)

    body = ET.Element(moin_page('list-item-body'), attrib={}, children=body_nodes)
    return ET.Element(moin_page('list-item'), attrib={}, children=[body])
def visit_docbook_seglistitem(self, element, labels, depth):
    """
    Convert a ``seglistitem`` into a sequence of label/body list items.

    A seglistitem is a list-item for a segmented list. It acts like a
    definition list entry, but the labels are predetermined in
    ``labels``: the n-th ``seg`` child is paired with the n-th label,
    wrapping around when there are more segments than labels.

    Other element children are converted through ``self.visit`` and
    non-element children are kept as is.

    Returns the list of generated nodes.
    """
    new = []
    counter = 0
    for child in element:
        if not isinstance(child, ET.Element):
            new.append(child)
            continue

        if child.tag.name == 'seg':
            # With no label collected (no segtitle seen so far) the modulo
            # below would divide by zero; emit an empty label instead.
            if labels:
                label_children = labels[counter % len(labels)]
            else:
                label_children = ()
            label_tag = ET.Element(moin_page('list-item-label'), attrib={},
                                   children=label_children)
            body_tag = ET.Element(moin_page('list-item-body'), attrib={},
                                  children=self.visit(child, depth))
            item_tag = ET.Element(moin_page('list-item'), attrib={},
                                  children=[label_tag, body_tag])
            new.append(item_tag)
            counter += 1
        else:
            r = self.visit(child, depth)
            if r is None:
                r = ()
            elif not isinstance(r, (list, tuple)):
                r = (r, )
            new.extend(r)
    return new
def convert(self):
    """
    Convert the attributes of ``self.element`` into output attributes.

    Attributes are handled according to the namespace of their name:

    - moin_page namespace: dispatched to a ``visit_<name>`` method
      (dashes replaced by underscores) if one exists;
    - a namespace listed in ``self.namespaces_valid_output``: copied;
    - xml namespace: ``id`` and ``lang`` are moved to the html namespace;
    - no namespace: dispatched to ``visit_<name>`` when
      ``self.default_uri_input`` is set, otherwise re-qualified with
      ``self.default_uri_output`` when that is set.

    Returns a dict of the resulting attributes, in which attributes with
    a namespace override attributes with an empty namespace.
    """
    new = {}
    new_default = {}

    # items() is available on both Python 2 and Python 3 dictionaries,
    # whereas iteritems() only exists on Python 2.
    for key, value in self.element.attrib.items():
        if key.uri == moin_page:
            # We never have _ in attribute names, so ignore them instead of
            # creating ambiguous matches.
            if '_' not in key.name:
                n = 'visit_' + key.name.replace('-', '_')
                f = getattr(self, n, None)
                if f is not None:
                    f(value, new)
        elif key.uri in self.namespaces_valid_output:
            new[key] = value
        # We convert xml:id
        elif key.uri == xml.namespace:
            if key.name == 'id' or key.name == 'lang':
                new[ET.QName(key.name, html.namespace)] = value
        elif key.uri is None:
            if self.default_uri_input and '_' not in key.name:
                n = 'visit_' + key.name.replace('-', '_')
                f = getattr(self, n, None)
                if f is not None:
                    f(value, new_default)
            elif self.default_uri_output:
                new_default[ET.QName(key.name, self.default_uri_output)] = value

    # Attributes with namespace override attributes with empty namespace.
    new_default.update(new)
    return new_default
def visit_xhtml_dl(self, element):
    """
    Convert a list of definition.

    The starting structure::

        <dl>
            <dt>Label 1</dt><dd>Text 1</dd>
            <dt>Label 2</dt><dd>Text 2</dd>
        </dl>

    will be converted to::

        <list>
            <list-item>
                <list-item-label>Label 1</list-item-label>
                <list-item-body>Text 1</list-item-body>
            </list-item>
            <list-item>
                <list-item-label>Label 2</list-item-label>
                <list-item-body>Text 2</list-item-body>
            </list-item>
        </list>

    The label and body elements themselves are whatever ``self.visit``
    returns for the ``dt`` and ``dd`` children. Non-element children
    (text between the entries) are ignored.
    """
    list_item = []
    pair = []
    number_pair = 0
    # We will browse the children, and try to catch all the pairs
    # of <dt><dd>
    for child in element:
        # Text nodes have no tag, so they must be filtered out before the
        # tag name is inspected.
        if not isinstance(child, ET.Element):
            continue

        # We need one dt tag and one dd tag to have a pair
        if child.tag.name == 'dt' or child.tag.name == 'dd':
            number_pair += 1

        # The following code is similar to do_children method
        r = self.visit(child)
        if r is None:
            r = ()
        elif not isinstance(r, (list, tuple)):
            r = (r, )
        pair.extend(r)

        if number_pair == 2:
            # We have two elements of the pair
            # So we can put it into a <list-item> element
            list_item_element = ET.Element(moin_page.list_item,
                                           attrib={}, children=pair)
            list_item.append(list_item_element)
            pair = []
            number_pair = 0

    # we return the <list> with all the list item elements
    return ET.Element(moin_page.list, attrib={}, children=list_item)
def new(self, tag, attrib, children):
    """
    Create a new element for the DocBook tree.

    Pending ``self.standard_attribute`` entries are merged into
    ``attrib`` (which is modified in place) and then cleared.

    While a section is open (``self.current_section`` > 0) the element
    is stored in that section's children and nothing is returned;
    otherwise the element itself is returned.
    """
    pending = self.standard_attribute
    if pending:
        attrib.update(pending)
        self.standard_attribute = {}

    if not self.current_section > 0:
        return ET.Element(tag, attrib=attrib, children=children)

    section_content = self.section_children[self.current_section]
    section_content.append(ET.Element(tag, attrib=attrib, children=children))
    return None
def visit_moinpage_list_item_body(self, element):
    """
    Convert a list item body into a DocBook ``listitem``.

    Element children are converted through ``self.visit``; every
    non-element child is wrapped in its own ``simpara`` element.
    """
    items = []
    for child in element:
        if isinstance(child, ET.Element):
            r = self.visit(child)
            if r is None:
                r = ()
            elif not isinstance(r, (list, tuple)):
                r = (r, )
            items.extend(r)
        else:
            # The child must be passed as a one-element list: handing a
            # string over directly as ``children`` makes it being iterated
            # character by character.
            an_item = ET.Element(docbook.simpara, attrib={}, children=[child])
            items.append(an_item)
    return ET.Element(docbook.listitem, attrib={}, children=items)
def visit_xhtml_li(self, element):
    """
    Convert a ``<li>`` element into a list item without label.

    NB : A list item is like the following snippet::

        <list-item>
            <list-item-label>label</list-item-label>
            <list-item-body>Body</list-item-body>
        </list-item>

    For a <li> element there is no label, so only the body is generated
    from the converted children of ``element``.
    """
    body_tag = moin_page.list_item_body
    body = ET.Element(body_tag, attrib={}, children=self.do_children(element))
    item_tag = moin_page.list_item
    return ET.Element(item_tag, attrib={}, children=[body])
def visit_qandaentry_qanda(self, element, depth):
    """
    Convert one question/answer entry into labelled list items.

    Convert::

        <question>Q body</question><answer>A Body</answer>

    to::

        <list-item>
            <list-item-label>Q:</list-item-label>
            <list-item-body>Q Body</list-item-body>
        </list-item>
        <list-item>
            <list-item-label>A:</list-item-label>
            <list-item-body>A Body</list-item-body>
        </list-item>

    Other element children are converted through ``self.visit`` and
    non-element children are kept as is.

    Returns the list of generated nodes.
    """
    items = []
    for child in element:
        if not isinstance(child, ET.Element):
            items.append(child)
            continue

        # The label text is wrapped in a list: a bare string given as
        # ``children`` would be iterated character by character.
        if child.tag.name == 'question':
            item_label = ET.Element(moin_page('list-item-label'), attrib={}, children=["Q:"])
        elif child.tag.name == 'answer':
            item_label = ET.Element(moin_page('list-item-label'), attrib={}, children=["A:"])
        else:
            item_label = None
            r = self.visit(child, depth)
            if r is None:
                r = ()
            elif not isinstance(r, (list, tuple)):
                r = (r, )
            items.extend(r)

        if item_label is not None:
            # The conversion result is used directly as the body content.
            item_body = ET.Element(moin_page('list-item-body'), attrib={},
                                   children=self.visit(child, depth))
            list_item = ET.Element(moin_page('list-item'), attrib={},
                                   children=(item_label, item_body))
            items.append(list_item)
    return items
def visit_docbook_segmentedlist(self, element, depth):
    """
    Convert a segmented list into a moin ``list`` element.

    A segmented list is like a list of definitions, but the labels are
    defined once at the start with <segtitle> tags and then repeated
    for each definition.

    The converted ``segtitle`` children are collected as labels, each
    ``seglistitem`` is converted by ``visit_docbook_seglistitem`` with
    the labels collected so far, every other element goes through
    ``self.visit``, and non-element children are kept as is.
    """
    labels = []
    content = []
    for node in element:
        if not isinstance(node, ET.Element):
            content.append(node)
            continue

        name = node.tag.name
        if name == 'seglistitem':
            converted = self.visit_docbook_seglistitem(node, labels, depth)
        else:
            converted = self.visit(node, depth)

        # Normalise the result to a sequence of nodes.
        if converted is None:
            converted = ()
        elif not isinstance(converted, (list, tuple)):
            converted = (converted, )

        # Titles feed the label list, everything else the list content.
        destination = labels if name == 'segtitle' else content
        destination.extend(converted)
    return ET.Element(moin_page.list, attrib={}, children=content)
def visit_list(self, element):
    """
    Convert a list of items (whatever the type: ordered or unordered).

    So we have html code like::

        <ul>
            <li>Item 1</li>
            <li>Item 2</li>
        </ul>

    Which will be converted to::

        <list>
            <list-item>
                <list-item-body>Item 1</list-item-body>
            </list-item>
            <list-item>
                <list-item-body>Item 2</list-item-body>
            </list-item>
        </list>

    NOTE(review): the tag itself (not ``tag.name``) is compared with
    plain strings here - confirm this matches the tags of the callers.
    """
    # Determine the label generation according to the type of the list
    tag = element.tag
    generate = None
    if tag == "ul" or tag == "dir":
        generate = 'unordered'
    elif tag == "ol":
        generate = 'ordered'

    attrib = {}
    if generate is not None:
        attrib[moin_page('item-label-generate')] = generate
    return ET.Element(moin_page.list, attrib=attrib,
                      children=self.do_children(element))
def visit_data_element(self, element, depth, object_data, text_object, caption):
    """
    Return an object element based on ``object_data``.

    ``self.media_tags`` maps the tag name of ``element`` to the preferred
    formats, the data tag and the base mimetype. The first object whose
    ``format`` is one of the preferred formats is shown; failing that,
    the last object without a ``format`` attribute is used. Objects with
    an unsupported format are skipped.

    If no object is suitable, a paragraph with the content of
    ``text_object`` is returned instead. ``None`` is returned when there
    is nothing to show at all or when the chosen object has no
    ``fileref``.

    When ``caption`` is given, the object and the caption are wrapped
    together in a span.
    """
    attrib = {}
    preferred_format, data_tag, mimetype = self.media_tags[element.tag.name]
    if not object_data:
        if not text_object:
            return
        children = self.do_children(element, depth + 1)
        return self.new(moin_page.p, attrib={}, children=children)

    # We try to determine the best object to show
    object_to_show = None
    for obj in object_data:
        fmt = obj.get('format')  # format is optional: <imagedata format="jpeg" fileref="jpeg.jpg"/>
        if not fmt:
            # XXX: Maybe we could add some verification over the extension of the file
            object_to_show = obj
        elif fmt.lower() in preferred_format:
            object_to_show = obj
            break
        # An unsupported format is skipped; it must not discard a usable
        # object that was found earlier.

    if object_to_show is None:
        # we could not find any suitable object, return the text_object replacement.
        if text_object is None:
            return
        children = self.do_children(text_object, depth + 1)
        return self.new(moin_page.p, attrib={}, children=children)

    href = object_to_show.get('fileref')
    if not href:
        # We could probably try to use entityref,
        # but at this time we won't support it.
        return
    attrib[html.alt] = href
    attrib[xlink.href] = '+get/' + href
    fmt = object_to_show.get('format')
    if fmt:
        attrib[moin_page('type')] = ''.join([mimetype, fmt.lower()])
    else:
        attrib[moin_page('type')] = mimetype
    align = object_to_show.get('align')
    if align and align in ('left', 'center', 'right', 'top', 'middle', 'bottom'):
        attrib[html.class_] = align

    # return object tag, html_out.py will convert to img, audio, or video based on type attr
    ret = ET.Element(moin_page.object, attrib=attrib)
    ret = mark_item_as_transclusion(ret, href)
    if caption:
        caption = self.new(moin_page.span, attrib={moin_page.class_: 'db-caption'}, children=[caption])
        return self.new(moin_page.span, attrib={}, children=[ret, caption])
    return ret
def new(self, tag, attrib, children):
    """
    Create and return a new element for the DocBook tree.

    Pending ``self.standard_attribute`` entries are merged into
    ``attrib`` (which is modified in place) and then cleared, so they
    are applied to one element only.
    """
    pending = self.standard_attribute
    if pending:
        attrib.update(pending)
        self.standard_attribute = {}
    element = ET.Element(tag, attrib=attrib, children=children)
    return element
def XML(text, parser=None):
    """
    Parse ``text`` and return whatever the parser's ``close()`` returns.

    Copied from EmeraldTree/tree.py to force use of the local XMLParser
    class override: that class is instantiated (with an ET.TreeBuilder
    target) whenever no parser is passed in.
    """
    active_parser = parser or XMLParser(target=ET.TreeBuilder())
    active_parser.feed(text)
    return active_parser.close()
def new_copy_symmetric(self, element, attrib):
    """
    Copy ``element`` under the same tag name in the moin_page namespace.

    A new QName is created with the name of the element's tag but with
    the moin_page namespace; the copy itself is then handled normally
    by ``self.new_copy``.
    """
    return self.new_copy(ET.QName(element.tag.name, moin_page), element, attrib)
def visit_moinpage_h(self, elem):
    """
    Convert a heading into the html ``h1`` .. ``h6`` element.

    The outline level defaults to 1 when the attribute is missing and is
    limited to the range 1..6. An ElementException is raised when the
    level can not be converted to an integer.
    """
    raw_level = elem.get(moin_page.outline_level, 1)
    try:
        level = int(raw_level)
    except ValueError:
        raise ElementException('page:outline-level needs to be an integer')

    # Keep the level inside the range html provides headings for.
    level = min(max(level, 1), 6)
    return self.new_copy(ET.QName('h%d' % level, html), elem)
def visit_moinpage_table(self, element):
    """
    Convert a table into a DocBook ``table`` with a leading caption.

    The caption is the html title attribute of the table. Without one,
    a numbered default caption is generated and ``self.table_counter``
    is increased.
    """
    # TODO: Attributes conversion
    caption_text = element.get(html('title'))
    if not caption_text:
        # TODO: Translation
        caption_text = "Table {0}".format(self.table_counter)
        self.table_counter += 1

    content = [ET.Element(docbook('caption'), attrib={}, children=[caption_text])]
    content.extend(self.do_children(element))
    return self.new(docbook.table, attrib={}, children=content)
def macro(self):
    """
    Return the "Go To Item" form as an element tree.

    The form submits (GET) to ``<script name>/<page name>`` with a hidden
    ``do=goto`` field and a text field named ``target``.

    The script name, the page name and the translated button label are
    escaped before being inserted into the markup, so that characters
    like ``&``, ``<`` or ``"`` neither break the XML parsing nor alter
    the structure of the form.
    """
    from xml.sax.saxutils import escape

    def attr_value(value):
        # Escape text for use inside a double-quoted XML attribute.
        return escape('%s' % (value, ), {'"': '&quot;'})

    action = '%s/%s' % (self.request.getScriptname(), self.page_name)
    return ET.XML("""
<form xmlns="%s" method="get" action="%s">
    <input type="hidden" name="do" value="goto" />
    <p>
        <input type="text" name="target" size="30" />
        <input type="submit" value="%s" />
    </p>
</form>
""" % (html, attr_value(action), attr_value(_("Go To Item"))))  # HHH ?
def visit_moinpage_page(self, element):
    """
    Convert the page into a DocBook ``article``.

    The article starts with an ``info`` element holding ``self.title``.
    The first ``body`` child (moin_page namespace) of ``element``
    provides the content:

    - if converting its children yields nodes, they follow the info
      element directly;
    - otherwise the content collected in ``self.section_children`` is
      assembled into nested ``sect<level>`` elements, the deepest level
      being nested into the next shallower one.

    Raises RuntimeError when ``element`` has no body child.
    """
    title = ET.Element(docbook('title'), attrib={}, children=[self.title])
    info = ET.Element(docbook.info, attrib={}, children=[title])
    for item in element:
        if item.tag.uri == moin_page and item.tag.name == 'body':
            c = self.do_children(item)
            if c:
                c.insert(0, info)
                return ET.Element(docbook.article, attrib={}, children=c)

            # Walk the levels from the deepest to the shallowest one. The
            # sorted view is kept local, so that self.section_children
            # stays the mapping the other methods expect.
            section = None
            for level, content in sorted(self.section_children.items(), reverse=True):
                if section is not None:
                    content.append(section)
                section_tag = 'sect{0}'.format(level)
                section = ET.Element(docbook(section_tag), attrib={}, children=content)

            # Without any section there is nothing to add after the info.
            children = [info]
            if section is not None:
                children.append(section)
            return ET.Element(docbook.article, attrib={}, children=children)

    raise RuntimeError('page:page need to contain exactly one page body tag, got {0!r}'.format(element[:]))
def visit_moinpage_note(self, elem):
    """
    Convert a note into a footnote reference, or place the footnotes.

    An empty note element is an explicit footnote placement: the
    footnotes collected so far are returned (``None`` if there are
    none), the collection is emptied and the note counter is reset.

    Otherwise the content of the ``note-body`` child is stored as a new
    footnote on the top of ``self._special_stack`` and the reference to
    it (a ``sup`` element linking to the footnote) is returned. A note
    without a ``note-body`` child yields a footnote with empty content.
    """
    # TODO: Check note-class
    top = self._special_stack[-1]
    if len(elem) == 0:
        # explicit footnote placement: show prior footnotes, empty stack, reset counter
        if len(top._footnotes) == 0:
            return

        footnotes_div = self.create_footnotes(top)
        top.remove_footnotes()
        self._id.zero_id('note')
        # bump note-placement counter to insure unique footnote ids
        self._id.gen_id('note-placement')
        return footnotes_div

    # Start with an empty body, so that a note lacking a note-body child
    # does not fail when the footnote is filled below.
    body = []
    for child in elem:
        # Text children have no tag and can not be the note body.
        if not isinstance(child, ET.Element):
            continue
        if child.tag.uri == moin_page:
            if child.tag.name == 'note-body':
                body = self.do_children(child)

    note_id = self._id.gen_id('note')
    prefixed_id = '%s-%s' % (self._id.get_id('note-placement'), note_id)

    elem_ref = ET.XML("""
<html:sup xmlns:html="{0}" html:id="note-{1}-ref" html:class="moin-footnote">
<html:a html:href="#note-{2}">{3}</html:a>
</html:sup>
""".format(html, prefixed_id, prefixed_id, note_id))

    elem_note = ET.XML("""
<html:p xmlns:html="{0}" html:id="note-{1}">
<html:sup><html:a html:href="#note-{2}-ref">{3}</html:a></html:sup>
</html:p>
""".format(html, prefixed_id, prefixed_id, note_id))

    elem_note.extend(body)
    top.add_footnote(elem_note)

    return elem_ref
def visit_xhtml_table(self, element):
    """
    Convert an xhtml table into a moin ``table`` element.

    The attributes are converted by ``self.convert_attributes``. Only
    element children are converted; non-element children are left out.
    """
    attrib = self.convert_attributes(element)

    # we should not have any strings in the child
    rows = []
    for node in element:
        if not isinstance(node, ET.Element):
            continue
        converted = self.visit(node)
        if converted is None:
            continue
        if isinstance(converted, (list, tuple)):
            rows.extend(converted)
        else:
            rows.append(converted)
    return ET.Element(moin_page.table, attrib=attrib, children=rows)
def visit_xhtml_list(self, element):
    """
    Convert a list of items (whatever the type: ordered or unordered).

    So we have html code like::

        <ul>
            <li>Item 1</li>
            <li>Item 2</li>
        </ul>

    Which will be converted to::

        <list>
            <list-item>
                <list-item-body>Item 1</list-item-body>
            </list-item>
            <list-item>
                <list-item-body>Item 2</list-item-body>
            </list-item>
        </list>

    For ordered lists the html ``type`` attribute (``A``, ``I``, ``a``
    or ``i``) selects the list style. Only element children are
    converted; non-element children are left out.
    """
    attrib = self.convert_attributes(element)

    # Define the appropriate attributes according to the type of the list
    tag_name = element.tag.name
    if tag_name == "ul" or tag_name == "dir":
        attrib[moin_page('item-label-generate')] = 'unordered'
    elif tag_name == "ol":
        attrib[moin_page('item-label-generate')] = 'ordered'

        # We check which kind of style we have
        style_types = {
            'A': 'upper-alpha',
            'I': 'upper-roman',
            'a': 'lower-alpha',
            'i': 'lower-roman',
        }
        style = element.get(html.type)
        if style in style_types:
            attrib[moin_page('list-style-type')] = style_types[style]

    # we should not have any strings in the child
    list_items = []
    for node in element:
        if not isinstance(node, ET.Element):
            continue
        converted = self.visit(node)
        if converted is None:
            continue
        if isinstance(converted, (list, tuple)):
            list_items.extend(converted)
        else:
            list_items.append(converted)
    return ET.Element(moin_page.list, attrib=attrib, children=list_items)
def visit_data_element(self, element, depth, object_data, text_object, caption):
    """
    Return an object element based on ``object_data``.

    ``self.media_tags`` maps the tag name of ``element`` to the preferred
    formats, the data tag and the base mimetype. The first object whose
    ``format`` is one of the preferred formats is shown; failing that,
    the last object without a ``format`` attribute is used.

    If no object is suitable, a paragraph with the content of
    ``text_object`` is returned instead. ``None`` is returned when there
    is nothing to show at all or when the chosen object has no
    ``fileref``.

    ``caption`` is not used by this method.
    """
    attrib = {}
    prefered_format, data_tag, mimetype = self.media_tags[element.tag.name]
    if not object_data:
        if not text_object:
            return
        # The replacement paragraph is built from the children of the
        # element itself (the former code referenced an undefined name).
        children = self.do_children(element, depth + 1)
        return self.new(moin_page.p, attrib={}, children=children)

    # We try to determine the best object to show
    object_to_show = None
    for obj in object_data:
        fmt = obj.get('format')
        if fmt:
            if fmt.lower() in prefered_format:
                object_to_show = obj
                break
        else:
            # XXX: Maybe we could add some verification over the
            #      extension of the file
            object_to_show = obj

    # If we could not find any suitable object, we return
    # the text replacement. The test is an identity test on purpose:
    # a found object must not be rejected because it evaluates to false.
    if object_to_show is None:
        if text_object is None:
            return
        children = self.do_children(text_object, depth + 1)
        return self.new(moin_page.p, attrib={}, children=children)

    href = object_to_show.get('fileref')
    if not href:
        # We could probably try to use entityref,
        # but at this time we won't support it.
        return
    attrib[xlink.href] = href
    fmt = object_to_show.get('format')
    if fmt:
        attrib[moin_page('type')] = ''.join([mimetype, fmt.lower()])
    else:
        attrib[moin_page('type')] = mimetype
    return ET.Element(moin_page.object, attrib=attrib)
def test_converter_refs(tree_xml, links_expected, transclusions_expected, external_expected):
    """
    Check the links, transclusions and external links found in a tree.

    The tree parsed from ``tree_xml`` is run through ConverterItemRefs
    and each collected reference list is compared with its expectation.
    """
    converter = ConverterItemRefs()
    converter(ET.XML(tree_xml))

    found_links = converter.get_links()
    found_transclusions = converter.get_transclusions()
    found_external = converter.get_external_links()

    # The lists are compared sorted rather than as sets, so that
    # duplicated items in the result are not silently deduplicated.
    assert sorted(found_links) == sorted(links_expected)
    assert sorted(found_transclusions) == sorted(transclusions_expected)
    assert sorted(found_external) == sorted(external_expected)
def visit_moinpage_note(self, elem):
    """
    Convert a note into a footnote reference.

    The content of the ``note-body`` child is stored as a new footnote
    on the top of ``self._special_stack`` and the reference to it (a
    ``sup`` element linking to the footnote) is returned. A note without
    a ``note-body`` child yields a footnote with empty content.
    """
    # TODO: Check note-class

    # Start with an empty body, so that a note lacking a note-body child
    # does not fail when the footnote is filled below.
    body = []
    for child in elem:
        # Text children have no tag and can not be the note body.
        if not isinstance(child, ET.Element):
            continue
        if child.tag.uri == moin_page:
            if child.tag.name == 'note-body':
                body = self.do_children(child)

    note_id = self._id.gen_id('note')

    elem_ref = ET.XML("""
<html:sup xmlns:html="%s" html:id="note-%d-ref" html:class="moin-footnote"><html:a html:href="#note-%d">%d</html:a></html:sup>
""" % (html, note_id, note_id, note_id))

    elem_note = ET.XML("""
<html:p xmlns:html="%s" html:id="note-%d"><html:sup><html:a html:href="#note-%d-ref">%d</html:a></html:sup></html:p>
""" % (html, note_id, note_id, note_id))

    elem_note.extend(body)
    self._special_stack[-1].add_footnote(elem_note)

    return elem_ref
def visit_docbook_table(self, element, depth):
    """
    <table> --> <table>

    Only element children are converted (through ``self.visit``);
    non-element children are left out.
    """
    # we should not have any strings in the child
    table_content = []
    for node in element:
        if not isinstance(node, ET.Element):
            continue
        converted = self.visit(node, depth)
        if converted is None:
            continue
        if isinstance(converted, (list, tuple)):
            table_content.extend(converted)
        else:
            table_content.append(converted)
    return ET.Element(moin_page.table, attrib={}, children=table_content)
def handle_simple_list(self, docbook_tag, element, attrib):
    """
    Convert a list into the DocBook element ``docbook_tag``.

    ``list-item`` children are not converted themselves: their converted
    children are added directly. Every other element goes through
    ``self.visit``; non-element children are left out.
    """
    collected = []
    for node in element:
        if not isinstance(node, ET.Element):
            continue

        # We do not care about <list-item>
        if node.tag.name == 'list-item':
            converted = self.do_children(node)
        else:
            converted = self.visit(node)

        if converted is None:
            continue
        if isinstance(converted, (list, tuple)):
            collected.extend(converted)
        else:
            collected.append(converted)
    return ET.Element(docbook_tag, attrib=attrib, children=collected)
def replace_smiley(self, text):
    """
    Return the smiley element for ``text``, or ``text`` itself.

    The text is only replaced if, once the surrounding whitespace is
    removed, it is exactly one of the keys of ``self.smileys``. In that
    case a span with the matching icon classes and the stripped markup
    as content is returned; otherwise the text is returned unchanged.
    """
    # Remove the space around the smiley text, if any
    markup = text.strip()
    if markup not in self.smileys:
        # not a smiley: hand the text back without any transformation
        return text

    icon_name = self.smileys[markup]
    attrib = {moin_page('class'): 'moin-text-icon moin-' + icon_name}
    return ET.Element(moin_page.span, attrib=attrib, children=[markup])
def _convert(self, doc):
    """
    Convert ``doc`` to DocBook and return it as a file response.

    The document is expanded, converted from the internal representation
    with the converter registered for ``application/docbook+xml``,
    serialized into an in-memory file and handed over to ``send_file``.
    """
    from emeraldtree import ElementTree as ET
    from MoinMoin.converter import default_registry as reg

    doc = self._expand_document(doc)

    # We convert the internal representation of the document
    # into a DocBook document
    conv = reg.get(type_moin_document, Type('application/docbook+xml'))
    doc = conv(doc)

    # We determine the different namespaces of the output form
    output_namespaces = {
        docbook.namespace: '',
        xlink.namespace: 'xlink',
    }

    # We convert the result into a StringIO object
    # With the appropriate namespace
    # TODO: Some other operation should probably be done here too
    # like adding a doctype
    file_to_send = StringIO()
    tree = ET.ElementTree(doc)
    tree.write(file_to_send, namespaces=output_namespaces)

    # We determine the different parameters for the reply
    mt = MimeType(mimestr='application/docbook+xml;charset=utf-8')
    content_type = mt.content_type()
    as_attachment = mt.as_attachment(app.cfg)

    # After writing, the position is at the end of the file,
    # so we move it back to the beginning before sending.
    file_to_send.seek(0)

    # Important: empty filename keeps flask from trying to autodetect filename,
    # as this would not work for us, because our file's are not necessarily fs files.
    return send_file(
        file=file_to_send,
        mimetype=content_type,
        as_attachment=as_attachment,
        attachment_filename=None,
        cache_timeout=10,  # wiki data can change rapidly
        add_etags=False,
        etag=None,
        conditional=True)