def visit_list(self, element):
    """
    Convert an HTML list, ordered or unordered, into a moin_page list.

    HTML code like::

        <ul>
            <li>Item 1</li>
            <li>Item 2</li>
        </ul>

    is converted to::

        <list>
            <list-item>
                <list-item-body>Item 1</list-item-body>
            </list-item>
            <list-item>
                <list-item-body>Item 2</list-item-body>
            </list-item>
        </list>

    ``ul`` and ``dir`` are marked as "unordered", ``ol`` as "ordered"; for
    any other tag the item-label-generate attribute is left unset.
    """
    tag = element.tag
    if tag == "ul" or tag == "dir":
        label_kind = 'unordered'
    elif tag == "ol":
        label_kind = 'ordered'
    else:
        label_kind = None

    attrib = {}
    if label_kind is not None:
        attrib[moin_page('item-label-generate')] = label_kind
    return ET.Element(moin_page.list, attrib=attrib, children=self.do_children(element))
def visit_moinpage_list(self, element):
    """
    Dispatch the conversion of a moin_page list to the right DocBook tag.

    - item-label-generate="unordered" --> <itemizedlist>
    - item-label-generate="ordered"   --> <orderedlist numeration="...">
    - anything else                   --> <variablelist> (definition list)

    For ordered lists the list-style-type attribute selects the DocBook
    numeration; unknown or missing styles fall back to 'arabic'.
    """
    label_kind = element.get(moin_page('item-label-generate'))

    if 'unordered' == label_kind:
        return self.handle_simple_list(docbook.itemizedlist, element, attrib={})
    if 'ordered' != label_kind:
        return self.new_copy(docbook.variablelist, element, attrib={})

    # moin_page list-style-type --> DocBook numeration
    numerations = {
        'upper-alpha': 'upperalpha',
        'upper-roman': 'upperroman',
        'lower-alpha': 'loweralpha',
        'lower-roman': 'lowerroman',
    }
    list_style_type = element.get(moin_page('list-style-type'))
    attrib = {docbook('numeration'): numerations.get(list_style_type, 'arabic')}
    return self.handle_simple_list(docbook.orderedlist, element, attrib=attrib)
def visit_simple_list(self, moin_page_tag, attrib, element, depth):
    """
    Convert any of the DocBook "simple" lists into a moin_page list.

    DocBook has several list elements with different semantics that all
    map to the same DOM structure. Children named listitem, step,
    stepalternatives or member are wrapped as::

        <list-item><list-item-body>...</list-item-body></list-item>

    Other element children are converted with visit() and spliced in;
    non-element children (text) are kept as they are.

    The moin_page_tag argument is accepted but not used here: the result
    is always a moin_page.list element carrying the given attrib.
    """
    item_names = ('listitem', 'step', 'stepalternatives', 'member')
    converted = []
    for node in element:
        if not isinstance(node, ET.Element):
            converted.append(node)
            continue

        if node.tag.name in item_names:
            body = ET.Element(moin_page('list-item-body'), attrib={},
                              children=self.visit(node, depth))
            converted.append(ET.Element(moin_page('list-item'), attrib={},
                                        children=[body]))
            continue

        # visit() may return nothing, a single node, or a sequence of nodes
        result = self.visit(node, depth)
        if result is None:
            continue
        if isinstance(result, (list, tuple)):
            converted.extend(result)
        else:
            converted.append(result)
    return ET.Element(moin_page.list, attrib=attrib, children=converted)
def visit_qandaentry_number(self, element, depth):
    """
    Convert::

        <question>Q</question><answer>A</answer>

    to::

        <list-item>
            <list-item-body><p>Q</p><p>A</p></list-item-body>
        </list-item>

    Only <question> and <answer> element children are converted; other
    element children are left out, and non-element children (text) are
    kept as they are.
    """
    body_children = []
    for node in element:
        if not isinstance(node, ET.Element):
            body_children.append(node)
            continue
        if node.tag.name != 'question' and node.tag.name != 'answer':
            continue

        # visit() may return nothing, a single node, or a sequence of nodes
        result = self.visit(node, depth)
        if result is None:
            continue
        if isinstance(result, (list, tuple)):
            body_children.extend(result)
        else:
            body_children.append(result)

    body = ET.Element(moin_page('list-item-body'), attrib={}, children=body_children)
    return ET.Element(moin_page('list-item'), attrib={}, children=[body])
def visit_docbook_seglistitem(self, element, labels, depth):
    """
    Convert a seglistitem, the list item of a segmented list.

    It behaves like a definition-list entry, except that the labels are
    predetermined: the n-th <seg> child is paired with
    labels[n % len(labels)], producing one label/body list-item per seg.

    Other element children are converted with visit(); non-element
    children are kept as they are. Returns the list of resulting nodes.
    """
    result = []
    seg_index = 0
    for node in element:
        if not isinstance(node, ET.Element):
            result.append(node)
            continue

        if node.tag.name == 'seg':
            label = ET.Element(moin_page('list-item-label'), attrib={},
                               children=labels[seg_index % len(labels)])
            body = ET.Element(moin_page('list-item-body'), attrib={},
                              children=self.visit(node, depth))
            result.append(ET.Element(moin_page('list-item'), attrib={},
                                     children=[label, body]))
            seg_index += 1
            continue

        # visit() may return nothing, a single node, or a sequence of nodes
        visited = self.visit(node, depth)
        if visited is None:
            continue
        if isinstance(visited, (list, tuple)):
            result.extend(visited)
        else:
            result.append(visited)
    return result
def visit_data_element(self, element, depth, object_data, text_object, caption):
    """
    Return an object element based on object_data, or a text fallback.

    element     -- the media element; its tag name selects the entry in
                   self.media_tags (preferred formats, data tag, mimetype)
    depth       -- current conversion depth
    object_data -- candidate data elements (e.g. <imagedata>)
    text_object -- element holding the textual replacement, may be None
    caption     -- optional caption, rendered in a 'db-caption' span

    Returns a moin_page.object element (wrapped in a span together with
    the caption, when there is one), a paragraph built from the text
    replacement when no object is usable, or None when there is nothing
    to show or the chosen object has no fileref.
    """
    attrib = {}
    preferred_format, data_tag, mimetype = self.media_tags[element.tag.name]
    if not object_data:
        if not text_object:
            return
        children = self.do_children(element, depth + 1)
        return self.new(moin_page.p, attrib={}, children=children)

    # We try to determine the best object to show
    object_to_show = None
    for obj in object_data:
        # format is optional: <imagedata format="jpeg" fileref="jpeg.jpg"/>
        fmt = obj.get('format')
        if fmt:
            if fmt.lower() in preferred_format:
                object_to_show = obj
                break
            # unsupported format
            object_to_show = None
        else:
            # XXX: Maybe we could add some verification over the extension of the file
            object_to_show = obj

    if object_to_show is None:
        # No suitable object: use the text_object replacement, if any.
        # Without this guard do_children() would be handed None.
        if text_object is None:
            return
        children = self.do_children(text_object, depth + 1)
        return self.new(moin_page.p, attrib={}, children=children)

    href = object_to_show.get('fileref')
    if not href:
        # We could probably try to use entityref,
        # but at this time we won't support it.
        return
    attrib[html.alt] = href
    attrib[xlink.href] = '+get/' + href

    fmt = object_to_show.get('format')
    if fmt:
        attrib[moin_page('type')] = ''.join([mimetype, fmt.lower()])
    else:
        attrib[moin_page('type')] = mimetype

    align = object_to_show.get('align')
    if align and align in ('left', 'center', 'right', 'top', 'middle', 'bottom'):
        attrib[html.class_] = align

    # return object tag, html_out.py will convert to img, audio, or video based on type attr
    ret = ET.Element(moin_page.object, attrib=attrib)
    ret = mark_item_as_transclusion(ret, href)
    if caption:
        caption = self.new(moin_page.span, attrib={moin_page.class_: 'db-caption'}, children=[caption])
        return self.new(moin_page.span, attrib={}, children=[ret, caption])
    return ret
def error(self, message):
    """
    Return a DOM Tree containing an error message.

    The message is wrapped, innermost first, as::

        <page><body><part><error>message</error></part></body></page>
    """
    node = message
    for name in ('error', 'part', 'body', 'page'):
        node = self.new(moin_page(name), attrib={}, children=[node])
    return node
def add_attr_to_style(attrib, attr):
    """
    Append one style declaration to the moin_page 'style' entry of attrib.

    attr is stripped and decoded with the 'unicode-escape' codec, and a
    trailing ';' is added when missing. If attrib already holds a
    non-empty style, the new declaration is appended after one space;
    otherwise it becomes the style. attrib is modified in place.
    """
    declaration = attr.strip().decode('unicode-escape')
    if not declaration.endswith(';'):
        declaration += ';'

    style_key = moin_page('style')
    current = attrib.get(style_key, "")
    if current:
        attrib[style_key] = current + " " + declaration
    else:
        attrib[style_key] = declaration
def block_table_repl(self, iter_content, stack, table, table_args=''): stack.clear() # TODO: table attributes elem = moin_page.table() stack.push(elem) if table_args: table_args = _TableArguments()(table_args) for key, value in table_args.keyword.iteritems(): attrib = elem.attrib if key in ('class', 'style', 'number-columns-spanned', 'number-rows-spanned'): attrib[moin_page(key)] = value element = moin_page.table_body() stack.push(element) lines = _Iter(self.block_table_lines(iter_content), startno=iter_content.lineno) element = moin_page.table_row() stack.push(element) preprocessor_status = [] for line in lines: m = self.tablerow_re.match(line) if not m: return if m.group('newrow'): stack.pop_name('table-row') element = moin_page.table_row() stack.push(element) cells = m.group('cells') if cells: cells = cells.split('||') for cell in cells: if stack.top_check('table-cell'): stack.pop() cell = re.split(r'\s*\|\s*', cell) element = moin_page.table_cell() if len(cell) > 1: cell_args = _TableArguments()(cell[0]) for key, value in cell_args.keyword.iteritems(): attrib = element.attrib if key in ('class', 'style', 'number-columns-spanned', 'number-rows-spanned'): attrib[moin_page(key)] = value cell = cell[1] else: cell = cell[0] stack.push(element) self.preprocessor.push() self.parse_inline(cell, stack, self.inline_re) preprocessor_status = self.preprocessor.pop() elif m.group('text'): self.preprocessor.push(preprocessor_status) self.parse_inline('\n{0}'.format(m.group('text')), stack, self.inline_re) preprocessor_status = self.preprocessor.pop() stack.pop_name('table')
def visit_xhtml_td(self, element):
    """
    <td rowspan="r" colspan="c"> -->
    <table-cell number-rows-spanned="r" number-columns-spanned="c">

    A span attribute is only copied when it is present and non-empty.
    """
    span_names = (
        (html.rowspan, 'number-rows-spanned'),
        (html.colspan, 'number-columns-spanned'),
    )
    attrib = {}
    for html_name, moin_name in span_names:
        value = element.get(html_name)
        if value:
            attrib[moin_page(moin_name)] = value
    return self.new_copy(moin_page.table_cell, element, attrib=attrib)
def visit_docbook_footnote(self, element, depth):
    """
    <footnote> --> <note note-class="footnote"><note-body>

    The converted children of the footnote become the content of the
    note-body, which is the only child of the returned note.
    """
    note_attrib = {moin_page("note-class"): "footnote"}
    body_children = self.do_children(element, depth)
    note_body = self.new(moin_page("note-body"), attrib={}, children=body_children)
    return self.new(moin_page.note, attrib=note_attrib, children=[note_body])
def visit_moinpage_table_cell(self, element):
    """
    <table-cell number-rows-spanned="r" number-columns-spanned="c"> -->
    <td rowspan="r" colspan="c">

    A span attribute is only copied when it is present and non-empty.
    """
    attrib = {}
    rowspan = element.get(moin_page('number-rows-spanned'))
    colspan = element.get(moin_page('number-columns-spanned'))
    # (a leftover debug print of the rowspan was removed here: a converter
    # should not write to stdout)
    if rowspan:
        attrib[docbook.rowspan] = rowspan
    if colspan:
        attrib[docbook.colspan] = colspan
    return self.new_copy(docbook.td, element, attrib=attrib)
def visit_docbook_footnote(self, element, depth):
    """
    <footnote> --> <note note-class="footnote"><note-body>

    The footnote content is converted and placed in a single note-body
    child of the resulting note element.
    """
    note_class = moin_page('note-class')
    converted = self.do_children(element, depth)
    body = self.new(moin_page('note-body'), attrib={}, children=converted)
    return self.new(moin_page.note, attrib={note_class: "footnote"}, children=[body])
def visit_docbook_td(self, element, depth):
    """
    <td> --> <table-cell>

    rowspan and colspan, when present and non-empty, are carried over as
    number-rows-spanned and number-columns-spanned.
    """
    span_names = (
        ("rowspan", "number-rows-spanned"),
        ("colspan", "number-columns-spanned"),
    )
    attrib = {}
    for docbook_name, moin_name in span_names:
        value = element.get(docbook_name)
        if value:
            attrib[moin_page(moin_name)] = value
    return self.new_copy(moin_page.table_cell, element, depth, attrib=attrib)
def block_table_repl(self, iter_content, stack, table, table_args=""): stack.clear() # TODO: table attributes elem = moin_page.table() stack.push(elem) if table_args: table_args = _TableArguments()(table_args) for key, value in table_args.keyword.iteritems(): attrib = elem.attrib if key in ("class", "style", "number-columns-spanned", "number-rows-spanned"): attrib[moin_page(key)] = value element = moin_page.table_body() stack.push(element) lines = _Iter(self.block_table_lines(iter_content)) element = moin_page.table_row() stack.push(element) preprocessor_status = [] for line in lines: m = self.tablerow_re.match(line) if not m: return if m.group("newrow"): stack.pop_name("table-row") element = moin_page.table_row() stack.push(element) cells = m.group("cells") if cells: cells = cells.split("||") for cell in cells: if stack.top_check("table-cell"): stack.pop() cell = re.split(r"\s*\|\s*", cell) element = moin_page.table_cell() if len(cell) > 1: cell_args = _TableArguments()(cell[0]) for key, value in cell_args.keyword.iteritems(): attrib = element.attrib if key in ("class", "style", "number-columns-spanned", "number-rows-spanned"): attrib[moin_page(key)] = value cell = cell[1] else: cell = cell[0] stack.push(element) self.preprocessor.push() self.parse_inline(cell, stack, self.inline_re) preprocessor_status = self.preprocessor.pop() elif m.group("text"): self.preprocessor.push(preprocessor_status) self.parse_inline("\n{0}".format(m.group("text")), stack, self.inline_re) preprocessor_status = self.preprocessor.pop() stack.pop_name("table")
def visit_docbook_td(self, element, depth):
    """
    <td> --> <table-cell>

    Non-empty rowspan / colspan values are translated to the moin_page
    attributes number-rows-spanned / number-columns-spanned.
    """
    attrib = {}

    rows_spanned = element.get('rowspan')
    cols_spanned = element.get('colspan')
    if rows_spanned:
        attrib[moin_page('number-rows-spanned')] = rows_spanned
    if cols_spanned:
        attrib[moin_page('number-columns-spanned')] = cols_spanned

    return self.new_copy(moin_page.table_cell, element, depth, attrib=attrib)
def visit_docbook_footnote(self, element, depth):
    """
    <footnote> --> <note note-class="footnote"><note-body>

    The converted footnote content becomes the note-body. When the body
    has more than one child, the data_lineno attribute of its second
    child is dropped: the footnote will be placed near the end of the
    page and out of sequence, so that line number would be misleading.
    """
    attrib = {moin_page('note-class'): "footnote"}
    body = self.new(moin_page('note-body'), attrib={},
                    children=self.do_children(element, depth))
    if len(body) > 1:
        lineno_key = html.data_lineno
        try:
            del body._children[1].attrib[lineno_key]
        except (KeyError, AttributeError):
            # The second child carries no line number, or it is a text
            # node without attributes: nothing to remove.
            pass
    return self.new(moin_page.note, attrib=attrib, children=[body])
def visit_data_element(self, element, depth, object_data, text_object, caption):
    """
    Return an object element based on object_data, or a text fallback.

    element     -- the media element; its tag name selects the entry in
                   self.media_tags (preferred formats, data tag, mimetype)
    depth       -- current conversion depth
    object_data -- candidate data elements (e.g. <imagedata>)
    text_object -- element holding the textual replacement, may be None
    caption     -- accepted but not used by this method

    Returns a moin_page.object element, a paragraph with the converted
    content of text_object when no object is usable, or None when there
    is nothing to show or the chosen object has no fileref.
    """
    attrib = {}
    preferred_format, data_tag, mimetype = self.media_tags[element.tag.name]
    if not object_data:
        if not text_object:
            return
        # Paragraph with the converted content of text_object (the
        # previous code referenced an undefined name here).
        children = self.do_children(text_object, depth + 1)
        return self.new(moin_page.p, attrib={}, children=children)

    # We try to determine the best object to show
    object_to_show = None
    for obj in object_data:
        fmt = obj.get('format')
        if fmt:
            if fmt.lower() in preferred_format:
                object_to_show = obj
                break
        else:
            # XXX: Maybe we could add some verification over the
            # extension of the file
            object_to_show = obj

    # If we could not find any suitable object, we return the text
    # replacement. Compare against None explicitly: a data element
    # without children may well evaluate as false.
    if object_to_show is None:
        if text_object is None:
            return
        children = self.do_children(text_object, depth + 1)
        return self.new(moin_page.p, attrib={}, children=children)

    href = object_to_show.get('fileref')
    if not href:
        # We could probably try to use entityref,
        # but at this time we won't support it.
        return
    attrib[xlink.href] = href

    fmt = object_to_show.get('format')
    if fmt:
        attrib[moin_page('type')] = ''.join([mimetype, fmt.lower()])
    else:
        attrib[moin_page('type')] = mimetype
    return ET.Element(moin_page.object, attrib=attrib)
def visit_qandaentry_qanda(self, element, depth):
    """
    Convert::

        <question>Q body</question><answer>A Body</answer>

    to::

        <list-item>
            <list-item-label>Q:</list-item-label>
            <list-item-body>Q Body</list-item-body>
        </list-item>
        <list-item>
            <list-item-label>A:</list-item-label>
            <list-item-body>A Body</list-item-body>
        </list-item>

    Other element children are converted with visit() and spliced in;
    non-element children are kept as they are. Returns the list of
    resulting nodes.
    """
    label_texts = {'question': "Q:", 'answer': "A:"}
    items = []
    for child in element:
        if not isinstance(child, ET.Element):
            items.append(child)
            continue

        label_text = label_texts.get(child.tag.name)
        if label_text is not None:
            # The label text is passed as a one-element list so that it
            # stays a single text child instead of being iterated
            # character by character.
            item_label = ET.Element(moin_page('list-item-label'), attrib={},
                                    children=[label_text])
            item_body = ET.Element(moin_page('list-item-body'), attrib={},
                                   children=self.visit(child, depth))
            items.append(ET.Element(moin_page('list-item'), attrib={},
                                    children=(item_label, item_body)))
            continue

        # visit() may return nothing, a single node, or a sequence of nodes
        r = self.visit(child, depth)
        if r is None:
            continue
        if isinstance(r, (list, tuple)):
            items.extend(r)
        else:
            items.append(r)
    return items
def inline_comment_repl(self, stack, comment, comment_begin=None, comment_end=None):
    """
    Open or close an inline comment.

    With a truthy comment_begin a <span class="comment"> is pushed on the
    stack; otherwise the top of the stack is popped. The comment and
    comment_end arguments are not used in the body.
    """
    if not comment_begin:
        stack.pop()
        return
    span = moin_page.span(attrib={moin_page('class'): 'comment'})
    stack.push(span)
def block_separator_repl(self, _iter_content, stack, separator, hr_class=u'moin-hr{0}'):
    """
    Emit a separator whose class encodes its height.

    The height is len(separator) - 3, clamped to the range 1..6, and is
    formatted into hr_class. The stack is cleared first and the new
    separator element is appended to its top.
    """
    stack.clear()
    # clamp: upper bound 6 first, then lower bound 1
    height = max(min(len(separator) - 3, 6), 1)
    separator_elem = moin_page.separator(attrib={moin_page('class'): hr_class.format(height)})
    stack.top_append(separator_elem)
def visit_docbook_informalequation(self, element, depth):
    """
    <informalequation> --> <div html:class="db-equation">
    """
    div_attrib = {html("class"): "db-equation"}
    return self.new_copy(moin_page("div"), element, depth, attrib=div_attrib)
def visit_docbook_informalfigure(self, element, depth):
    """
    <informalfigure> --> <div html:class="db-figure">
    """
    div_attrib = {html("class"): "db-figure"}
    return self.new_copy(moin_page("div"), element, depth, attrib=div_attrib)
def visit_moinpage_h(self, element):
    """
    Handle a heading by opening or closing DocBook sections.

    There is not really a heading in DocBook, but rather a section with
    a title. The section is the root tag of all the elements which, in
    the DOM tree, sit between two heading tags, so the children of each
    section are collected manually in self.section_children.

    - A deeper heading than the current section opens a new section
      whose first child is its <title>.
    - A shallower heading closes the current section: unless the parent
      section is 0, the collected children are wrapped in
      <sectN> (N = parent section) and attached to the parent.
    - A heading at the same level changes nothing.

    Returns None. Raises TypeError / ValueError from int() when the
    outline-level attribute is missing or not a number.
    """
    # The attribute value is text; convert it once so that it is compared
    # numerically with the section numbers, which this method stores as
    # ints.
    depth = int(element.get(moin_page('outline-level')))

    # We will have a new section
    # under another section
    if depth > self.current_section:
        self.parent_section = self.current_section
        self.current_section = depth
        self.section_children[self.current_section] = []
        # NB : Error with docbook.title
        title = ET.Element(docbook('title'), attrib={}, children=element[0])
        self.section_children[self.current_section].append(title)

    # We will close a section before starting a new one
    # Need more test
    elif depth < self.current_section:
        if self.parent_section != 0:
            section_tag = 'sect{0}'.format(self.parent_section)
            section = ET.Element(docbook(section_tag), attrib={},
                                 children=self.section_children[self.current_section])
            self.section_children[self.parent_section].append(section)
            self.current_section = depth
def visit_docbook_inlinequation(self, element, depth):
    """
    <inlinequation> --> <span element="equation">
    """
    span_attrib = {moin_page("element"): "equation"}
    return self.new_copy(moin_page.span, element, depth, attrib=span_attrib)
def build_dom_table(self, rows, head=None, cls=None):
    """
    Build a DOM table with data from <rows>.

    rows -- sequence of rows, each a sequence of cell contents
    head -- optional sequence of heading cell contents
    cls  -- optional class set on the table element

    Heading cells get the numeric class when the matching cell of the
    first data row is numeric. Body cells holding a node whose first
    child is one long unbroken unicode word get the 'moin-wordbreak'
    class; all other body cells go through add_numeric_class().
    """
    table = moin_page.table()
    if cls is not None:
        table.attrib[moin_page('class')] = cls

    if head is not None:
        table_head = moin_page.table_header()
        head_row = moin_page.table_row()
        for column, heading in enumerate(head):
            head_cell = moin_page.table_cell(children=[heading])
            if rows:
                # add "align: right" to heading cell if cell in first data row is numeric
                self.add_numeric_class(rows[0][column], head_cell)
            head_row.append(head_cell)
        table_head.append(head_row)
        table.append(table_head)

    table_body = moin_page.table_body()
    for row in rows:
        body_row = moin_page.table_row()
        for content in row:
            is_long_word = (
                isinstance(content, ET.Node)
                and isinstance(content[0], unicode)
                and len(content[0].split()) == 1
                and len(content[0]) > WORDBREAK_LEN
            )
            if is_long_word:
                # avoid destroying table layout by applying special styling to cells with long file name hyperlinks
                body_cell = moin_page.table_cell(children=[content],
                                                 attrib={moin_page.class_: 'moin-wordbreak'})
            else:
                body_cell = moin_page.table_cell(children=[content])
                self.add_numeric_class(content, body_cell)
            body_row.append(body_cell)
        table_body.append(body_row)
    table.append(table_body)
    return table
def visit_docbook_inlinequation(self, element, depth):
    """
    <inlinequation> --> <span element="equation">
    """
    return self.new_copy(moin_page.span, element, depth,
                         attrib={moin_page('element'): 'equation'})
def visit_moinpage_blockquote(self, element):
    """
    Convert::

        <blockquote>text</blockquote>

    to::

        <blockquote>
            <attribution>Unknown</attribution>
            <simpara>text</simpara>
        </blockquote>

    and::

        <blockquote source="author">text</blockquote>

    to::

        <blockquote>
            <attribution>author</attribution>
            <simpara>text</simpara>
        </blockquote>
    """
    # TODO: Internationalization
    author = element.get(moin_page('source')) or "Unknown"
    attribution = self.new(docbook('attribution'), attrib={}, children=[author])
    para = self.new(docbook('simpara'), attrib={}, children=self.do_children(element))
    return self.new(docbook('blockquote'), attrib={}, children=[attribution, para])
def visit_docbook_blockquote(self, element, depth):
    """
    <blockquote>
        <attribution>Author</attribution>
        Text
    </blockquote>
    --> <blockquote source="Author">Text</blockquote>

    <blockquote>Text</blockquote>
    --> <blockquote source="Unknow">Text</blockquote>

    The source is the first converted child of <attribution>. Without an
    attribution, or with an empty one, the default source is used.
    """
    # TODO: Translate
    source = u"Unknow"
    children = []
    for child in element:
        if not isinstance(child, ET.Element):
            children.append(child)
        elif child.tag.name == "attribution":
            attribution = self.do_children(child, depth + 1)
            # Only the first converted child is used. The default is kept
            # whole: indexing the default string would yield only its
            # first character.
            if attribution:
                source = attribution[0]
        else:
            children.extend(self.do_children(child, depth + 1))
    attrib = {moin_page('source'): source}
    return self.new(moin_page.blockquote, attrib=attrib, children=children)
def visit_docbook_informalfigure(self, element, depth):
    """
    <informalfigure> --> <div html:class="db-figure">
    """
    figure_attrib = {html.class_: 'db-figure'}
    return self.new_copy(moin_page('div'), element, depth, attrib=figure_attrib)
def visit_docbook_trademark(self, element, depth):
    """
    <trademark> --> <span class="db-trademark">

    DocBook supports 4 classes of trademark, each adding its own mark to
    the converted content:

    - copyright:  the copyright sign and a space are put in front
    - registered: the registered sign is appended
    - trade:      the trade mark sign is appended
    - service:    a superscript "SM" span is appended

    Any other class (or none) leaves the content unchanged.
    """
    kind = element.get('class')
    children = self.do_children(element, depth)

    if kind == 'copyright':
        children.insert(0, u'\u00a9 ')
    elif kind == 'registered':
        children.append(u'\u00ae')
    elif kind == 'trade':
        children.append(u'\u2122')
    elif kind == 'service':
        superscript = {moin_page('baseline-shift'): 'super'}
        children.append(self.new(moin_page.span, attrib=superscript, children=['SM']))

    return self.new(moin_page.span, attrib={html.class_: 'db-trademark'}, children=children)
def visit_docbook_informalequation(self, element, depth):
    """
    <informalequation> --> <div html:class="db-equation">
    """
    equation_attrib = {html.class_: 'db-equation'}
    return self.new_copy(moin_page('div'), element, depth, attrib=equation_attrib)
def visit_docbook_sect(self, element, depth):
    """
    Convert a numbered section into a heading.

    Numbered sections use tags like <sectN>, where N is the number of
    the section between 1 and 5; N becomes the outline-level of the
    heading. The sections are supposed to be correctly nested.

    Every DocBook <title> child is removed from the section to avoid a
    double conversion; the last one found provides the children of the
    heading. Without a title the heading has no children.

    Sets self.is_section to True.

    TODO: See if we can unify with recursive section below.
    TODO: Add div element, with specific id
    """
    self.is_section = True
    title = ''
    # Iterate over a snapshot: removing a child from the element while
    # iterating the element itself would skip the following child.
    for child in list(element):
        if not isinstance(child, ET.Element):
            continue
        name = self.docbook_namespace.get(child.tag.uri, None)
        if name == 'docbook' and child.tag.name == 'title':
            title = child
            # Remove the title element to avoid double conversion
            element.remove(child)
    # 'sectN'[4] is the section number N
    heading_level = element.tag.name[4]
    attrib = {moin_page('outline-level'): heading_level}
    return self.new(moin_page.h, attrib=attrib, children=title)
def visit_data_element(self, element, depth, object_data, text_object, caption):
    """
    Return an object element based on object_data, or a text fallback.

    element     -- the media element; its tag name selects the entry in
                   self.media_tags (preferred formats, data tag, mimetype)
    depth       -- current conversion depth
    object_data -- candidate data elements (e.g. <imagedata>)
    text_object -- element holding the textual replacement, may be None
    caption     -- accepted but not used by this method

    Returns a moin_page.object element, a paragraph with the converted
    content of text_object when no object is usable, or None when there
    is nothing to show or the chosen object has no fileref.
    """
    attrib = {}
    preferred_format, data_tag, mimetype = self.media_tags[element.tag.name]
    if not object_data:
        if not text_object:
            return
        # Paragraph with the converted content of text_object (the
        # previous code referenced an undefined name here).
        children = self.do_children(text_object, depth + 1)
        return self.new(moin_page.p, attrib={}, children=children)

    # We try to determine the best object to show
    object_to_show = None
    for obj in object_data:
        fmt = obj.get("format")
        if fmt:
            if fmt.lower() in preferred_format:
                object_to_show = obj
                break
        else:
            # XXX: Maybe we could add some verification over the
            # extension of the file
            object_to_show = obj

    # If we could not find any suitable object, we return the text
    # replacement. Compare against None explicitly: a data element
    # without children may well evaluate as false.
    if object_to_show is None:
        if text_object is None:
            return
        children = self.do_children(text_object, depth + 1)
        return self.new(moin_page.p, attrib={}, children=children)

    href = object_to_show.get("fileref")
    if not href:
        # We could probably try to use entityref,
        # but at this time we won't support it.
        return
    attrib[xlink.href] = href

    fmt = object_to_show.get("format")
    if fmt:
        attrib[moin_page("type")] = "".join([mimetype, fmt.lower()])
    else:
        attrib[moin_page("type")] = mimetype
    return ET.Element(moin_page.object, attrib=attrib)
def visit_docbook_itemizedlist(self, element, depth):
    """
    <itemizedlist> --> <list item-label-generate="unordered">
    """
    list_attrib = {moin_page('item-label-generate'): 'unordered'}
    return self.visit_simple_list(moin_page.list, list_attrib, element, depth)
def visit_docbook_superscript(self, element, depth):
    """
    <superscript> --> <span baseline-shift="super">
    """
    span_attrib = {moin_page('baseline-shift'): 'super'}
    return self.new_copy(moin_page.span, element, depth, attrib=span_attrib)
def visit_docbook_informalexample(self, element, depth):
    """
    <informalexample> --> <div html:class="db-example">
    """
    example_attrib = {html('class'): 'db-example'}
    return self.new_copy(moin_page('div'), element, depth, attrib=example_attrib)
def visit_xhtml_strike(self, element):
    """
    <strike>Text</strike> --> <span text-decoration="line-through">Text</span>
    """
    decoration = {moin_page('text-decoration'): 'line-through'}
    return self.new_copy(moin_page.span, element, decoration)
def visit_xhtml_del(self, element):
    """
    <del>Text</del> --> <span text-decoration="line-through">Text</span>
    """
    decoration = {moin_page('text-decoration'): 'line-through'}
    return self.new_copy(moin_page.span, element, decoration)
def visit_xhtml_ins(self, element):
    """
    <ins>Text</ins> --> <span text-decoration="underline">Text</span>
    """
    decoration = {moin_page('text-decoration'): 'underline'}
    return self.new_copy(moin_page.span, element, decoration)
def visit_docbook_entrytbl(self, element, depth):
    """
    <entrytbl> --> <table-cell><table>...</table></table-cell>

    The entry table is copied as a moin_page table and returned wrapped
    in a table-cell, i.e. as a table within a table-cell.
    """
    inner_table = self.new_copy(moin_page.table, element, depth, attrib={})
    cell_tag = moin_page('table-cell')
    return self.new(cell_tag, attrib={}, children=[inner_table])