def parse_attrib(self, attrib):
    '''Validate object attributes against the supported definition.

    If C{attrib} is not already a L{ConfigDict} it is wrapped in a new
    one; otherwise the object passed in is used directly. The definition
    in C{self.object_attr} is then applied to it.

    @param attrib: the attributes, either a L{ConfigDict} or a value
    accepted by the L{ConfigDict} constructor
    @returns: a L{ConfigDict} using the C{object_attr} dict as definition
    '''
    if isinstance(attrib, ConfigDict):
        checked = attrib
    else:
        checked = ConfigDict(attrib)
    checked.define(self.object_attr)
    return checked
class DumperClass(Visitor):
    '''Base class for dumper classes. Dumper classes serialize the content
    of a parse tree back to a text representation of the page content.
    Therefore this class implements the visitor API, so it can be
    used with any parse tree implementation or parser object that supports
    this API.

    To implement a dumper class, you need to define handlers for all
    tags that can appear in a page. Tags that are represented by a simple
    prefix and postfix string can be defined in the dictionary C{TAGS}.
    For example to define the italic tag in html output the dictionary
    should contain a definition like: C{EMPHASIS: ('<i>', '</i>')}.

    For tags that require more complex logic you can define a method to
    format the tag. Typical usage is to format link attributes in such
    a method. The method name should be C{dump_} + the name of the tag,
    e.g. C{dump_link()} for links (see the constants with tag names for
    the other tags). Such a dump method will get 3 arguments: the tag
    name itself, a dictionary with the tag attributes and a list of
    strings that form the tag content. The method should return a list
    of strings that represents the formatted text.

    This base class takes care of a stack of nested formatting tags and
    when a tag is closed either picks the appropriate prefix and postfix
    from C{TAGS} or calls the corresponding C{dump_} method. As a result
    tags are serialized depth-first.

    @ivar linker: the (optional) L{Linker} object, used to resolve links
    @ivar template_options: a L{ConfigDict} with options that may be set
    in a template (so inherently not safe !) to control the output style.
    Formats using this need to define the supported keys in the dict
    C{TEMPLATE_OPTIONS}.
    @ivar context: the stack of open tags maintained by this class. Can
    be used in C{dump_} methods to inspect the parent scope of the
    format. Elements on this stack have "tag", "attrib" and "text"
    attributes. Keep in mind that the parent scope is not yet complete
    when a tag is serialized.
    '''

    TAGS = {} #: dict mapping formatting tags to 2-tuples of a prefix and a postfix string

    TEMPLATE_OPTIONS = {} #: dict mapping ConfigDefinitions for template options

    def __init__(self, linker=None, template_options=None):
        '''Constructor
        @param linker: the (optional) L{Linker} object
        @param template_options: options passed to the L{ConfigDict}
        constructor and checked against C{TEMPLATE_OPTIONS}
        '''
        self.linker = linker
        self.template_options = ConfigDict(template_options)
        self.template_options.define(self.TEMPLATE_OPTIONS)
        self.context = []
        self._text = []

    def dump(self, tree):
        '''Format a parsetree to text
        @param tree: a parse tree object that supports a C{visit()} method
        @returns: a list of lines
        @raises AssertionError: if tags are still open after the tree
        has been visited
        '''
        # FIXME - issue here is that we need to reset state - should be in __init__
        self._text = []
        # The bottom element of the stack collects the top level output
        self.context = [DumperContextElement(None, None, self._text)]
        tree.visit(self)
        if len(self.context) != 1:
            raise AssertionError('Unclosed tags on tree: %s' % self.context[-1].tag)
        return self.get_lines() # FIXME - maybe just return text ?

    def get_lines(self):
        '''Return the dumped content as a list of lines
        Should only be called after closing the top level element
        '''
        # splitlines(True) keeps the line endings on each line
        return ''.join(self._text).splitlines(True)

    def start(self, tag, attrib=None):
        '''Visitor API: open a new tag and push it on the context stack
        @param tag: the tag name
        @param attrib: optional dict with tag attributes
        '''
        if attrib:
            attrib = attrib.copy() # Ensure dumping does not change tree
        self.context.append(DumperContextElement(tag, attrib, []))

    def text(self, text):
        '''Visitor API: add text to the innermost open tag.
        The text is passed through L{encode_text()} unless the open tag
        is C{OBJECT}.
        @param text: the text, must not be C{None}
        '''
        assert text is not None
        if self.context[-1].tag != OBJECT:
            text = self.encode_text(self.context[-1].tag, text)
        self.context[-1].text.append(text)

    def end(self, tag):
        '''Visitor API: close the innermost open tag and serialize it
        into the text of its parent scope.
        @param tag: the tag name, must match the innermost open tag
        @raises AssertionError: if C{tag} does not match the open tag or
        no handler is known for it
        '''
        if not tag or tag != self.context[-1].tag:
            raise AssertionError('Unexpected tag closed: %s' % tag)
        _, attrib, strings = self.context.pop()

        if tag in self.TAGS:
            assert strings, 'Can not append empty %s element' % tag
            start, end = self.TAGS[tag]
            strings.insert(0, start)
            strings.append(end)
        elif tag == FORMATTEDTEXT:
            pass # content is passed on to the parent scope unchanged
        else:
            strings = self._dump_method(tag)(tag, attrib, strings)

        if strings is not None:
            self.context[-1].text.extend(strings)

    def append(self, tag, attrib=None, text=None):
        '''Visitor API: serialize a complete element in one call and add
        it to the text of the innermost open tag.
        @param tag: the tag name
        @param attrib: optional dict with tag attributes
        @param text: optional text content of the element
        @raises AssertionError: if no handler is known for C{tag}
        '''
        strings = None
        if tag in self.TAGS:
            assert text is not None, 'Can not append empty %s element' % tag
            start, end = self.TAGS[tag]
            text = self.encode_text(tag, text)
            strings = [start, text, end]
        elif tag == FORMATTEDTEXT:
            if text is not None:
                strings = [self.encode_text(tag, text)]
        else:
            if attrib:
                attrib = attrib.copy() # Ensure dumping does not change tree

            method = self._dump_method(tag)
            if text is None:
                strings = method(tag, attrib, [])
            elif tag == OBJECT:
                # Object content is handed over without text encoding
                strings = method(tag, attrib, [text])
            else:
                strings = method(tag, attrib, [self.encode_text(tag, text)])

        if strings is not None:
            self.context[-1].text.extend(strings)

    def _dump_method(self, tag):
        '''Look up the C{dump_} method for a tag
        @param tag: the tag name
        @returns: the bound C{dump_} + tag method
        @raises AssertionError: if this class has no such method
        '''
        try:
            return getattr(self, 'dump_' + tag)
        except AttributeError:
            raise AssertionError('BUG: Unknown tag: %s' % tag)

    def encode_text(self, tag, text):
        '''Optional method to encode text elements in the output

        @note: Do not apply text encoding in the C{dump_} methods, the
        list of strings given there may contain prefix and postfix
        formatting of nested tags.

        @param tag: formatting tag
        @param text: text to be encoded
        @returns: encoded text
        @implementation: optional, default just returns unmodified input
        '''
        return text

    def prefix_lines(self, prefix, strings):
        '''Convenience method to wrap a number of lines with e.g. an
        indenting sequence.
        @param prefix: a string to prefix each line
        @param strings: a list of pieces of text
        @returns: a new list of lines, each starting with prefix
        '''
        lines = ''.join(strings).splitlines(True)
        return [prefix + line for line in lines]

    def dump_object(self, tag, attrib, strings=None):
        '''Dumps objects defined by L{InsertedObjectType}
        @param tag: the tag name
        @param attrib: dict with the object attributes, the "type" key
        is used to look up the object type
        @param strings: list of strings with the object content, when
        omitted or C{None} an empty list is used
        @returns: the output of the object type for this format, or
        else the result of C{dump_img()} for "image+" types or of
        L{dump_object_fallback()}
        '''
        if strings is None:
            # Fresh list per call instead of a shared mutable default
            strings = []

        # Last part of the module name is used as the format name
        format_name = str(self.__class__.__module__).split('.')[-1]
        try:
            obj = PluginManager.insertedobjects[attrib['type']]
        except KeyError:
            pass
        else:
            try:
                output = obj.format(format_name, self, attrib, ''.join(strings))
            except ValueError:
                pass # fall through to the fallbacks below
            else:
                assert isinstance(output, (list, tuple)), "Invalid output: %r" % output
                return output

        if attrib['type'].startswith('image+'):
            # Fallback for backward compatibility of image generators < zim 0.70
            attrib = attrib.copy()
            attrib['type'] = attrib['type'][6:] # strip the "image+" prefix
            return self.dump_img(IMAGE, attrib, None)
        else:
            return self.dump_object_fallback(tag, attrib, strings)

    def dump_object_fallback(self, tag, attrib, strings=None):
        '''Method to serialize objects that do not have their own
        handler for this format.
        @implementation: must be implemented in sub-classes
        '''
        raise NotImplementedError

    def isrtl(self, text):
        '''Check for Right To Left script
        @param text: the text to check
        @returns: C{True} if C{text} starts with characters in a
        RTL script, C{False} if another direction was found, or C{None}
        if direction is not determined (also when C{Pango} is C{None}).
        '''
        if Pango is None:
            return None

        # It seems the find_base_dir() function is not documented in the
        # python language bindings. The Gtk C code shows the signature:
        #
        #     Pango.find_base_dir(text, length)
        #
        # It either returns a direction, or NEUTRAL if e.g. text only
        # contains punctuation but no real characters.
        direction = Pango.find_base_dir(text, len(text))
        if direction == Pango.Direction.NEUTRAL:
            return None
        else:
            return direction == Pango.Direction.RTL
class DumperClass(Visitor):
    '''Base class for dumper classes. Dumper classes serialize the content
    of a parse tree back to a text representation of the page content.
    Therefore this class implements the visitor API, so it can be
    used with any parse tree implementation or parser object that supports
    this API.

    To implement a dumper class, you need to define handlers for all
    tags that can appear in a page. Tags that are represented by a simple
    prefix and postfix string can be defined in the dictionary C{TAGS}.
    For example to define the italic tag in html output the dictionary
    should contain a definition like: C{EMPHASIS: ('<i>', '</i>')}.

    For tags that require more complex logic you can define a method to
    format the tag. Typical usage is to format link attributes in such
    a method. The method name should be C{dump_} + the name of the tag,
    e.g. C{dump_link()} for links (see the constants with tag names for
    the other tags). Such a dump method will get 3 arguments: the tag
    name itself, a dictionary with the tag attributes and a list of
    strings that form the tag content. The method should return a list
    of strings that represents the formatted text.

    This base class takes care of a stack of nested formatting tags and
    when a tag is closed either picks the appropriate prefix and postfix
    from C{TAGS} or calls the corresponding C{dump_} method. As a result
    tags are serialized depth-first.

    @ivar linker: the (optional) L{Linker} object, used to resolve links
    @ivar template_options: a L{ConfigDict} with options that may be set
    in a template (so inherently not safe !) to control the output style.
    Formats using this need to define the supported keys in the dict
    C{TEMPLATE_OPTIONS}.
    @ivar context: the stack of open tags maintained by this class. Can
    be used in C{dump_} methods to inspect the parent scope of the
    format. Elements on this stack have "tag", "attrib" and "text"
    attributes. Keep in mind that the parent scope is not yet complete
    when a tag is serialized.
    '''

    TAGS = {} #: dict mapping formatting tags to 2-tuples of a prefix and a postfix string

    TEMPLATE_OPTIONS = {} #: dict mapping ConfigDefinitions for template options

    def __init__(self, linker=None, template_options=None):
        '''Constructor
        @param linker: the (optional) L{Linker} object
        @param template_options: options passed to the L{ConfigDict}
        constructor and checked against C{TEMPLATE_OPTIONS}
        '''
        self.linker = linker
        self.template_options = ConfigDict(template_options)
        self.template_options.define(self.TEMPLATE_OPTIONS)
        self.context = []
        self._text = []

    def dump(self, tree):
        '''Convenience method to dump a given tree.
        @param tree: a parse tree object that supports a C{visit()} method
        @returns: the result of L{get_lines()}, a list of lines
        @raises AssertionError: if tags are still open after the tree
        has been visited
        '''
        # FIXME - issue here is that we need to reset state - should be in __init__
        self._text = []
        # The bottom element of the stack collects the top level output
        self.context = [DumperContextElement(None, None, self._text)]
        tree.visit(self)
        if len(self.context) != 1:
            # Call form of raise: valid on both python 2 and python 3
            raise AssertionError('Unclosed tags on tree: %s' % self.context[-1].tag)
        return self.get_lines() # FIXME - maybe just return text ?

    def get_lines(self):
        '''Return the dumped content as a list of lines
        Should only be called after closing the top level element
        '''
        # splitlines(True) keeps the line endings on each line
        return u''.join(self._text).splitlines(True)

    def start(self, tag, attrib=None):
        '''Visitor API: open a new tag and push it on the context stack
        @param tag: the tag name
        @param attrib: optional dict with tag attributes
        '''
        if attrib:
            attrib = attrib.copy() # Ensure dumping does not change tree
        self.context.append(DumperContextElement(tag, attrib, []))

    def text(self, text):
        '''Visitor API: add text to the innermost open tag.
        The text is passed through L{encode_text()} unless the open tag
        is C{OBJECT}.
        @param text: the text, must not be C{None}
        '''
        assert text is not None
        if self.context[-1].tag != OBJECT:
            text = self.encode_text(self.context[-1].tag, text)
        self.context[-1].text.append(text)

    def end(self, tag):
        '''Visitor API: close the innermost open tag and serialize it
        into the text of its parent scope.
        @param tag: the tag name, must match the innermost open tag
        @raises AssertionError: if C{tag} does not match the open tag or
        no handler is known for it
        '''
        if not tag or tag != self.context[-1].tag:
            raise AssertionError('Unexpected tag closed: %s' % tag)
        _, attrib, strings = self.context.pop()

        if tag in self.TAGS:
            assert strings, 'Can not append empty %s element' % tag
            start, end = self.TAGS[tag]
            strings.insert(0, start)
            strings.append(end)
        elif tag == FORMATTEDTEXT:
            pass # content is passed on to the parent scope unchanged
        else:
            strings = self._dump_method(tag)(tag, attrib, strings)

        if strings is not None:
            self.context[-1].text.extend(strings)

    def append(self, tag, attrib=None, text=None):
        '''Visitor API: serialize a complete element in one call and add
        it to the text of the innermost open tag.
        @param tag: the tag name
        @param attrib: optional dict with tag attributes
        @param text: optional text content of the element
        @raises AssertionError: if no handler is known for C{tag}
        '''
        strings = None
        if tag in self.TAGS:
            assert text is not None, 'Can not append empty %s element' % tag
            start, end = self.TAGS[tag]
            text = self.encode_text(tag, text)
            strings = [start, text, end]
        elif tag == FORMATTEDTEXT:
            if text is not None:
                strings = [self.encode_text(tag, text)]
        else:
            if attrib:
                attrib = attrib.copy() # Ensure dumping does not change tree

            method = self._dump_method(tag)
            if text is None:
                strings = method(tag, attrib, [])
            elif tag == OBJECT:
                # Object content is handed over without text encoding
                strings = method(tag, attrib, [text])
            else:
                strings = method(tag, attrib, [self.encode_text(tag, text)])

        if strings is not None:
            self.context[-1].text.extend(strings)

    def _dump_method(self, tag):
        '''Look up the C{dump_} method for a tag
        @param tag: the tag name
        @returns: the bound C{dump_} + tag method
        @raises AssertionError: if this class has no such method
        '''
        try:
            return getattr(self, 'dump_' + tag)
        except AttributeError:
            raise AssertionError('BUG: Unknown tag: %s' % tag)

    def encode_text(self, tag, text):
        '''Optional method to encode text elements in the output

        @note: Do not apply text encoding in the C{dump_} methods, the
        list of strings given there may contain prefix and postfix
        formatting of nested tags.

        @param tag: formatting tag
        @param text: text to be encoded
        @returns: encoded text
        @implementation: optional, default just returns unmodified input
        '''
        return text

    def prefix_lines(self, prefix, strings):
        '''Convenience method to wrap a number of lines with e.g. an
        indenting sequence.
        @param prefix: a string to prefix each line
        @param strings: a list of pieces of text
        @returns: a new list of lines, each starting with prefix
        '''
        lines = u''.join(strings).splitlines(True)
        return [prefix + line for line in lines]

    def dump_object(self, tag, attrib, strings=None):
        '''Dumps object using proper ObjectManager
        @param tag: the tag name
        @param attrib: dict with the object attributes, the "type" key
        is used to look up the object
        @param strings: list of strings with the object content, may be
        C{None} or empty
        @returns: a list of strings produced by the object for this
        format, or else the result of L{dump_object_fallback()}
        '''
        # Last part of the module name is used as the format name
        format_name = str(self.__class__.__module__).split('.')[-1]
        if 'type' in attrib:
            # "strings" defaults to None, which can not be joined
            content = u''.join(strings) if strings else u''
            obj = ObjectManager.get_object(attrib['type'], attrib, content)
            output = obj.dump(format_name, self, self.linker)
            # (str, type(u'')) matches "basestring" on python 2 and is
            # also defined on python 3
            if isinstance(output, (str, type(u''))):
                return [output]
            elif output is not None:
                return output

        # TODO put content in attrib, use text for caption (with full recursion)
        # See img
        return self.dump_object_fallback(tag, attrib, strings)

    def dump_object_fallback(self, tag, attrib, strings=None):
        '''Method to serialize objects that do not have their own
        handler for this format.
        @implementation: must be implemented in sub-classes
        '''
        raise NotImplementedError

    def isrtl(self, text):
        '''Check for Right To Left script
        @param text: the text to check
        @returns: C{True} if C{text} starts with characters in a
        RTL script, C{False} if another direction was found, or C{None}
        if direction is not determined (also when C{pango} is C{None}).
        '''
        if pango is None:
            return None

        # It seems the find_base_dir() function is not documented in the
        # python language bindings. The Gtk C code shows the signature:
        #
        #     pango.find_base_dir(text, length)
        #
        # It either returns a direction, or NEUTRAL if e.g. text only
        # contains punctuation but no real characters.
        direction = pango.find_base_dir(text, len(text))
        if direction == pango.DIRECTION_NEUTRAL:
            return None
        else:
            return direction == pango.DIRECTION_RTL