def parse_attrib(self, attrib):
		'''Coerce an attribute mapping into a L{ConfigDict}.

		An object that already is a L{ConfigDict} is handed back
		untouched (no definitions are applied to it). Anything else is
		wrapped in a new L{ConfigDict} on which the C{object_attr} dict
		of this object is applied as definition.

		@param attrib: a L{ConfigDict} or the input for constructing one
		@returns: a L{ConfigDict}
		'''
		if isinstance(attrib, ConfigDict):
			return attrib

		config = ConfigDict(attrib)
		config.define(self.object_attr)
		return config
Example #2
0
class DumperClass(Visitor):
	'''Base class for dumper classes. Dumper classes serialize the content
	of a parse tree back to a text representation of the page content.
	Therefore this class implements the visitor API, so it can be
	used with any parse tree implementation or parser object that supports
	this API.

	To implement a dumper class, you need to define handlers for all
	tags that can appear in a page. Tags that are represented by a simple
	prefix and postfix string can be defined in the dictionary C{TAGS}.
	For example to define the italic tag in html output the dictionary
	should contain a definition like: C{EMPHASIS: ('<i>', '</i>')}.

	For tags that require more complex logic you can define a method to
	format the tag. Typical usage is to format link attributes in such
	a method. The method name should be C{dump_} + the name of the tag,
	e.g. C{dump_link()} for links (see the constants with tag names for
	the other tags). Such a dump method will get 3 arguments: the tag
	name itself, a dictionary with the tag attributes and a list of
	strings that form the tag content. The method should return a list
	of strings that represents the formatted text.

	This base class takes care of a stack of nested formatting tags and
	when a tag is closed either picks the appropriate prefix and postfix
	from C{TAGS} or calls the corresponding C{dump_} method. As a result
	tags are serialized depth-first.

	@ivar linker: the (optional) L{Linker} object, used to resolve links
	@ivar template_options: a L{ConfigDict} with options that may be set
	in a template (so inherently not safe !) to control the output style.
	Formats using this need to define the supported keys in the dict
	C{TEMPLATE_OPTIONS}.
	@ivar context: the stack of open tags maintained by this class. Can
	be used in C{dump_} methods to inspect the parent scope of the
	format. Elements on this stack have "tag", "attrib" and "text"
	attributes. Keep in mind that the parent scope is not yet complete
	when a tag is serialized.
	'''

	TAGS = {} #: dict mapping formatting tags to 2-tuples of a prefix and a postfix string

	TEMPLATE_OPTIONS = {} #: dict mapping ConfigDefinitions for template options

	def __init__(self, linker=None, template_options=None):
		'''Constructor
		@param linker: the (optional) L{Linker} object
		@param template_options: input for the L{ConfigDict} with
		template options, C{TEMPLATE_OPTIONS} is applied on it as
		definition
		'''
		self.linker = linker
		self.template_options = ConfigDict(template_options)
		self.template_options.define(self.TEMPLATE_OPTIONS)
		self.context = []
		self._text = []

	def dump(self, tree):
		'''Format a parsetree to text
		@param tree: a parse tree object that supports a C{visit()} method
		@returns: a list of lines
		@raises AssertionError: when the tree left tags open
		'''
		# FIXME - issue here is that we need to reset state - should be in __init__
		self._text = []
		# The bottom element of the stack collects the top level output
		# directly in self._text
		self.context = [DumperContextElement(None, None, self._text)]
		tree.visit(self)
		if len(self.context) != 1:
			raise AssertionError('Unclosed tags on tree: %s' % self.context[-1].tag)
		#~ import pprint; pprint.pprint(self._text)
		return self.get_lines() # FIXME - maybe just return text ?

	def get_lines(self):
		'''Return the dumped content as a list of lines
		Should only be called after closing the top level element
		'''
		# keepends=True: each line keeps its line ending
		return ''.join(self._text).splitlines(True)

	def start(self, tag, attrib=None):
		'''Visitor callback for an opening tag, pushes a new element
		on the C{context} stack
		@param tag: the tag name
		@param attrib: the (optional) attributes of the tag
		'''
		if attrib:
			attrib = attrib.copy() # Ensure dumping does not change tree
		self.context.append(DumperContextElement(tag, attrib, []))

	def text(self, text):
		'''Visitor callback for text content, adds the text to the
		innermost open tag. The text is passed through
		L{encode_text()}, except within an C{OBJECT} tag.
		@param text: the text, can not be C{None}
		'''
		assert text is not None
		if self.context[-1].tag != OBJECT:
			text = self.encode_text(self.context[-1].tag, text)
		self.context[-1].text.append(text)

	def end(self, tag):
		'''Visitor callback for a closing tag, serializes the
		innermost open tag and adds the result to its parent
		@param tag: the tag name, must match the innermost open tag
		@raises AssertionError: when C{tag} does not match the
		innermost open tag or when there is no handler for C{tag}
		'''
		if not tag or tag != self.context[-1].tag:
			raise AssertionError('Unexpected tag closed: %s' % tag)
		_, attrib, strings = self.context.pop()

		if tag in self.TAGS:
			assert strings, 'Can not append empty %s element' % tag
			start, end = self.TAGS[tag]
			strings.insert(0, start)
			strings.append(end)
		elif tag == FORMATTEDTEXT:
			pass # content is passed on to the parent as is
		else:
			try:
				method = getattr(self, 'dump_' + tag)
			except AttributeError:
				raise AssertionError('BUG: Unknown tag: %s' % tag)

			strings = method(tag, attrib, strings)
			#~ try:
				#~ u''.join(strings)
			#~ except:
				#~ print("BUG: %s returned %s" % ('dump_'+tag, strings))

		# A dump method can return None to produce no output at all
		if strings is not None:
			self.context[-1].text.extend(strings)

	def append(self, tag, attrib=None, text=None):
		'''Visitor callback to add a complete element in one call,
		the element is not pushed on the C{context} stack
		@param tag: the tag name
		@param attrib: the (optional) attributes of the tag
		@param text: the (optional) text content of the tag, required
		for tags that are defined in C{TAGS}
		@raises AssertionError: when there is no handler for C{tag}
		'''
		strings = None
		if tag in self.TAGS:
			assert text is not None, 'Can not append empty %s element' % tag
			start, end = self.TAGS[tag]
			text = self.encode_text(tag, text)
			strings = [start, text, end]
		elif tag == FORMATTEDTEXT:
			if text is not None:
				strings = [self.encode_text(tag, text)]
		else:
			if attrib:
				attrib = attrib.copy() # Ensure dumping does not change tree

			try:
				method = getattr(self, 'dump_' + tag)
			except AttributeError:
				raise AssertionError('BUG: Unknown tag: %s' % tag)

			if text is None:
				strings = method(tag, attrib, [])
			elif tag == OBJECT:
				# Object content is passed on without text encoding
				strings = method(tag, attrib, [text])
			else:
				strings = method(tag, attrib, [self.encode_text(tag, text)])

		if strings is not None:
			self.context[-1].text.extend(strings)

	def encode_text(self, tag, text):
		'''Optional method to encode text elements in the output

		@note: Do not apply text encoding in the C{dump_} methods, the
		list of strings given there may contain prefix and postfix
		formatting of nested tags.

		@param tag: formatting tag
		@param text: text to be encoded
		@returns: encoded text
		@implementation: optional, default just returns unmodified input
		'''
		return text

	def prefix_lines(self, prefix, strings):
		'''Convenience method to wrap a number of lines with e.g. an
		indenting sequence.
		@param prefix: a string to prefix each line
		@param strings: a list of pieces of text
		@returns: a new list of lines, each starting with prefix
		'''
		# The pieces are joined first because they do not need to align
		# with line boundaries
		lines = ''.join(strings).splitlines(True)
		return [prefix + line for line in lines]

	def dump_object(self, tag, attrib, strings=None):
		'''Dumps objects defined by L{InsertedObjectType}

		Tries the handler registered in C{PluginManager.insertedobjects}
		for the object type first. When that does not produce output,
		types starting with "image+" are dumped as image and all other
		objects go to L{dump_object_fallback()}.

		@param tag: the tag name
		@param attrib: the attributes of the object, the handler is
		looked up by the "type" attribute
		@param strings: list of strings with the content of the object,
		C{None} is treated as an empty list
		@returns: a list (or tuple) of strings
		'''
		if strings is None:
			# Do not use a list as default argument, it would be shared
			# between all calls
			strings = []

		if 'type' not in attrib:
			# Without a type there is no handler to look up
			return self.dump_object_fallback(tag, attrib, strings)

		format_name = str(self.__class__.__module__).split('.')[-1]
		try:
			obj = PluginManager.insertedobjects[attrib['type']]
		except KeyError:
			pass # no handler for this object type, try fallbacks below
		else:
			try:
				output = obj.format(format_name, self, attrib, ''.join(strings))
			except ValueError:
				# NOTE(review): presumably raised when the handler does
				# not support this format - confirm against the handlers
				pass
			else:
				assert isinstance(output, (list, tuple)), "Invalid output: %r" % output
				return output

		if attrib['type'].startswith('image+'):
			# Fallback for backward compatibility of image generators < zim 0.70
			attrib = attrib.copy()
			attrib['type'] = attrib['type'][6:] # strip the "image+" prefix
			return self.dump_img(IMAGE, attrib, None)
		else:
			return self.dump_object_fallback(tag, attrib, strings)

	def dump_object_fallback(self, tag, attrib, strings=None):
		'''Method to serialize objects that do not have their own
		handler for this format.
		@implementation: must be implemented in sub-classes
		'''
		raise NotImplementedError

	def isrtl(self, text):
		'''Check for Right To Left script
		@param text: the text to check
		@returns: C{True} if C{text} starts with characters in a
		RTL script, C{False} if another direction was found, or
		C{None} if direction is not determined.
		'''
		if Pango is None:
			return None

		# It seems the find_base_dir() function is not documented in the
		# python language bindings. The Gtk C code shows the signature:
		#
		#     Pango.find_base_dir(text, length)
		#
		# It either returns a direction, or NEUTRAL if e.g. text only
		# contains punctuation but no real characters.

		direction = Pango.find_base_dir(text, len(text))
		if direction == Pango.Direction.NEUTRAL:
			return None
		else:
			return direction == Pango.Direction.RTL
Example #3
0
class DumperClass(Visitor):
	'''Base class for dumper classes. Dumper classes serialize the content
	of a parse tree back to a text representation of the page content.
	Therefore this class implements the visitor API, so it can be
	used with any parse tree implementation or parser object that supports
	this API.

	To implement a dumper class, you need to define handlers for all
	tags that can appear in a page. Tags that are represented by a simple
	prefix and postfix string can be defined in the dictionary C{TAGS}.
	For example to define the italic tag in html output the dictionary
	should contain a definition like: C{EMPHASIS: ('<i>', '</i>')}.

	For tags that require more complex logic you can define a method to
	format the tag. Typical usage is to format link attributes in such
	a method. The method name should be C{dump_} + the name of the tag,
	e.g. C{dump_link()} for links (see the constants with tag names for
	the other tags). Such a dump method will get 3 arguments: the tag
	name itself, a dictionary with the tag attributes and a list of
	strings that form the tag content. The method should return a list
	of strings that represents the formatted text.

	This base class takes care of a stack of nested formatting tags and
	when a tag is closed either picks the appropriate prefix and postfix
	from C{TAGS} or calls the corresponding C{dump_} method. As a result
	tags are serialized depth-first.

	@ivar linker: the (optional) L{Linker} object, used to resolve links
	@ivar template_options: a L{ConfigDict} with options that may be set
	in a template (so inherently not safe !) to control the output style.
	Formats using this need to define the supported keys in the dict
	C{TEMPLATE_OPTIONS}.
	@ivar context: the stack of open tags maintained by this class. Can
	be used in C{dump_} methods to inspect the parent scope of the
	format. Elements on this stack have "tag", "attrib" and "text"
	attributes. Keep in mind that the parent scope is not yet complete
	when a tag is serialized.
	'''

	TAGS = {} #: dict mapping formatting tags to 2-tuples of a prefix and a postfix string

	TEMPLATE_OPTIONS = {} #: dict mapping ConfigDefinitions for template options

	def __init__(self, linker=None, template_options=None):
		'''Constructor
		@param linker: the (optional) L{Linker} object
		@param template_options: input for the L{ConfigDict} with
		template options, C{TEMPLATE_OPTIONS} is applied on it as
		definition
		'''
		self.linker = linker
		self.template_options = ConfigDict(template_options)
		self.template_options.define(self.TEMPLATE_OPTIONS)
		self.context = []
		self._text = []

	def dump(self, tree):
		'''Convenience method to dump a given tree.
		@param tree: a parse tree object that supports a C{visit()} method
		@returns: a list of lines
		@raises AssertionError: when the tree left tags open
		'''
		# FIXME - issue here is that we need to reset state - should be in __init__
		self._text = []
		# The bottom element of the stack collects the top level output
		# directly in self._text
		self.context = [DumperContextElement(None, None, self._text)]
		tree.visit(self)
		if len(self.context) != 1:
			# Call form of raise, the "raise X, msg" form is python2 only
			raise AssertionError('Unclosed tags on tree: %s' % self.context[-1].tag)
		#~ import pprint; pprint.pprint(self._text)
		return self.get_lines() # FIXME - maybe just return text ?

	def get_lines(self):
		'''Return the dumped content as a list of lines
		Should only be called after closing the top level element
		'''
		# keepends=True: each line keeps its line ending
		return u''.join(self._text).splitlines(True)

	def start(self, tag, attrib=None):
		'''Visitor callback for an opening tag, pushes a new element
		on the C{context} stack
		@param tag: the tag name
		@param attrib: the (optional) attributes of the tag
		'''
		if attrib:
			attrib = attrib.copy() # Ensure dumping does not change tree
		self.context.append(DumperContextElement(tag, attrib, []))

	def text(self, text):
		'''Visitor callback for text content, adds the text to the
		innermost open tag. The text is passed through
		L{encode_text()}, except within an C{OBJECT} tag.
		@param text: the text, can not be C{None}
		'''
		assert text is not None
		if self.context[-1].tag != OBJECT:
			text = self.encode_text(self.context[-1].tag, text)
		self.context[-1].text.append(text)

	def end(self, tag):
		'''Visitor callback for a closing tag, serializes the
		innermost open tag and adds the result to its parent
		@param tag: the tag name, must match the innermost open tag
		@raises AssertionError: when C{tag} does not match the
		innermost open tag or when there is no handler for C{tag}
		'''
		if not tag or tag != self.context[-1].tag:
			raise AssertionError('Unexpected tag closed: %s' % tag)
		_, attrib, strings = self.context.pop()

		if tag in self.TAGS:
			assert strings, 'Can not append empty %s element' % tag
			start, end = self.TAGS[tag]
			strings.insert(0, start)
			strings.append(end)
		elif tag == FORMATTEDTEXT:
			pass # content is passed on to the parent as is
		else:
			try:
				method = getattr(self, 'dump_'+tag)
			except AttributeError:
				raise AssertionError('BUG: Unknown tag: %s' % tag)

			strings = method(tag, attrib, strings)
			#~ try:
				#~ u''.join(strings)
			#~ except:
				#~ print "BUG: %s returned %s" % ('dump_'+tag, strings)

		# A dump method can return None to produce no output at all
		if strings is not None:
			self.context[-1].text.extend(strings)

	def append(self, tag, attrib=None, text=None):
		'''Visitor callback to add a complete element in one call,
		the element is not pushed on the C{context} stack
		@param tag: the tag name
		@param attrib: the (optional) attributes of the tag
		@param text: the (optional) text content of the tag, required
		for tags that are defined in C{TAGS}
		@raises AssertionError: when there is no handler for C{tag}
		'''
		strings = None
		if tag in self.TAGS:
			assert text is not None, 'Can not append empty %s element' % tag
			start, end = self.TAGS[tag]
			text = self.encode_text(tag, text)
			strings = [start, text, end]
		elif tag == FORMATTEDTEXT:
			if text is not None:
				strings = [self.encode_text(tag, text)]
		else:
			if attrib:
				attrib = attrib.copy() # Ensure dumping does not change tree

			try:
				method = getattr(self, 'dump_'+tag)
			except AttributeError:
				raise AssertionError('BUG: Unknown tag: %s' % tag)

			if text is None:
				strings = method(tag, attrib, [])
			elif tag == OBJECT:
				# Object content is passed on without text encoding
				strings = method(tag, attrib, [text])
			else:
				strings = method(tag, attrib, [self.encode_text(tag, text)])

		if strings is not None:
			self.context[-1].text.extend(strings)

	def encode_text(self, tag, text):
		'''Optional method to encode text elements in the output

		@note: Do not apply text encoding in the C{dump_} methods, the
		list of strings given there may contain prefix and postfix
		formatting of nested tags.

		@param tag: formatting tag
		@param text: text to be encoded
		@returns: encoded text
		@implementation: optional, default just returns unmodified input
		'''
		return text

	def prefix_lines(self, prefix, strings):
		'''Convenience method to wrap a number of lines with e.g. an
		indenting sequence.
		@param prefix: a string to prefix each line
		@param strings: a list of pieces of text
		@returns: a new list of lines, each starting with prefix
		'''
		# The pieces are joined first because they do not need to align
		# with line boundaries
		lines = u''.join(strings).splitlines(True)
		return [prefix + line for line in lines]

	def dump_object(self, tag, attrib, strings=None):
		'''Dumps object using proper ObjectManager

		When the attributes have a "type" the object obtained from
		C{ObjectManager.get_object()} is asked to dump itself. If it
		returns C{None}, or when there is no "type", the object goes to
		L{dump_object_fallback()}.

		@param tag: the tag name
		@param attrib: the attributes of the object
		@param strings: list of strings with the content of the object,
		C{None} is treated as empty content
		@returns: a list of strings when the object returned a single
		string, otherwise the output of the object or of the fallback
		'''
		# TODO put content in attrib, use text for caption (with full recursion)
		# See img
		format_name = str(self.__class__.__module__).split('.')[-1]
		if 'type' in attrib:
			# "strings" defaults to None, which can not be joined
			content = u''.join(strings or [])
			obj = ObjectManager.get_object(attrib['type'], attrib, content)
			output = obj.dump(format_name, self, self.linker)
			if isinstance(output, basestring):
				return [output] # callers extend a list with the result
			elif output is not None:
				return output

		return self.dump_object_fallback(tag, attrib, strings)

	def dump_object_fallback(self, tag, attrib, strings=None):
		'''Method to serialize objects that do not have their own
		handler for this format.
		@implementation: must be implemented in sub-classes
		'''
		raise NotImplementedError

	def isrtl(self, text):
		'''Check for Right To Left script
		@param text: the text to check
		@returns: C{True} if C{text} starts with characters in a
		RTL script, C{False} if another direction was found, or
		C{None} if direction is not determined.
		'''
		if pango is None:
			return None

		# It seems the find_base_dir() function is not documented in the
		# python language bindings. The Gtk C code shows the signature:
		#
		#     pango.find_base_dir(text, length)
		#
		# It either returns a direction, or NEUTRAL if e.g. text only
		# contains punctuation but no real characters.

		direction = pango.find_base_dir(text, len(text))
		if direction == pango.DIRECTION_NEUTRAL:
			return None
		else:
			return direction == pango.DIRECTION_RTL