def _get_translatable_blocks(events):
    """Regroup an XML event stream into translatable messages.

    Each item of ``events`` is a ``(type, value, line)`` triple.  Text,
    inline elements and comments are accumulated into a ``Message``.  Any
    event that is not absorbed that way first flushes the pending message,
    yielded as ``(MESSAGE, message, message.get_line())``, and is then
    yielded unchanged.  A last pending message is yielded once the input
    is exhausted.

    Elements whose schema sets ``skip_content``, and non-inline elements
    met while an inline element is open, are not translated.  When such an
    element starts while an inline element is open, its whole sub-tree is
    serialized with ``stream_to_str`` and stored in the message as a
    single opaque format; otherwise its events are passed through.

    The context pushed by an element that starts a skipped region is
    popped when that element closes, so the context stack stays balanced
    and later texts do not inherit the skipped element's context.
    """
    # Default value, replaced by the one of the XML declaration if any
    encoding = 'utf-8'

    # To identify the begin/end format
    format_id = 0
    id_stack = []
    context_stack = [None]
    # Events of the skipped sub-tree being captured, or None
    stream = None

    # NOTE(review): the "if message" tests below assume an empty Message
    # is falsy -- confirm against the Message class.
    message = Message()
    # Nesting depth inside a skipped element (0 = not skipping)
    skip_level = 0
    for event in events:
        event_type, value, line = event

        # Set the good encoding
        if event_type == XML_DECL:
            encoding = value[1]
        # And now, we catch only the good events
        elif event_type == START_ELEMENT:
            if skip_level > 0:
                skip_level += 1
                if stream:
                    stream.append(event)
                    continue
            else:
                tag_uri, tag_name, attributes = value
                schema = get_element_schema(tag_uri, tag_name)

                # Context management
                if schema.context is not None:
                    context_stack.append(schema.context)

                # Skip content ?
                if schema.skip_content:
                    skip_level = 1
                    if id_stack:
                        stream = [event]
                        continue
                # Is inline ?
                elif schema.is_inline:
                    format_id += 1
                    id_stack.append(format_id)
                    start_format = _make_start_format(tag_uri, tag_name,
                                                      attributes, encoding)
                    message.append_start_format(start_format, format_id,
                                                line)
                    continue
                # Block element inside an inline one: capture it verbatim
                elif id_stack:
                    skip_level = 1
                    stream = [event]
                    continue
        elif event_type == END_ELEMENT:
            if skip_level > 0:
                skip_level -= 1
                # Remember whether this event belongs to a captured
                # sub-tree: "stream" is reset below once it is flushed.
                captured = bool(stream)
                if captured:
                    stream.append(event)
                    if skip_level == 0:
                        format_id += 1
                        aux = stream_to_str(stream, encoding)
                        aux = unicode(aux, encoding)
                        aux = [(aux, False, context_stack[-1])]
                        message.append_start_format(aux, format_id, line)
                        message.append_end_format([], format_id, line)
                        stream = None
                if skip_level == 0:
                    # This closes the element that started the skip.  Its
                    # context was pushed by the START_ELEMENT branch, so
                    # pop it here (after the flush above, which still
                    # uses it) to keep the stack balanced.
                    tag_uri, tag_name = value[:2]
                    schema = get_element_schema(tag_uri, tag_name)
                    if schema.context is not None:
                        context_stack.pop()
                if captured:
                    continue
            else:
                tag_uri, tag_name = value[:2]
                schema = get_element_schema(tag_uri, tag_name)

                # Context management
                if schema.context is not None:
                    context_stack.pop()

                # Is inline ?
                if schema.is_inline:
                    # NOTE(review): get_end_tag receives the whole value
                    # here, but stream_to_html_map calls it with two
                    # arguments -- confirm its signature.
                    end_format = [(get_end_tag(value), False, None)]
                    message.append_end_format(end_format, id_stack.pop(),
                                              line)
                    continue
        elif event_type == TEXT:
            if stream:
                stream.append(event)
                continue
            # Not empty ?  Whitespace is kept only inside a started message
            elif skip_level == 0 and (value.strip() != '' or message):
                value = XMLContent.encode(value)
                value = unicode(value, encoding)
                message.append_text(value, line, context_stack[-1])
                continue
        elif event_type == COMMENT:
            if stream:
                stream.append(event)
                continue
            elif message:
                # A comment inside a message is kept as an empty format
                format_id += 1
                if isinstance(value, str):
                    value = unicode(value, encoding)
                value = u'<!--%s-->' % value
                message.append_start_format([(value, False, None)],
                                            format_id, line)
                message.append_end_format([], format_id, line)
                continue

        # Not a good event => break + send the event
        if message:
            yield MESSAGE, message, message.get_line()
            message = Message()

        yield event

    # Send the last message!
    if message:
        yield MESSAGE, message, message.get_line()
qname = get_qname(tag_uri, tag_name) s = '<%s' % qname # Output the attributes for attr_uri, attr_name in attributes: value = attributes[(attr_uri, attr_name)] qname = get_attribute_qname(attr_uri, attr_name) value = XMLAttribute.encode(value) s += ' %s="%s"' % (qname, value) return s + '>'
# NOTE(review): the line above is the tail of a definition whose header is
# not visible in this chunk; it is left untouched.


# Serializers used by stream_to_html, one per slot; the trailing comment on
# each entry names the event type that slot is meant for.
# NOTE(review): presumably indexed by the numeric event type inside
# stream_to_str -- confirm.
# NOTE(review): get_end_tag is called with two arguments here but with a
# single value in _get_translatable_blocks -- confirm its signature.
stream_to_html_map = (
    lambda x: '', # XML_DECL
    lambda x: get_doctype(x[0], x[1]), # DOCUMENT_TYPE
    get_start_tag, # START_ELEMENT
    lambda x: get_end_tag(x[0], x[1]), # END_ELEMENT
    XMLContent.encode, # TEXT
    lambda x: '<!--%s-->' % x, # COMMENT
    lambda x: '', # PI
    lambda x: x) # CDATA


def stream_to_html(stream, encoding='UTF-8', map=stream_to_html_map):
    """Serialize ``stream`` by delegating to ``stream_to_str``.

    ``encoding`` and ``map`` are forwarded unchanged as keyword arguments;
    ``map`` defaults to ``stream_to_html_map``.  Returns whatever
    ``stream_to_str`` returns.
    """
    return stream_to_str(stream, encoding=encoding, map=map)


# NOTE(review): set_content_type continues beyond this chunk; the visible
# part is left untouched.
def set_content_type(stream, content_type): key1 = (None, 'http-equiv') key2 = (None, 'content') for event in stream: type, value, line = event