def get_units(events, srx_handler=None):
    """Yield the translatable units found in a stream of XML events.

    ``events`` is passed through ``_get_translatable_blocks``.  Two kinds of
    items are produced:

    - for every attribute of a start element whose datatype is a subclass
      of ``Unicode`` and whose value is not blank, a 3-tuple
      ``(((srx_TEXT, value),), context, line)``;
    - for every ``MESSAGE`` block, each item produced by
      ``get_segments(message, keep_spaces, srx_handler)``.

    ``srx_handler`` is only forwarded to ``get_segments``.
    """
    # NOTE(review): within the visible source this function is defined twice
    # in a row; the later definition is the one bound at import time.
    preserve_spaces = False
    preserve_depth = 0

    for kind, payload, line in _get_translatable_blocks(events):
        if kind == START_ELEMENT:
            tag_uri, tag_name, attributes = payload

            # Translatable attributes
            for attr_uri, attr_name in attributes:
                datatype = get_attr_datatype(tag_uri, tag_name, attr_uri,
                                             attr_name, attributes)
                if issubclass(datatype, Unicode):
                    text = attributes[(attr_uri, attr_name)]
                    if text.strip():
                        unit = ((srx_TEXT, text),)
                        context = _get_attr_context(datatype, tag_name,
                                                    attr_name)
                        yield (unit, context, line)

            # Entering an element that asks for its spaces to be kept
            if get_element_schema(tag_uri, tag_name).keep_spaces:
                preserve_spaces = True
                preserve_depth += 1

        elif kind == END_ELEMENT:
            tag_uri, tag_name = payload
            # Leaving an element that asks for its spaces to be kept; the
            # flag is dropped once the depth counter is back to zero.
            if get_element_schema(tag_uri, tag_name).keep_spaces:
                preserve_depth -= 1
                if preserve_depth == 0:
                    preserve_spaces = False

        elif kind == MESSAGE:
            # Segmentation
            for segment in get_segments(payload, preserve_spaces,
                                        srx_handler):
                yield segment
def get_units(events, srx_handler=None):
    """Extract the translatable units from an XML event stream.

    The events are first regrouped by ``_get_translatable_blocks``.  The
    generator then yields:

    - ``(unit, context, line)`` for each non-blank attribute whose datatype
      derives from ``Unicode``, where ``unit`` is ``((srx_TEXT, value),)``
      and ``context`` comes from ``_get_attr_context``;
    - every segment returned by ``get_segments`` for each ``MESSAGE`` block.

    ``srx_handler`` is handed to ``get_segments`` untouched.
    """
    # NOTE(review): an identical definition of this function directly
    # precedes this one in the visible source; this later one shadows it.
    spaces_kept = False
    nesting = 0

    for event_type, data, line in _get_translatable_blocks(events):
        if event_type == START_ELEMENT:
            tag_uri, tag_name, attributes = data

            # Attributes
            for attr_uri, attr_name in attributes:
                datatype = get_attr_datatype(tag_uri, tag_name, attr_uri,
                                             attr_name, attributes)
                # Only text-like attributes are translatable
                if not issubclass(datatype, Unicode):
                    continue
                attr_value = attributes[(attr_uri, attr_name)]
                # Blank values carry nothing to translate
                if not attr_value.strip():
                    continue
                unit = ((srx_TEXT, attr_value),)
                yield (unit,
                       _get_attr_context(datatype, tag_name, attr_name),
                       line)

            # Keep spaces ?
            schema = get_element_schema(tag_uri, tag_name)
            if schema.keep_spaces:
                spaces_kept = True
                nesting += 1
        elif event_type == END_ELEMENT:
            # Keep spaces ?
            tag_uri, tag_name = data
            schema = get_element_schema(tag_uri, tag_name)
            if schema.keep_spaces:
                nesting -= 1
                # Back outside every space-keeping element
                if nesting == 0:
                    spaces_kept = False
        elif event_type == MESSAGE:
            # Segmentation
            segments = get_segments(data, spaces_kept, srx_handler)
            for segment in segments:
                yield segment
def _get_translatable_blocks(events):
    """Regroup an XML event stream into translatable messages.

    ``events`` is an iterable of ``(type, value, line)`` tuples.  The
    generator yields tuples of the same shape:

    - events absorbed into the pending ``Message`` (see the branches below)
      are not yielded themselves;
    - every other event is yielded unchanged, preceded by
      ``(MESSAGE, message, message.get_line())`` when the pending message
      tests true, after which a fresh ``Message`` is started;
    - once the input is exhausted, a pending message that tests true is
      yielded as a last ``MESSAGE`` item.

    NOTE(review): the truth value of a ``Message`` is defined by that class,
    which is not visible here -- presumably it is false while empty.
    """
    # Default value
    encoding = 'utf-8'

    # To identify the begin/end format
    id = 0
    # Ids of the inline elements currently open, innermost last
    id_stack = []
    # Contexts pushed by the enclosing elements; None is the base entry
    context_stack = [None]
    # Events captured verbatim while skipping inside an inline element,
    # or None when nothing is being captured
    stream = None

    message = Message()
    # Nesting depth inside a skipped element; 0 means "not skipping"
    skip_level = 0
    for event in events:
        type, value, line = event

        # Set the good encoding
        if type == XML_DECL:
            encoding = value[1]
        # And now, we catch only the good events
        elif type == START_ELEMENT:
            if skip_level > 0:
                # Already skipping: just track the nesting depth
                skip_level += 1
                if stream:
                    stream.append(event)
                    continue
                # No capture in progress: fall through and re-emit the event
            else:
                tag_uri, tag_name, attributes = value
                schema = get_element_schema(tag_uri, tag_name)

                # Context management
                if schema.context is not None:
                    context_stack.append(schema.context)

                # Skip content ?
                if schema.skip_content:
                    skip_level = 1
                    # Capture only when an inline element is open
                    if id_stack:
                        stream = [event]
                        continue
                # Is inline ?
                elif schema.is_inline:
                    id += 1
                    id_stack.append(id)

                    start_format = _make_start_format(tag_uri, tag_name,
                                                      attributes, encoding)
                    message.append_start_format(start_format, id, line)
                    continue
                # Neither skipped nor inline, but inside an open inline
                # element: capture it verbatim
                elif id_stack:
                    skip_level = 1
                    stream = [event]
                    continue
        elif type == END_ELEMENT:
            if skip_level > 0:
                skip_level -= 1
                if stream:
                    stream.append(event)
                    if skip_level == 0:
                        # The captured element is closed: serialize it and
                        # add it to the message as one start/end format pair
                        id += 1
                        aux = stream_to_str(stream, encoding)
                        aux = unicode(aux, encoding)
                        aux = [(aux, False, context_stack[-1])]
                        message.append_start_format(aux, id, line)
                        message.append_end_format([], id, line)
                        stream = None
                    continue
            else:
                tag_uri, tag_name = value[:2]
                schema = get_element_schema(tag_uri, tag_name)

                # Context management
                if schema.context is not None:
                    context_stack.pop()

                # Is inline ?
                if schema.is_inline:
                    message.append_end_format([(get_end_tag(value), False,
                                                None)], id_stack.pop(), line)
                    continue
        elif type == TEXT:
            # Not empty ?
            if stream:
                stream.append(event)
                continue
            # Outside skipped content, text is absorbed when it is not blank
            # or when the message already tests true
            elif skip_level == 0 and (value.strip() != '' or message):
                value = XMLContent.encode(value)
                value = unicode(value, encoding)
                message.append_text(value, line, context_stack[-1])
                continue
        elif type == COMMENT:
            if stream:
                stream.append(event)
                continue
            # A comment is absorbed only when the message already tests true
            elif message:
                id += 1
                if isinstance(value, str):
                    value = unicode(value, encoding)
                value = u'<!--%s-->' % value
                message.append_start_format([(value, False, None)], id, line)
                message.append_end_format([], id, line)
                continue

        # Not a good event => break + send the event
        if message:
            yield MESSAGE, message, message.get_line()
            message = Message()

        yield event
    # Send the last message!
    if message:
        yield MESSAGE, message, message.get_line()
def translate(events, catalog, srx_handler=None):
    """Yield the events of a translated version of an XML event stream.

    ``events`` is regrouped by ``_get_translatable_blocks`` and processed as
    follows:

    - ``XML_DECL``: the encoding (``value[1]``) is remembered and the event
      is re-emitted;
    - ``DOCUMENT_TYPE``: the doctype is remembered and the event is
      re-emitted;
    - ``START_ELEMENT``: non-blank attributes whose datatype derives from
      ``Unicode`` are translated through ``catalog.gettext`` and written
      back into the attributes mapping; attributes in the ``xmlns_uri``
      namespace are recorded as namespace declarations; the element is then
      re-emitted with ``None`` as its line;
    - ``END_ELEMENT``: re-emitted unchanged;
    - ``MESSAGE``: translated with ``translate_message``, encoded, parsed
      again with ``XMLParser``, and the resulting events are emitted;
    - anything else is re-emitted unchanged.

    ``srx_handler`` is only forwarded to ``translate_message``.

    Raises ``XMLError`` (with the approximate source line and the source
    message) when a translated message cannot be parsed back.
    """
    # Default values
    encoding = 'utf-8'
    doctype = None
    keep_spaces = False
    keep_spaces_level = 0
    namespaces = {}

    for event in _get_translatable_blocks(events):
        kind, value, line = event

        # Set the good encoding
        if kind == XML_DECL:
            encoding = value[1]
            yield event
        # Store the current DTD
        elif kind == DOCUMENT_TYPE:
            _name, doctype = value
            yield event
        # GO !
        elif kind == START_ELEMENT:
            tag_uri, tag_name, attributes = value
            # Attributes (translate)
            # Only existing keys are reassigned below, so the mapping keeps
            # its size while it is being iterated.
            for attr_uri, attr_name in attributes:
                value = attributes[(attr_uri, attr_name)]
                datatype = get_attr_datatype(tag_uri, tag_name, attr_uri,
                                             attr_name, attributes)
                if issubclass(datatype, Unicode):
                    value = value.strip()
                    if value:
                        value = datatype.decode(value, encoding)
                        unit = ((srx_TEXT, value),)
                        context = _get_attr_context(datatype, tag_name,
                                                    attr_name)
                        unit = catalog.gettext(unit, context)
                        value = unit[0][1]
                        value = value.encode(encoding)
                        attributes[(attr_uri, attr_name)] = value
                # Namespaces
                # FIXME We must support xmlns="...." too.
                # FIXME We must consider the end of the declaration
                if attr_uri == xmlns_uri:
                    namespaces[attr_name] = value
            yield START_ELEMENT, (tag_uri, tag_name, attributes), None
            # Keep spaces ?
            schema = get_element_schema(tag_uri, tag_name)
            if schema.keep_spaces:
                keep_spaces = True
                keep_spaces_level += 1
        elif kind == END_ELEMENT:
            yield event
            # Keep spaces ?
            tag_uri, tag_name = value
            schema = get_element_schema(tag_uri, tag_name)
            if schema.keep_spaces:
                keep_spaces_level -= 1
                if keep_spaces_level == 0:
                    keep_spaces = False
        elif kind == MESSAGE:
            translation = translate_message(value, catalog, keep_spaces,
                                            srx_handler)
            try:
                # A separate loop variable keeps the outer ``event`` intact
                parser = XMLParser(translation.encode(encoding), namespaces,
                                   doctype=doctype)
                for translated_event in parser:
                    yield translated_event
            except XMLError:
                # Call form of raise: same exception and message as the old
                # ``raise XMLError, msg`` statement, but also valid syntax
                # on Python 3.
                message = ('please have a look in your source file, '
                           'line ~ %d:\n%s') % (line, value.to_str())
                raise XMLError(message)
        else:
            yield event