Beispiel #1
0
def get_units(events, srx_handler=None):
    """Generate the translatable units found in a stream of XML events.

    The events are first regrouped by _get_translatable_blocks.  Two kinds
    of items are produced:

    - for every attribute whose datatype is a subclass of Unicode and whose
      value is not blank, a tuple (unit, context, line) where unit is
      ((srx_TEXT, value),) and context comes from _get_attr_context;
    - for every MESSAGE block, each item produced by get_segments.

    srx_handler is handed to get_segments unchanged.
    """
    # "spaces_kept" is the flag given to get_segments; "spaces_depth" counts
    # the currently open elements whose schema has keep_spaces set.
    spaces_kept = False
    spaces_depth = 0
    for kind, payload, line in _get_translatable_blocks(events):
        if kind == START_ELEMENT:
            tag_uri, tag_name, attributes = payload
            # Attributes: only the non-blank Unicode ones are emitted
            for attr_uri, attr_name in attributes:
                datatype = get_attr_datatype(tag_uri, tag_name, attr_uri,
                                             attr_name, attributes)
                if issubclass(datatype, Unicode):
                    text = attributes[attr_uri, attr_name]
                    if text.strip():
                        context = _get_attr_context(datatype, tag_name,
                                                    attr_name)
                        yield ((srx_TEXT, text),), context, line
            # Entering an element that keeps spaces ?
            if get_element_schema(tag_uri, tag_name).keep_spaces:
                spaces_kept = True
                spaces_depth += 1
        elif kind == END_ELEMENT:
            tag_uri, tag_name = payload
            # Leaving an element that keeps spaces ?
            if get_element_schema(tag_uri, tag_name).keep_spaces:
                spaces_depth -= 1
                # The flag is only cleared once the outermost one is closed
                if spaces_depth == 0:
                    spaces_kept = False
        elif kind == MESSAGE:
            # Segmentation
            for segment in get_segments(payload, spaces_kept, srx_handler):
                yield segment
Beispiel #2
0
def get_units(events, srx_handler=None):
    """Yield every translatable unit contained in the given XML events.

    The event stream is passed through _get_translatable_blocks and then:

    - each START_ELEMENT contributes one (unit, context, line) tuple per
      attribute whose datatype derives from Unicode and whose value is not
      only whitespace; unit is the 1-tuple ((srx_TEXT, value),);
    - each MESSAGE is split by get_segments (which also receives the
      current keep-spaces flag and srx_handler) and its items are yielded
      as they come.
    """
    preserve = False
    # Number of open elements whose schema asks to keep spaces
    preserve_depth = 0
    for block_type, block_value, line in _get_translatable_blocks(events):
        if block_type == MESSAGE:
            # Segmentation
            segments = get_segments(block_value, preserve, srx_handler)
            for segment in segments:
                yield segment
            continue

        if block_type == END_ELEMENT:
            tag_uri, tag_name = block_value
            schema = get_element_schema(tag_uri, tag_name)
            if not schema.keep_spaces:
                continue
            preserve_depth -= 1
            # Stop keeping spaces when the outermost such element closes
            if preserve_depth == 0:
                preserve = False
            continue

        if block_type != START_ELEMENT:
            # Any other event carries nothing to translate
            continue

        tag_uri, tag_name, attributes = block_value
        # Attributes
        for attr_uri, attr_name in attributes:
            datatype = get_attr_datatype(tag_uri, tag_name, attr_uri,
                                         attr_name, attributes)
            if not issubclass(datatype, Unicode):
                continue
            attr_value = attributes[(attr_uri, attr_name)]
            if not attr_value.strip():
                continue
            context = _get_attr_context(datatype, tag_name, attr_name)
            yield (((srx_TEXT, attr_value), ), context, line)
        # Keep spaces ?
        schema = get_element_schema(tag_uri, tag_name)
        if schema.keep_spaces:
            preserve = True
            preserve_depth += 1
Beispiel #3
0
def _get_translatable_blocks(events):
    """Regroup a stream of XML events into translatable MESSAGE blocks.

    "events" is an iterable of (type, value, line) tuples.  Text, inline
    elements, comments and skipped sub-trees that belong together are
    accumulated into a Message object; every other event is passed through
    unchanged.  Whenever a pass-through event is met, the pending message
    (if not empty) is emitted first as (MESSAGE, message, message.get_line()).
    A last pending message is emitted when the events are exhausted.
    """
    # Default encoding, replaced by the one found in the XML declaration
    encoding = 'utf-8'

    # To identify the begin/end format: "id" is a counter giving a new number
    # to each format, "id_stack" holds the ids of the inline elements that
    # are currently open
    id = 0
    id_stack = []
    # Contexts declared by the schemas of the open elements; the top one is
    # attached to the text and to the serialized skipped sub-trees
    context_stack = [None]
    # Events buffered while skipping a sub-tree met inside an inline element
    stream = None

    message = Message()
    # Nesting depth inside a skipped element (0 means "not skipping")
    skip_level = 0
    for event in events:
        type, value, line = event

        # Remember the declared encoding (the event itself falls through to
        # the bottom of the loop and is sent as is)
        if type == XML_DECL:
            encoding = value[1]
        # And now, we catch only the events that belong to a message
        elif type == START_ELEMENT:
            if skip_level > 0:
                # Nested element inside a skipped one
                skip_level += 1
                if stream:
                    stream.append(event)
                    continue
            else:
                tag_uri, tag_name, attributes = value
                schema = get_element_schema(tag_uri, tag_name)

                # Context management
                if schema.context is not None:
                    context_stack.append(schema.context)

                # Skip content ?
                if schema.skip_content:
                    skip_level = 1
                    # Inside an inline element the sub-tree is buffered,
                    # otherwise the event falls through and is sent as is
                    if id_stack:
                        stream = [event]
                        continue
                # Is inline ?
                elif schema.is_inline:
                    id += 1
                    id_stack.append(id)

                    start_format = _make_start_format(tag_uri, tag_name,
                                                      attributes, encoding)
                    message.append_start_format(start_format, id, line)
                    continue
                # Not inline, but met inside an open inline element: buffer
                # the whole sub-tree like a skipped one
                elif id_stack:
                    skip_level = 1
                    stream = [event]
                    continue
        elif type == END_ELEMENT:
            if skip_level > 0:
                skip_level -= 1
                # NOTE(review): a context pushed above for the element that
                # started the skip does not look to be popped on this path
                # -- confirm whether such schemas can declare a context
                if stream:
                    stream.append(event)
                    if skip_level == 0:
                        # End of the buffered sub-tree: serialize it and add
                        # it to the message as a start format immediately
                        # followed by an empty end format
                        id += 1
                        aux = stream_to_str(stream, encoding)
                        aux = unicode(aux, encoding)
                        aux = [(aux, False, context_stack[-1])]
                        message.append_start_format(aux, id, line)
                        message.append_end_format([], id, line)
                        stream = None
                    continue
            else:
                tag_uri, tag_name = value[:2]
                schema = get_element_schema(tag_uri, tag_name)

                # Context management
                if schema.context is not None:
                    context_stack.pop()

                # Is inline ?
                if schema.is_inline:
                    message.append_end_format([(get_end_tag(value), False,
                                                None)], id_stack.pop(), line)
                    continue
        elif type == TEXT:
            # Text met while buffering goes to the buffer
            if stream:
                stream.append(event)
                continue
            # Not empty ?  Blank text is only kept when the message has
            # already been started
            elif skip_level == 0 and (value.strip() != '' or message):
                value = XMLContent.encode(value)
                value = unicode(value, encoding)
                message.append_text(value, line, context_stack[-1])
                continue
        elif type == COMMENT:
            if stream:
                stream.append(event)
                continue
            # A comment inside a started message is kept as a format
            elif message:
                id += 1
                if isinstance(value, str):
                    value = unicode(value, encoding)
                value = u'<!--%s-->' % value
                message.append_start_format([(value, False, None)], id, line)
                message.append_end_format([], id, line)
                continue

        # Not an event for the message => break + send the pending message,
        # then the event itself
        if message:
            yield MESSAGE, message, message.get_line()
            message = Message()

        yield event
    # Send the last message!
    if message:
        yield MESSAGE, message, message.get_line()
Beispiel #4
0
def translate(events, catalog, srx_handler=None):
    """Translate a stream of XML events and yield the resulting events.

    The events are regrouped by _get_translatable_blocks, then:

    - XML_DECL: the declared encoding is remembered, the event is re-sent;
    - DOCUMENT_TYPE: the doctype is remembered, the event is re-sent;
    - START_ELEMENT: the non-blank attributes whose datatype is a subclass
      of Unicode are translated with catalog.gettext and written back into
      the attributes mapping; the event is re-sent with its line set to
      None;
    - END_ELEMENT: the event is re-sent;
    - MESSAGE: the message is translated by translate_message, and the
      result is parsed again with XMLParser, whose events are yielded;
    - anything else is re-sent unchanged.

    srx_handler is handed to translate_message unchanged.

    Raises XMLError, with the line and the source message in its text, when
    the translated message cannot be parsed.
    """
    # Default values
    encoding = 'utf-8'
    doctype = None
    keep_spaces = False
    keep_spaces_level = 0
    namespaces = {}

    for event in _get_translatable_blocks(events):
        kind, value, line = event

        # Remember the declared encoding
        if kind == XML_DECL:
            encoding = value[1]
            yield event
        # Store the current DTD
        elif kind == DOCUMENT_TYPE:
            _name, doctype = value
            yield event
        # GO !
        elif kind == START_ELEMENT:
            tag_uri, tag_name, attributes = value
            # Attributes (translate)
            for attr_uri, attr_name in attributes:
                attr_key = (attr_uri, attr_name)
                attr_value = attributes[attr_key]
                datatype = get_attr_datatype(tag_uri, tag_name, attr_uri,
                                             attr_name, attributes)
                if issubclass(datatype, Unicode):
                    attr_value = attr_value.strip()
                    if attr_value:
                        attr_value = datatype.decode(attr_value, encoding)
                        unit = ((srx_TEXT, attr_value),)
                        context = _get_attr_context(datatype, tag_name,
                                                    attr_name)
                        unit = catalog.gettext(unit, context)
                        attr_value = unit[0][1].encode(encoding)
                        # Only the value of an existing key is replaced, so
                        # the mapping keeps its size while being iterated
                        attributes[attr_key] = attr_value
                # Namespaces
                # FIXME We must support xmlns="...." too.
                # FIXME We must consider the end of the declaration
                if attr_uri == xmlns_uri:
                    namespaces[attr_name] = attr_value
            yield START_ELEMENT, (tag_uri, tag_name, attributes), None
            # Keep spaces ?
            schema = get_element_schema(tag_uri, tag_name)
            if schema.keep_spaces:
                keep_spaces = True
                keep_spaces_level += 1
        elif kind == END_ELEMENT:
            yield event
            # Keep spaces ?
            tag_uri, tag_name = value
            schema = get_element_schema(tag_uri, tag_name)
            if schema.keep_spaces:
                keep_spaces_level -= 1
                if keep_spaces_level == 0:
                    keep_spaces = False
        elif kind == MESSAGE:
            translation = translate_message(value, catalog, keep_spaces,
                                            srx_handler)
            try:
                # The translation is parsed again so that the formats it
                # contains come out as regular events
                for new_event in XMLParser(translation.encode(encoding),
                                           namespaces, doctype=doctype):
                    yield new_event
            except XMLError:
                # Call form of "raise": same exception and message as the
                # old "raise XMLError, msg" statement, but also valid syntax
                # for Python 3
                raise XMLError(('please have a look in your source file, '
                                'line ~ %d:\n%s') % (line, value.to_str()))
        else:
            yield event