Python Transcriber Beispiele, openformats.transcribers.Transcriber Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: indesign.py Projekt: transifex/openformats

    def _compile_story(self, story_content):
        """ Handles the compilation of a single story

        Strings are taken from the front of ``self.stringset`` for as long as
        their ``template_replacement`` can be located in ``story_content``.
        The first string whose hash is not present stays at the head of
        ``self.stringset`` (presumably for a later story - confirm against
        the caller) and processing of this story stops.

        args:
            story_content: the xml content of the story
        returns:
            compiled_story: the compiled story content
        """
        transcriber = Transcriber(story_content)
        # 32 lowercase alphanumerics followed by "_tr". The character class
        # must not contain a comma, otherwise comma-separated text that
        # happens to precede "_tr" would be blanked out as well.
        hash_regex = re.compile(ensure_unicode(r'[a-z0-9]{32}_tr'))

        while self.stringset:
            # Peek first; the string is only consumed once its hash is found.
            current_string = self.stringset[0]
            try:
                hash_position = story_content.index(
                    current_string.template_replacement
                )
            except ValueError:
                # Hash is not part of this story: leave the string in place
                break

            self.stringset.pop(0)
            transcriber.copy_until(hash_position)
            transcriber.add(current_string.string)
            transcriber.skip(len(current_string.template_replacement))

        # Update the XML file to contain the template strings
        transcriber.copy_until(len(story_content))
        compiled_story = transcriber.get_destination()
        # in case there are any hashes that have not been replaced, replace
        # them with an empty string
        return hash_regex.sub(u'', compiled_story)

Beispiel #2

0

Datei anzeigen

Datei: beta_android.py Projekt: transifex/openformats

    def parse(self, content, **kwargs):
        """Split Android XML ``content`` into a template and a stringset.

        Byte input is decoded as UTF-8. Everything before the first
        ``<resources`` is kept verbatim; the remainder is walked tag by tag
        and handed to the ``_handle_*`` helpers, which return the extracted
        strings (or ``None``) and drive ``self.transcriber``.

        Returns a ``(template, stringset)`` tuple. Raises ``ValueError`` if
        ``<resources`` does not occur in ``content``.
        """
        if isinstance(content, six.binary_type):
            content = content.decode("utf-8")  # convert to unicode

        resources_start = content.index("<resources")
        preamble = content[:resources_start]

        self.transcriber = Transcriber(content[resources_start:])
        source = self.transcriber.source
        self._order = 0

        extracted = []
        pending_comment = ""
        wanted = ("string-array", "string", "plurals", DumbXml.COMMENT)
        for tag, offset in DumbXml(source).find(wanted):
            if self._should_ignore(tag):
                # an ignored tag also discards the comment preceding it
                pending_comment = ""
                continue

            if tag.name == DumbXml.COMMENT:
                # remember the comment for whichever tag follows
                pending_comment = tag.inner
                self.transcriber.copy_until(offset + len(tag.content))
                continue

            if tag.name == "string":
                found = [
                    self._handle_string_tag(tag, offset, pending_comment)
                ]
            elif tag.name == "string-array":
                found = self._handle_string_array_tag(
                    tag, offset, pending_comment
                )
            elif tag.name == "plurals":
                found = [
                    self._handle_plurals_tag(tag, offset, pending_comment)
                ]
            else:
                continue

            extracted.extend(item for item in found if item is not None)
            pending_comment = ""

        self.transcriber.copy_until(len(source))
        template = preamble + self.transcriber.get_destination()
        self.transcriber = None

        return template, extracted

Beispiel #3

0

Datei anzeigen

Datei: beta_android.py Projekt: transifex/openformats

    def compile(self, template, stringset, **kwargs):
        """Fill ``template`` with the translations found in ``stringset``.

        Works in two passes over the part of the template that starts at
        ``<resources``: the first dispatches every non-ignored ``<string>``,
        ``<string-array>`` and ``<plurals>`` tag to its ``_compile_*``
        helper; the second drops ``<string-array>`` elements that have
        content but no ``<item>`` left.

        Returns the compiled document. Raises ``ValueError`` if
        ``<resources`` does not occur in ``template``.
        """
        resources_start = template.index("<resources")
        self._stringset = list(stringset)
        self._stringset_index = 0

        self.transcriber = Transcriber(template[resources_start:])
        self.source = self.transcriber.source

        compilers = {
            "string": self._compile_string,
            "string-array": self._compile_string_array,
            "plurals": self._compile_plurals,
        }
        for tag, offset in DumbXml(self.source).find(
                ("string", "string-array", "plurals")):
            if self._should_ignore(tag):
                continue
            compile_tag = compilers.get(tag.name)
            if compile_tag is not None:
                compile_tag(tag, offset)
        self.transcriber.copy_until(len(self.source))

        # Lets do another pass to clear empty <string-array>s
        first_pass = self.transcriber.get_destination()
        self.transcriber = Transcriber(first_pass)
        self.source = self.transcriber.source
        for array_tag, array_offset in DumbXml(self.source).find(
                "string-array"):
            if not array_tag.inner:
                continue
            if list(array_tag.find("item")):
                continue
            self.transcriber.copy_until(array_offset)
            self.transcriber.skip(len(array_tag.content))
        self.transcriber.copy_until(len(self.source))

        preamble = template[:resources_start]
        compiled = preamble + self.transcriber.get_destination()

        # drop the per-call state again
        self._stringset = None
        self._stringset_index = None
        self.transcriber = None

        return compiled

Beispiel #4

0

Datei anzeigen

Datei: beta_android.py Projekt: transifex/openformats

    def compile(self, template, stringset, **kwargs):
        """Produce the compiled XML for ``template`` using ``stringset``.

        Only the portion of ``template`` from ``<resources`` onwards is
        processed; the text before it is prepended unchanged to the result.
        A first pass hands each non-ignored ``<string>``, ``<string-array>``
        and ``<plurals>`` tag to the matching ``_compile_*`` method, a second
        pass removes ``<string-array>`` elements with content but without
        any ``<item>``.

        Raises ``ValueError`` when ``template`` has no ``<resources``.
        """
        split_at = template.index("<resources")
        head, body = template[:split_at], template[split_at:]

        self._stringset = list(stringset)
        self._stringset_index = 0
        self.transcriber = Transcriber(body)
        self.source = self.transcriber.source

        tag_names = ("string", "string-array", "plurals")
        dispatch = dict(zip(tag_names, (self._compile_string,
                                        self._compile_string_array,
                                        self._compile_plurals)))
        for tag, offset in DumbXml(self.source).find(tag_names):
            if self._should_ignore(tag):
                continue
            if tag.name in dispatch:
                dispatch[tag.name](tag, offset)
        self.transcriber.copy_until(len(self.source))

        # Lets do another pass to clear empty <string-array>s
        self.transcriber = Transcriber(self.transcriber.get_destination())
        self.source = self.transcriber.source
        for array_tag, array_offset in DumbXml(self.source).find(
                "string-array"):
            is_empty = array_tag.inner and not list(array_tag.find("item"))
            if is_empty:
                self.transcriber.copy_until(array_offset)
                self.transcriber.skip(len(array_tag.content))
        self.transcriber.copy_until(len(self.source))

        compiled = head + self.transcriber.get_destination()

        # reset the state that only lives for the duration of one call
        self._stringset = None
        self._stringset_index = None
        self.transcriber = None

        return compiled

Beispiel #5

0

Datei anzeigen

    def parse(self, content):
        """Extract the subtitle strings of ``content`` and build a template.

        Every section yielded by ``_generate_split_subtitles`` is passed to
        ``_parse_section``. When that returns a string, its text is replaced
        in the template by ``string.template_replacement``; otherwise the
        section is copied to the template unchanged.

        Returns a ``(template, stringset)`` tuple.
        """
        self.transcriber = Transcriber(content)
        self.max_order = None
        source = self.transcriber.source

        extracted = []
        for start, section in self._generate_split_subtitles(source):
            self.transcriber.copy_until(start)
            offset, string = self._parse_section(start, section)

            if not string:
                # nothing to translate: keep the section as it is
                self.transcriber.copy_until(start + len(section))
                continue

            extracted.append(string)
            self.transcriber.copy_until(offset)
            self.transcriber.add(string.template_replacement)
            self.transcriber.skip(len(string.string))

        self.transcriber.copy_until(len(source))
        return self.transcriber.get_destination(), extracted

Beispiel #6

0

Datei anzeigen

Datei: beta_android.py Projekt: transifex/openformats

    def parse(self, content, **kwargs):
        """Extract the translatable strings of an Android XML document.

        ``content`` may be text or UTF-8 encoded bytes. The text in front of
        ``<resources`` is preserved as is; the rest is scanned for
        ``<string>``, ``<string-array>``, ``<plurals>`` and comment tags. A
        comment is remembered and passed as developer comment to the
        ``_handle_*`` call for the next tag.

        Returns ``(template, stringset)``; raises ``ValueError`` when
        ``<resources`` is missing.
        """
        if isinstance(content, six.binary_type):
            content = content.decode("utf-8")  # convert to unicode

        split_at = content.index("<resources")
        head, body = content[:split_at], content[split_at:]

        self.transcriber = Transcriber(body)
        source = self.transcriber.source
        self._order = 0

        collected = []
        comment = ""
        tag_names = ("string-array", "string", "plurals", DumbXml.COMMENT)
        for tag, offset in DumbXml(source).find(tag_names):
            if self._should_ignore(tag):
                comment = ""
                continue

            if tag.name == DumbXml.COMMENT:
                comment = tag.inner
                self.transcriber.copy_until(offset + len(tag.content))
                continue

            if tag.name == "string":
                results = (self._handle_string_tag(tag, offset, comment),)
            elif tag.name == "string-array":
                results = self._handle_string_array_tag(tag, offset, comment)
            elif tag.name == "plurals":
                results = (self._handle_plurals_tag(tag, offset, comment),)
            else:
                continue

            for string in results:
                if string is not None:
                    collected.append(string)
            # the comment has been used up by this tag
            comment = ""

        self.transcriber.copy_until(len(source))
        template = head + self.transcriber.get_destination()
        self.transcriber = None

        return template, collected

Beispiel #7

0

Datei anzeigen

Datei: srt.py Projekt: transifex/openformats

    def parse(self, content):
        """Turn subtitle ``content`` into ``(template, stringset)``.

        Sections come from ``_generate_split_subtitles``; each one is given
        to ``_parse_section``. A section that yields a string has the
        string's text swapped for its ``template_replacement`` in the
        template, any other section is copied through untouched.
        """
        self.transcriber = Transcriber(content)
        source = self.transcriber.source
        self.max_order = None

        strings = []
        for section_start, section in self._generate_split_subtitles(source):
            section_end = section_start + len(section)
            self.transcriber.copy_until(section_start)
            text_offset, string = self._parse_section(section_start, section)

            if string:
                strings.append(string)
                self.transcriber.copy_until(text_offset)
                self.transcriber.add(string.template_replacement)
                self.transcriber.skip(len(string.string))
                continue

            self.transcriber.copy_until(section_end)

        self.transcriber.copy_until(len(source))
        return self.transcriber.get_destination(), strings

Beispiel #8

0

Datei anzeigen

Datei: compilers.py Projekt: transifex/openformats

    def compile(self, template, stringset, **kwargs):
        """Replace every hash in ``template`` with its translation.

        Non-pluralized strings are substituted in place. For a pluralized
        string one ``self.plural_template`` entry is written per plural rule:

        * if the hash sits alone on its line (only text matching
          ``self.SPACE_PAT`` before and after it), the whole line is
          replaced by one line per rule, each keeping the original indent
          and tail;
        * otherwise - including when the hash is on the first or last line
          of the template, where there is no surrounding newline - the
          entries simply take the place of the hash.

        Returns the compiled text. Raises ``ValueError`` if a hash from
        ``stringset`` does not occur in ``template``.
        """
        # Fix regex encoding
        space_pattern = re.compile(ensure_unicode(self.SPACE_PAT))

        # assume stringset is ordered within the template
        transcriber = Transcriber(template)
        template = transcriber.source

        for string in stringset:
            replacement = string.template_replacement
            hash_position = template.index(replacement)

            if not string.pluralized:
                transcriber.copy_until(hash_position)
                transcriber.add(string.string)
                transcriber.skip(len(replacement))
                continue

            end_of_hash = hash_position + len(replacement)
            plural_items = [
                self.plural_template.format(
                    rule=self.RULES_ITOA[rule], string=value
                )
                for rule, value in six.iteritems(string.string)
            ]

            # if the hash is on its own on a line with only spaces, we have
            # to remember its indent. A missing newline on either side (-1)
            # means the hash cannot be treated as a line of its own.
            previous_newline = template.rfind('\n', 0, hash_position)
            next_newline = template.find('\n', end_of_hash)
            on_own_line = False
            if previous_newline != -1 and next_newline != -1:
                indent = template[previous_newline + 1:hash_position]
                tail = template[end_of_hash:next_newline]
                on_own_line = bool(space_pattern.search(indent) and
                                   space_pattern.search(tail))

            if on_own_line:
                line_start = previous_newline + 1
                transcriber.copy_until(line_start)
                for item in plural_items:
                    transcriber.add(indent + item + tail + '\n')
                # indent + hash + tail + the newline ending the line
                transcriber.skip(next_newline + 1 - line_start)
            else:
                # string is not on its own, simply replace hash with all
                # plural forms
                transcriber.copy_until(hash_position)
                for item in plural_items:
                    transcriber.add(item)
                transcriber.skip(len(replacement))

        transcriber.copy_until(len(template))
        return transcriber.get_destination()

Beispiel #9

0

Datei anzeigen

Datei: srt.py Projekt: transifex/openformats

    def compile(self, template, stringset, **kwargs):
        """Fill the subtitle ``template`` with the strings of ``stringset``.

        The strings are expected in template order. Each section of the
        template is compared against the next pending string: when the
        section carries that string's hash, the hash is replaced by the
        translation; otherwise the section is removed from the output.

        An empty ``stringset`` is accepted; every section is then removed
        instead of ``StopIteration`` escaping from this method.

        Returns the compiled text. Raises ``ValueError`` if a section has
        fewer than two newlines.
        """
        transcriber = Transcriber(template)
        template = transcriber.source
        stringset = iter(stringset)
        # None means there is nothing to match against at all
        string = next(stringset, None)

        for start, subtitle_section in self.\
                _generate_split_subtitles(template):
            section_end = start + len(subtitle_section)
            transcriber.copy_until(start)
            transcriber.mark_section_start()

            # Hash is supposed to follow second newline character
            first_newline = subtitle_section.index('\n')
            second_newline = subtitle_section.index('\n', first_newline + 1)
            hash_position = second_newline + 1

            found = (
                string is not None and
                subtitle_section.startswith(string.template_replacement,
                                            hash_position)
            )
            if found:
                transcriber.copy_until(start + hash_position)
                transcriber.add(string.string)
                transcriber.skip(len(string.template_replacement))
                transcriber.copy_until(section_end)
                transcriber.mark_section_end()
                # once the strings run out, keep comparing against the last
                # one (same as before)
                string = next(stringset, string)
            else:
                # did not find it, must remove section
                transcriber.copy_until(section_end)
                transcriber.mark_section_end()
                transcriber.remove_section()

        transcriber.copy_until(len(template))
        return transcriber.get_destination()

Beispiel #10

0

Datei anzeigen

Datei: beta_android.py Projekt: transifex/openformats

class BetaAndroidHandler(Handler):
    """Parse and compile Android XML string resources.

    ``parse`` walks the ``<resources>`` element and replaces the content of
    ``<string>`` tags, ``<string-array>`` items and ``<plurals>`` tags with
    the ``template_replacement`` of the ``OpenString`` created for them.
    ``compile`` walks a template produced that way and puts the strings of a
    stringset back, removing the tags it has no string for.

    Both methods keep their working state (``self.transcriber`` and friends)
    on the instance for the duration of the call.
    """

    name = "BETA_ANDROID"
    extension = "xml"

    plural_template = u'<item quantity="{rule}">{string}</item>'
    SPACE_PAT = re.compile(r'^\s*$')
    # Attributes that designate a string should be filtered out
    FILTER_ATTRIBUTES = {
        'translatable': 'false'
    }

    EXTRACTS_RAW = False

    # printf-style format specifier. The length modifiers are a plain
    # alternation; escaping the pipes would make them match the literal text
    # "hh|h|l|ll" instead.
    SPECIFIER = re.compile(
        r'%((?:(?P<ord>\d+)\$|\((?P<key>\w+)\))?(?P<fullvar>[+#\- 0]*(?:\d+)?'
        r'(?:\.\d+)?(hh|h|l|ll|j|z|t|L)?(?P<type>[diufFeEgGxXaAoscpn%])))'
    )

    def parse(self, content, **kwargs):
        """Split ``content`` into a template and a list of strings.

        ``content`` may be text or UTF-8 encoded bytes. The text in front of
        ``<resources`` is kept verbatim. A comment tag is remembered and
        passed as developer comment to the handler of the next tag; tags
        matched by ``_should_ignore`` are left untouched and discard the
        remembered comment.

        Returns ``(template, stringset)``; raises ``ValueError`` when
        ``<resources`` is missing.
        """
        stringset = []
        if isinstance(content, six.binary_type):
            content = content.decode("utf-8")  # convert to unicode

        resources_tag_position = content.index("<resources")

        self.transcriber = Transcriber(content[resources_tag_position:])
        source = self.transcriber.source

        self._order = 0

        resources_tag = DumbXml(source)
        last_comment = ""
        for tag, offset in resources_tag.find(("string-array", "string",
                                               "plurals", DumbXml.COMMENT)):
            if self._should_ignore(tag):
                last_comment = ""
                continue
            if tag.name == DumbXml.COMMENT:
                last_comment = tag.inner
                self.transcriber.copy_until(offset + len(tag.content))
            elif tag.name == "string":
                string = self._handle_string_tag(tag, offset, last_comment)
                last_comment = ""
                if string is not None:
                    stringset.append(string)
            elif tag.name == "string-array":
                for string in self._handle_string_array_tag(tag, offset,
                                                            last_comment):
                    if string is not None:
                        stringset.append(string)
                last_comment = ""
            elif tag.name == "plurals":
                string = self._handle_plurals_tag(tag, offset, last_comment)
                if string is not None:
                    stringset.append(string)
                last_comment = ""

        self.transcriber.copy_until(len(source))

        template = content[:resources_tag_position] +\
            self.transcriber.get_destination()

        self.transcriber = None

        return template, stringset

    def _handle_string_tag(self, tag, offset, comment):
        """Extract the string of a ``<string>`` tag, if it has any content.

        A tag whose inner text is blank is copied to the template as is and
        ``None`` is returned. Otherwise the inner text is replaced by the
        hash of the returned ``OpenString``.
        """
        string = None
        if tag.inner.strip() != "":
            context = tag.attrs.get('product', "")
            string = OpenString(tag.attrs['name'], tag.inner,
                                context=context, order=self._order,
                                developer_comment=comment)
            self._order += 1

        # ... <string name="foo">Hello ....
        #                        ^
        self.transcriber.copy_until(offset + tag.inner_offset)

        # ... ing name="foo">Hello world</stri...
        #                               ^
        if string is not None:
            self.transcriber.add(string.template_replacement)
            self.transcriber.skip(len(tag.inner))
        else:
            self.transcriber.copy_until(offset + tag.inner_offset +
                                        len(tag.inner))

        # ...ello World</string>
        #                       ^
        self.transcriber.copy_until(offset + len(tag.content))

        return string

    def _handle_string_array_tag(self, string_array_tag, string_array_offset,
                                 comment):
        """Yield one ``OpenString`` per non-blank ``<item>`` of the array.

        Keys have the form ``name[index]`` where ``index`` counts all items,
        blank ones included. This is a generator: the template is only
        written while the caller iterates over it.
        """
        # ...ing-array>   <item>H...
        #              ^
        self.transcriber.copy_until(string_array_offset +
                                    string_array_tag.inner_offset)

        context = string_array_tag.attrs.get('product', "")
        for index, (item_tag, item_offset) in enumerate(
                string_array_tag.find('item')):
            # item offsets are relative to the start of the array tag
            item_start = string_array_offset + item_offset
            inner_start = item_start + item_tag.inner_offset

            string = None
            if item_tag.inner.strip() != "":
                string = OpenString(
                    "{}[{}]".format(string_array_tag.attrs['name'], index),
                    item_tag.inner,
                    context=context,
                    order=self._order,
                    developer_comment=comment
                )
                self._order += 1
                yield string

            # ... <item>Hello...
            #           ^
            self.transcriber.copy_until(inner_start)

            # ...ello world</item>...
            #              ^
            if string is not None:
                self.transcriber.add(string.template_replacement)
                self.transcriber.skip(len(item_tag.inner))
            else:
                self.transcriber.copy_until(inner_start +
                                            len(item_tag.inner))

            # orld</item>   <it...
            #            ^
            # The end of the item is measured from the start of the item, not
            # from the start of its inner text.
            self.transcriber.copy_until(item_start + len(item_tag.content))

        # </item>  </string-array>
        #                         ^
        self.transcriber.copy_until(string_array_offset +
                                    len(string_array_tag.content))

    def _handle_plurals_tag(self, plurals_tag, plurals_offset, comment):
        """Extract the pluralized string of a ``<plurals>`` tag.

        All ``<item>`` children are collected into a ``{rule: text}`` dict;
        in the template the span from the first to the last item is replaced
        by a single hash. If any item is blank, or there are no items at all,
        the tag is copied unchanged and ``None`` is returned.
        """
        # <plurals name="foo">   <item>Hello ...
        #                     ^
        self.transcriber.copy_until(plurals_offset + plurals_tag.inner_offset)

        first_item_offset = None
        last_item_tag = last_item_offset = None
        strings = {}
        for item_tag, item_offset in plurals_tag.find('item'):
            if item_tag.inner.strip() == "":
                strings = None
                break

            if first_item_offset is None:
                first_item_offset = item_offset

            # get_rule_number is not defined in this class; presumably it is
            # inherited from Handler
            rule = self.get_rule_number(item_tag.attrs['quantity'])
            strings[rule] = item_tag.inner
            last_item_tag, last_item_offset = item_tag, item_offset

        # ``strings`` is None after a blank item and empty without any item
        if strings:
            context = plurals_tag.attrs.get('product', "")
            string = OpenString(plurals_tag.attrs['name'], strings,
                                pluralized=True,
                                context=context, order=self._order,
                                developer_comment=comment)
            self._order += 1

            # <plurals name="foo">   <item>Hello ...
            #                        ^
            self.transcriber.copy_until(plurals_offset + first_item_offset)

            # ...</item>   </plurals>...
            #           ^
            self.transcriber.add(string.template_replacement)
            self.transcriber.skip(last_item_offset +
                                  len(last_item_tag.content) -
                                  first_item_offset)

        else:
            string = None

        # ...</plurals> ...
        #              ^
        self.transcriber.copy_until(plurals_offset + len(plurals_tag.content))

        return string

    def _should_ignore(self, tag):
        """
        If the tag has a key: value element that matches FILTER_ATTRIBUTES
        it will return True, else it returns False
        """
        for key, value in six.iteritems(self.FILTER_ATTRIBUTES):
            filter_attr = tag.attrs.get(key, None)
            if filter_attr is not None and filter_attr == value:
                return True
        return False

    def compile(self, template, stringset, **kwargs):
        """Fill ``template`` with the strings of ``stringset``.

        A first pass hands every non-ignored ``<string>``,
        ``<string-array>`` and ``<plurals>`` tag to its ``_compile_*``
        method; a second pass removes ``<string-array>`` elements that have
        content but no ``<item>`` left. The text in front of ``<resources``
        is kept verbatim.

        Returns the compiled document; raises ``ValueError`` when
        ``<resources`` is missing.
        """
        resources_tag_position = template.index("<resources")
        self._stringset = list(stringset)
        self._stringset_index = 0

        self.transcriber = Transcriber(template[resources_tag_position:])
        self.source = self.transcriber.source

        resources_tag = DumbXml(self.source)

        for tag, offset in resources_tag.find(("string", "string-array",
                                               "plurals")):
            if self._should_ignore(tag):
                continue
            if tag.name == "string":
                self._compile_string(tag, offset)
            elif tag.name == "string-array":
                self._compile_string_array(tag, offset)
            elif tag.name == "plurals":
                self._compile_plurals(tag, offset)
        self.transcriber.copy_until(len(self.source))

        # Lets do another pass to clear empty <string-array>s
        self.transcriber = Transcriber(self.transcriber.get_destination())
        self.source = self.transcriber.source
        resources_tag = DumbXml(self.source)
        for string_array_tag, string_array_offset in resources_tag.find(
                "string-array"):
            if (string_array_tag.inner and
                    len(list(string_array_tag.find("item"))) == 0):
                self.transcriber.copy_until(string_array_offset)
                self.transcriber.skip(len(string_array_tag.content))
        self.transcriber.copy_until(len(self.source))

        compiled = template[:resources_tag_position] +\
            self.transcriber.get_destination()

        self._stringset = None
        self._stringset_index = None
        self.transcriber = None

        return compiled

    def _compile_string(self, string_tag, string_offset):
        """Compile one ``<string>`` tag.

        If the tag holds the hash of the next pending string, the hash is
        replaced by the translation; otherwise the whole tag is dropped.
        """
        try:
            next_string = self._stringset[self._stringset_index]
        except IndexError:
            next_string = None
        if (next_string is not None and
                next_string.template_replacement == string_tag.inner):
            # found one to replace
            self._stringset_index += 1

            self.transcriber.copy_until(string_offset +
                                        string_tag.inner_offset)
            self.transcriber.add(next_string.string)
            self.transcriber.skip(len(string_tag.inner))
            self.transcriber.copy_until(string_offset +
                                        len(string_tag.content))

        else:
            # didn't find it, must remove by skipping it
            self.transcriber.copy_until(string_offset)
            self.transcriber.skip(len(string_tag.content))

    def _compile_string_array(self, string_array_tag, string_array_offset):
        """Compile the ``<item>`` tags of one ``<string-array>``.

        Each item that holds the hash of the next pending string gets its
        translation; any other item is dropped. The array tag itself is
        always copied.
        """
        self.transcriber.copy_until(string_array_offset +
                                    string_array_tag.inner_offset)
        for item_tag, item_offset in string_array_tag.find("item"):
            try:
                next_string = self._stringset[self._stringset_index]
            except IndexError:
                next_string = None
            if (next_string is not None and
                    next_string.template_replacement == item_tag.inner):
                # found one to replace
                self._stringset_index += 1

                self.transcriber.copy_until(string_array_offset + item_offset +
                                            item_tag.inner_offset)
                self.transcriber.add(next_string.string)
                self.transcriber.skip(len(item_tag.inner))
                self.transcriber.copy_until(string_array_offset + item_offset +
                                            len(item_tag.content))

            else:
                # didn't find it, must remove by skipping it
                self.transcriber.copy_until(string_array_offset + item_offset)
                self.transcriber.skip(len(item_tag.content))
        self.transcriber.copy_until(string_array_offset +
                                    len(string_array_tag.content))

    def _compile_plurals(self, plurals_tag, plurals_offset):
        """Compile one ``<plurals>`` tag.

        If the tag holds nothing but the hash of the next pending string, one
        ``plural_template`` entry per plural rule is written in its place:
        one entry per line (keeping indent and tail) when the hash is alone
        on its line, otherwise all entries inline. A tag without a matching
        string is dropped.
        """
        try:
            next_string = self._stringset[self._stringset_index]
        except IndexError:
            next_string = None
        if (next_string is None or
                next_string.template_replacement != plurals_tag.inner.strip()):
            # didn't find it, must remove by skipping it
            self.transcriber.copy_until(plurals_offset)
            self.transcriber.skip_until(plurals_offset +
                                        len(plurals_tag.content))
            return

        # found one to replace
        self._stringset_index += 1

        replacement = next_string.template_replacement
        hash_position = (plurals_offset + plurals_tag.inner_offset +
                         plurals_tag.inner.index(replacement))
        end_of_hash = hash_position + len(replacement)
        # get_rule_string is not defined in this class; presumably it is
        # inherited from Handler
        plural_items = [
            self.plural_template.format(
                rule=self.get_rule_string(rule), string=value
            )
            for rule, value in six.iteritems(next_string.string)
        ]

        # if the hash is on its own on a line with only spaces, we have to
        # remember its indent. Without a newline on both sides (-1) it
        # cannot be on a line of its own.
        previous_newline = self.source.rfind('\n', 0, hash_position)
        next_newline = self.source.find('\n', end_of_hash)
        is_multiline = False
        if previous_newline != -1 and next_newline != -1:
            indent = self.source[previous_newline + 1:hash_position]
            tail = self.source[end_of_hash:next_newline]
            is_multiline = bool(self.SPACE_PAT.search(indent) and
                                self.SPACE_PAT.search(tail))

        if is_multiline:
            # write until beginning of the line holding the hash
            line_start = previous_newline + 1
            self.transcriber.copy_until(line_start)
            for item in plural_items:
                self.transcriber.add(indent + item + tail + '\n')
            # indent + hash + tail + the newline ending the line
            self.transcriber.skip(next_newline + 1 - line_start)
        else:
            # string is not on its own, simply replace hash with all plural
            # forms. Only the hash itself is consumed here: whatever
            # surrounds it on the line still has to be copied.
            self.transcriber.copy_until(hash_position)
            for item in plural_items:
                self.transcriber.add(item)
            self.transcriber.skip(len(replacement))

        # finish up by copying until the end of </plurals>
        self.transcriber.copy_until(plurals_offset +
                                    len(plurals_tag.content))

Beispiel #11

0

Datei anzeigen

class SrtHandler(Handler):
    """Handler for SubRip (``.srt``) subtitle files.

    The content is treated as a series of sections separated by blank lines.
    Each section is expected to hold, in this order, an order number, a
    timings line and the subtitle text.
    """

    name = "SRT"
    extension = "srt"
    EXTRACTS_RAW = False

    # Locates the first character of a section that is not whitespace
    NON_SPACE_PAT = re.compile(r'[^\s]')

    def _generate_split_subtitles(self, content, **kwargs):
        """Yield ``(offset, section)`` for each non-blank section of `content`.

        Sections are delimited by two consecutive newlines. `offset` is the
        position inside `content` of the section's first non-whitespace
        character; `section` is the section text with surrounding whitespace
        stripped.
        """
        start = 0
        for section in content.split('\n\n'):
            # find first non-space character of section
            match = self.NON_SPACE_PAT.search(section)
            if match:
                yield start + match.start(), section.strip()
            # +2 accounts for the '\n\n' separator removed by split()
            start += len(section) + 2

    def parse(self, content):
        """Extract the subtitles of `content`.

        Returns:
            A ``(template, stringset)`` tuple: `template` is the content with
            every subtitle text replaced by its string's
            `template_replacement`, `stringset` is the list of strings created
            by `_parse_section`, in file order.

        Raises:
            ParseError: propagated from `_parse_section` for malformed
                sections.
        """
        self.transcriber = Transcriber(content)
        source = self.transcriber.source
        stringset = []
        # Highest order number seen so far; used to enforce ascending order
        self.max_order = None
        for start, subtitle_section in self._generate_split_subtitles(source):
            self.transcriber.copy_until(start)
            offset, string = self._parse_section(start, subtitle_section)

            if string:
                stringset.append(string)

                # keep order number and timings, swap the text for the hash
                self.transcriber.copy_until(offset)
                self.transcriber.add(string.template_replacement)
                self.transcriber.skip(len(string.string))
            else:
                self.transcriber.copy_until(start + len(subtitle_section))

        self.transcriber.copy_until(len(source))

        template = self.transcriber.get_destination()
        return template, stringset

    def _parse_section(self, offset, section):
        """Validate one subtitle section and turn it into a string object.

        Args:
            offset: position of the section's first character in the source.
            section: the stripped text of the section.

        Returns:
            A ``(text_offset, string)`` tuple where `text_offset` is the
            position in the source at which the subtitle text begins and
            `string` is the `OpenString` built from the section.

        Raises:
            ParseError: if the section has fewer than three lines, the order
                number is not a positive integer or does not increase, the
                timings line is malformed, or the subtitle text is blank.
        """
        try:
            order_str, timings, string = section.split('\n', 2)
        except ValueError:
            raise ParseError(
                u"Not enough data on subtitle section on line {}. Order "
                u"number, timings and subtitle content are needed".
                format(self.transcriber.line_number)
            )

        # first line, order
        order_parse_error = False
        try:
            order_int = int(order_str.strip())
        except ValueError:
            order_parse_error = True
        else:
            if order_int <= 0:
                order_parse_error = True
        if order_parse_error:
            raise ParseError(
                u"Order number on line {line_no} ({order_no}) must be a "
                u"positive integer".format(
                    line_no=self.transcriber.line_number,
                    order_no=order_str,
                )
            )
        if self.max_order is not None and order_int <= self.max_order:
            raise ParseError(
                u"Order numbers must be in ascending order; number in line "
                u"{line_no} ({order_no}) is wrong".format(
                    line_no=self.transcriber.line_number,
                    order_no=order_int,
                )
            )
        else:
            self.max_order = order_int

        # second line, timings
        timings_parse_error = False
        try:
            # at most 4 parts: start, arrow, end and an optional remainder
            splitted = timings.split(None, 3)
            if len(splitted) == 3:
                start, arrow, end = splitted
            else:
                # fewer than 3 parts fails to unpack and raises ValueError
                start, arrow, end, _ = splitted
        except ValueError:
            timings_parse_error = True
        else:
            if arrow != u"-->":
                timings_parse_error = True
        if timings_parse_error:
            raise ParseError(
                u"Timings on line {} don't follow '[start] --> [end] "
                "(position)' pattern".format(
                    self.transcriber.line_number + 1
                )
            )
        try:
            start = self._format_timing(start)
        except ValueError:
            raise ParseError(
                u"Problem with start of timing at line {line_no}: '{start}'".
                format(line_no=self.transcriber.line_number + 1, start=start)
            )
        try:
            end = self._format_timing(end)
        except ValueError:
            raise ParseError(
                u"Problem with end of timing at line {line_no}: '{end}'".
                format(line_no=self.transcriber.line_number + 1, end=end)
            )

        # Content
        string_stripped = string.strip()
        if string_stripped == u"":
            raise ParseError(u"Subtitle is empty on line {}".
                             format(self.transcriber.line_number + 2))

        string = OpenString(order_str.strip(), string, order=order_int,
                            occurrences="{},{}".format(start, end))
        # skip the order line, the timings line and their two newlines
        return offset + len(order_str) + 1 + len(timings) + 1, string

    def _format_timing(self, timing):
        """Return `timing` normalised to ``HH:MM:SS.mmm``.

        The part after a single comma, if any, is taken as the milliseconds;
        otherwise the milliseconds default to ``000``.

        Raises:
            ValueError: if `timing` cannot be split into integer hours,
                minutes and seconds (and milliseconds).
        """
        try:
            rest, milliseconds = timing.split(',')
            # left-align and zero-fill to three characters
            milliseconds = "{:<03}".format(milliseconds)
        except ValueError:
            rest, milliseconds = timing, "000"
        hours, minutes, seconds = rest.split(':')
        hours, minutes, seconds, milliseconds = (int(hours),
                                                 int(minutes),
                                                 int(seconds),
                                                 int(milliseconds))
        return "{:02}:{:02}:{:02}.{:03}".format(hours, minutes, seconds,
                                                milliseconds)

    def compile(self, template, stringset, **kwargs):
        """Build a subtitle file from `template` and `stringset`.

        Walks the sections of the template in order. A section whose third
        line starts with the `template_replacement` of the next pending
        string gets that hash replaced by the string's text; any other
        section is removed from the output. `stringset` is expected to be in
        the same order as the template. An empty `stringset` removes every
        section instead of failing.

        Returns:
            The compiled file content.
        """
        transcriber = Transcriber(template)
        template = transcriber.source
        stringset = iter(stringset)
        # `None` means "no pending string"; using a default here keeps an
        # empty stringset from raising StopIteration
        string = next(stringset, None)

        for start, subtitle_section in self.\
                _generate_split_subtitles(template):
            transcriber.copy_until(start)
            transcriber.mark_section_start()

            # Hash is supposed to follow second newline character
            first_newline = subtitle_section.index('\n')
            second_newline = subtitle_section.index('\n', first_newline + 1)
            hash_position = second_newline + 1

            if (string is not None and subtitle_section[
                    hash_position:
                    hash_position + len(string.template_replacement)
                    ] == string.template_replacement):
                # found it
                transcriber.copy_until(start + hash_position)
                transcriber.add(string.string)
                transcriber.skip(len(string.template_replacement))
                transcriber.copy_until(start + len(subtitle_section))
                transcriber.mark_section_end()
                # once exhausted, every remaining section gets removed
                string = next(stringset, None)
            else:
                # did not find it, must remove section
                transcriber.copy_until(start + len(subtitle_section))
                transcriber.mark_section_end()
                transcriber.remove_section()

        transcriber.copy_until(len(template))
        return transcriber.get_destination()

Beispiel #12

0

Datei anzeigen

Datei: yaml.py Projekt: transifex/openformats

    def _compile_from_template(self, template, stringset, **kwargs):
        """Produce the translated file by filling in the template.

        For every string of the stringset, the string's
        `template_replacement` is looked up in the template and swapped for
        the rendered translation (pluralized or styled literal). A non-empty
        `context` is written in front of the translation, followed by one
        space.

        Returns:
            The compiled file content.
        """
        writer = Transcriber(template)
        template = writer.source

        for entry in stringset:
            render = (self._compile_pluralized if entry.pluralized
                      else self._write_styled_literal)
            rendered = render(entry)

            placeholder = entry.template_replacement
            writer.copy_until(template.index(placeholder))

            # The context carries custom tags; the trailing space keeps it
            # from being merged with the translation that follows
            if entry.context:
                writer.add(entry.context)
                writer.add(' ')

            writer.add(rendered)
            writer.skip(len(placeholder))

        writer.copy_until(len(template))
        return writer.get_destination()

Beispiel #13

0

Datei anzeigen

    def compile(self, template, stringset, **kwargs):
        """Build a subtitle file from `template` and `stringset`.

        Walks the sections of the template in order. A section whose third
        line starts with the `template_replacement` of the next pending
        string gets that hash replaced by the string's text; any other
        section is removed from the output. `stringset` is expected to be in
        the same order as the template. An empty `stringset` removes every
        section instead of failing.

        Returns:
            The compiled file content.
        """
        transcriber = Transcriber(template)
        template = transcriber.source
        stringset = iter(stringset)
        # `None` means "no pending string"; using a default here keeps an
        # empty stringset from raising StopIteration
        string = next(stringset, None)

        for start, subtitle_section in self.\
                _generate_split_subtitles(template):
            transcriber.copy_until(start)
            transcriber.mark_section_start()

            # Hash is supposed to follow second newline character
            first_newline = subtitle_section.index('\n')
            second_newline = subtitle_section.index('\n', first_newline + 1)
            hash_position = second_newline + 1

            if (string is not None and subtitle_section[
                    hash_position:
                    hash_position + len(string.template_replacement)
                    ] == string.template_replacement):
                # found it
                transcriber.copy_until(start + hash_position)
                transcriber.add(string.string)
                transcriber.skip(len(string.template_replacement))
                transcriber.copy_until(start + len(subtitle_section))
                transcriber.mark_section_end()
                # once exhausted, every remaining section gets removed
                string = next(stringset, None)
            else:
                # did not find it, must remove section
                transcriber.copy_until(start + len(subtitle_section))
                transcriber.mark_section_end()
                transcriber.remove_section()

        transcriber.copy_until(len(template))
        return transcriber.get_destination()

Beispiel #14

0

Datei anzeigen

Datei: srt.py Projekt: transifex/openformats

class SrtHandler(Handler):
    """Handler for SubRip (``.srt``) subtitle files.

    The content is treated as a series of sections separated by blank lines.
    Each section is expected to hold, in this order, an order number, a
    timings line and the subtitle text.
    """

    name = "SRT"
    extension = "srt"

    # Locates the first character of a section that is not whitespace
    NON_SPACE_PAT = re.compile(r'[^\s]')

    def _generate_split_subtitles(self, content, **kwargs):
        """Yield ``(offset, section)`` for each non-blank section of `content`.

        Sections are delimited by two consecutive newlines. `offset` is the
        position inside `content` of the section's first non-whitespace
        character; `section` is the section text with surrounding whitespace
        stripped.
        """
        start = 0
        for section in content.split('\n\n'):
            # find first non-space character of section
            match = self.NON_SPACE_PAT.search(section)
            if match:
                yield start + match.start(), section.strip()
            # +2 accounts for the '\n\n' separator removed by split()
            start += len(section) + 2

    def parse(self, content):
        """Extract the subtitles of `content`.

        Returns:
            A ``(template, stringset)`` tuple: `template` is the content with
            every subtitle text replaced by its string's
            `template_replacement`, `stringset` is the list of strings created
            by `_parse_section`, in file order.

        Raises:
            ParseError: propagated from `_parse_section` for malformed
                sections.
        """
        self.transcriber = Transcriber(content)
        source = self.transcriber.source
        stringset = []
        # Highest order number seen so far; used to enforce ascending order
        self.max_order = None
        for start, subtitle_section in self._generate_split_subtitles(source):
            self.transcriber.copy_until(start)
            offset, string = self._parse_section(start, subtitle_section)

            if string:
                stringset.append(string)

                # keep order number and timings, swap the text for the hash
                self.transcriber.copy_until(offset)
                self.transcriber.add(string.template_replacement)
                self.transcriber.skip(len(string.string))
            else:
                self.transcriber.copy_until(start + len(subtitle_section))

        self.transcriber.copy_until(len(source))

        template = self.transcriber.get_destination()
        return template, stringset

    def _parse_section(self, offset, section):
        """Validate one subtitle section and turn it into a string object.

        Args:
            offset: position of the section's first character in the source.
            section: the stripped text of the section.

        Returns:
            A ``(text_offset, string)`` tuple where `text_offset` is the
            position in the source at which the subtitle text begins and
            `string` is the `OpenString` built from the section.

        Raises:
            ParseError: if the section has fewer than three lines, the order
                number is not a positive integer or does not increase, the
                timings line is malformed, or the subtitle text is blank.
        """
        try:
            order_str, timings, string = section.split('\n', 2)
        except ValueError:
            raise ParseError(
                u"Not enough data on subtitle section on line {}. Order "
                u"number, timings and subtitle content are needed".
                format(self.transcriber.line_number)
            )

        # first line, order
        order_parse_error = False
        try:
            order_int = int(order_str.strip())
        except ValueError:
            order_parse_error = True
        else:
            if order_int <= 0:
                order_parse_error = True
        if order_parse_error:
            raise ParseError(
                u"Order number on line {line_no} ({order_no}) must be a "
                u"positive integer".format(
                    line_no=self.transcriber.line_number,
                    order_no=order_str,
                )
            )
        if self.max_order is not None and order_int <= self.max_order:
            raise ParseError(
                u"Order numbers must be in ascending order; number in line "
                u"{line_no} ({order_no}) is wrong".format(
                    line_no=self.transcriber.line_number,
                    order_no=order_int,
                )
            )
        else:
            self.max_order = order_int

        # second line, timings
        timings_parse_error = False
        try:
            # at most 4 parts: start, arrow, end and an optional remainder
            splitted = timings.split(None, 3)
            if len(splitted) == 3:
                start, arrow, end = splitted
            else:
                # fewer than 3 parts fails to unpack and raises ValueError
                start, arrow, end, _ = splitted
        except ValueError:
            timings_parse_error = True
        else:
            if arrow != u"-->":
                timings_parse_error = True
        if timings_parse_error:
            raise ParseError(
                u"Timings on line {} don't follow '[start] --> [end] "
                "(position)' pattern".format(
                    self.transcriber.line_number + 1
                )
            )
        try:
            start = self._format_timing(start)
        except ValueError:
            raise ParseError(
                u"Problem with start of timing at line {line_no}: '{start}'".
                format(line_no=self.transcriber.line_number + 1, start=start)
            )
        try:
            end = self._format_timing(end)
        except ValueError:
            raise ParseError(
                u"Problem with end of timing at line {line_no}: '{end}'".
                format(line_no=self.transcriber.line_number + 1, end=end)
            )

        # Content
        string_stripped = string.strip()
        if string_stripped == u"":
            raise ParseError(u"Subtitle is empty on line {}".
                             format(self.transcriber.line_number + 2))

        string = OpenString(order_str.strip(), string, order=order_int,
                            occurrences="{},{}".format(start, end))
        # skip the order line, the timings line and their two newlines
        return offset + len(order_str) + 1 + len(timings) + 1, string

    def _format_timing(self, timing):
        """Return `timing` normalised to ``HH:MM:SS.mmm``.

        The part after a single comma, if any, is taken as the milliseconds;
        otherwise the milliseconds default to ``000``.

        Raises:
            ValueError: if `timing` cannot be split into integer hours,
                minutes and seconds (and milliseconds).
        """
        try:
            rest, milliseconds = timing.split(',')
            # left-align and zero-fill to three characters
            milliseconds = "{:<03}".format(milliseconds)
        except ValueError:
            rest, milliseconds = timing, "000"
        hours, minutes, seconds = rest.split(':')
        hours, minutes, seconds, milliseconds = (int(hours),
                                                 int(minutes),
                                                 int(seconds),
                                                 int(milliseconds))
        return "{:02}:{:02}:{:02}.{:03}".format(hours, minutes, seconds,
                                                milliseconds)

    def compile(self, template, stringset, **kwargs):
        """Build a subtitle file from `template` and `stringset`.

        Walks the sections of the template in order. A section whose third
        line starts with the `template_replacement` of the next pending
        string gets that hash replaced by the string's text; any other
        section is removed from the output. `stringset` is expected to be in
        the same order as the template. An empty `stringset` removes every
        section instead of failing.

        Returns:
            The compiled file content.
        """
        transcriber = Transcriber(template)
        template = transcriber.source
        stringset = iter(stringset)
        # `None` means "no pending string"; using a default here keeps an
        # empty stringset from raising StopIteration
        string = next(stringset, None)

        for start, subtitle_section in self.\
                _generate_split_subtitles(template):
            transcriber.copy_until(start)
            transcriber.mark_section_start()

            # Hash is supposed to follow second newline character
            first_newline = subtitle_section.index('\n')
            second_newline = subtitle_section.index('\n', first_newline + 1)
            hash_position = second_newline + 1

            if (string is not None and subtitle_section[
                    hash_position:
                    hash_position + len(string.template_replacement)
                    ] == string.template_replacement):
                # found it
                transcriber.copy_until(start + hash_position)
                transcriber.add(string.string)
                transcriber.skip(len(string.template_replacement))
                transcriber.copy_until(start + len(subtitle_section))
                transcriber.mark_section_end()
                # once exhausted, every remaining section gets removed
                string = next(stringset, None)
            else:
                # did not find it, must remove section
                transcriber.copy_until(start + len(subtitle_section))
                transcriber.mark_section_end()
                transcriber.remove_section()

        transcriber.copy_until(len(template))
        return transcriber.get_destination()

Beispiel #15

0

Datei anzeigen

Datei: indesign.py Projekt: transifex/openformats

    def _escape_lt(string):
        """Replace every "lonely" `<` in `string` with `&lt;`.

        A `<` that opens one of the recognised special sequences
        (`<?ACE 18?>`, `<?ACE 19?>`, `<?ACE 3?>`, `<?ACE 8?>`, `<?ACE 7?>`,
        `<Br/>`) is kept untouched; every other `<` is escaped. E.g.:

            "hello world"         -> "hello world"
            "hello <world"        -> "hello &lt;world"
            "hello &lt;world"     -> "hello &lt;world"
        """
        # Positions of `<` that start a special sequence (list based on
        # https://mayart.de/download/Indesign-IDML/special-idml-chars.pdf)
        special_sequence = re.compile(
            r'<(\?ACE 18\?|\?ACE 19\?|\?ACE 3\?|\?ACE 8\?|\?ACE 7\?|Br\/)>')
        protected = {found.start()
                     for found in special_sequence.finditer(string)}

        # Whatever `<` remains is lonely and has to be escaped
        lonely = sorted(
            found.start()
            for found in re.finditer(r'<', string)
            if found.start() not in protected
        )

        # Use Transcriber to replace each lonely `<` with '&lt;'
        transcriber = Transcriber(string)
        for position in lonely:
            transcriber.copy_until(position)
            transcriber.add('&lt;')
            transcriber.skip(1)
        transcriber.copy_to_end()
        return transcriber.get_destination()

Beispiel #16

0

Datei anzeigen

    def _compile_story(self, story_content):
        """Compile a single story.

        Strings are taken off the front of `self.stringset` one by one for as
        long as their `template_replacement` can be found in the story; each
        hash found is replaced by the string's text, passed through
        `_escape_amps`. The first string that is not part of this story is
        put back at the front of `self.stringset`.

        args:
            story_content: the xml content of the story
        returns:
            compiled_story: the compiled story content
        """
        transcriber = Transcriber(story_content)
        leftover_hash = re.compile(ensure_unicode(r'[a-z,0-9]{32}_tr'))

        while True:
            try:
                candidate = self.stringset.pop(0)
            except IndexError:
                # no strings left at all
                break

            try:
                position = story_content.index(
                    candidate.template_replacement)
            except ValueError:
                # not in this story: hand the string back and stop
                self.stringset.insert(0, candidate)
                break

            transcriber.copy_until(position)
            transcriber.add(self._escape_amps(candidate.string))
            transcriber.skip(len(candidate.template_replacement))

        # Copy whatever follows the last replaced hash
        transcriber.copy_until(len(story_content))

        # Any hash that was not replaced is substituted with an empty string
        return leftover_hash.sub(u'', transcriber.get_destination())

Beispiel #17

0

Datei anzeigen

    def compile(self, template, stringset, **kwargs):
        """Build the compiled file by replacing every hash in `template`.

        `stringset` is assumed to be ordered as the hashes appear in the
        template. A non-pluralized string simply replaces its hash. A
        pluralized string is expanded to one `plural_template` entry per
        plural rule: when the hash sits alone on its line (only whitespace
        around it) each entry is written on a line of its own, reusing the
        line's indentation and trailing whitespace; otherwise the entries
        replace the hash inline. A hash on the first or last line of the
        template (no newline before or after it) is handled inline as well.

        Returns:
            The compiled file content.

        Raises:
            ValueError: if a string's hash is not present in the template.
        """
        # Fix regex encoding
        space_pattern = re.compile(ensure_unicode(self.SPACE_PAT))

        # assume stringset is ordered within the template
        transcriber = Transcriber(template)
        template = transcriber.source

        for string in stringset:
            placeholder = string.template_replacement
            hash_position = template.index(placeholder)
            if not string.pluralized:
                transcriber.copy_until(hash_position)
                transcriber.add(string.string)
                transcriber.skip(len(placeholder))
                continue

            # Locate the boundaries of the line that holds the hash; -1 means
            # there is no newline on that side
            hash_end = hash_position + len(placeholder)
            newline_before = template.rfind('\n', 0, hash_position)
            newline_after = template.find('\n', hash_end)

            indent = tail = None
            if newline_before != -1 and newline_after != -1:
                # if the hash is on its own on a line with only spaces, we
                # have to remember its indent
                indent = template[newline_before + 1:hash_position]
                tail = template[hash_end:newline_after]
            on_own_line = (indent is not None and
                           space_pattern.search(indent) and
                           space_pattern.search(tail))

            if on_own_line:
                # write until the beginning of the hash's line
                transcriber.copy_until(newline_before + 1)
                for rule, value in six.iteritems(string.string):
                    transcriber.add(
                        indent +
                        self.plural_template.format(
                            rule=self.RULES_ITOA[rule], string=value) +
                        tail + '\n')
                # skip indent, hash, tail and the closing newline
                transcriber.skip(newline_after - newline_before)
            else:
                # string is not on its own, simply replace hash with all
                # plural forms
                transcriber.copy_until(hash_position)
                for rule, value in six.iteritems(string.string):
                    transcriber.add(
                        self.plural_template.format(
                            rule=self.RULES_ITOA[rule], string=value))
                transcriber.skip(len(placeholder))

        transcriber.copy_until(len(template))
        compiled = transcriber.get_destination()

        return compiled

Beispiel #18

0

Datei anzeigen

    def _compile_from_template(self, template, stringset, **kwargs):
        """Produce the translated file by filling in the template.

        For every string of the stringset, the string's
        `template_replacement` is looked up in the template and swapped for
        the rendered translation (pluralized or styled literal). A non-empty
        `context` is written in front of the translation, followed by one
        space.

        Returns:
            The compiled file content.
        """
        writer = Transcriber(template)
        template = writer.source

        for entry in stringset:
            render = (self._compile_pluralized if entry.pluralized
                      else self._write_styled_literal)
            rendered = render(entry)

            placeholder = entry.template_replacement
            writer.copy_until(template.index(placeholder))

            # The context carries custom tags; the trailing space keeps it
            # from being merged with the translation that follows
            if entry.context:
                writer.add(entry.context)
                writer.add(' ')

            writer.add(rendered)
            writer.skip(len(placeholder))

        writer.copy_until(len(template))
        return writer.get_destination()

Beispiel #19

0

Datei anzeigen

    def _escape_amps(string):
        """ Replace every "lonely" `&` (ampersand) with `&amp;`.

            An ampersand that starts a valid XML escape sequence (a named
            entity among lt/gt/amp/apos/quot, or a decimal/hexadecimal
            character reference) is kept as it is. Eg,

            "hello world"         -> "hello world"
            "hello &world"        -> "hello &amp;world"
            "hello &amp;world"    -> "hello &amp;world"
            "hello &lt;world"     -> "hello &lt;world"
            "hello &#x0a1f;world" -> "hello &#x0a1f;world"
            "&&#x05af;&&"         -> "&amp;&#x05af;&amp;&amp;"
        """

        # Positions of ampersands that open a valid XML escape sequence
        escape_sequence = re.compile(
            r'&(lt|gt|amp|apos|quot|#\d+|#x[0-9a-fA-F]+);')
        protected = {found.start()
                     for found in escape_sequence.finditer(string)}

        # Every other ampersand is lonely and has to be escaped
        lonely = sorted(
            found.start()
            for found in re.finditer(r'&', string)
            if found.start() not in protected
        )

        # Use Transcriber to replace lonely ampersands with '&amp;'
        transcriber = Transcriber(string)
        for position in lonely:
            transcriber.copy_until(position)
            transcriber.add('&amp;')
            transcriber.skip(1)
        transcriber.copy_to_end()
        return transcriber.get_destination()

Beispiel #20

0

Datei anzeigen

Datei: beta_android.py Projekt: transifex/openformats

class BetaAndroidHandler(Handler):
    name = "BETA_ANDROID"
    extension = "xml"

    plural_template = u'<item quantity="{rule}">{string}</item>'
    SPACE_PAT = re.compile(r'^\s*$')
    # Attributes that designate a string should be filtered out
    FILTER_ATTRIBUTES = {'translatable': 'false'}

    EXTRACTS_RAW = False

    SPECIFIER = re.compile(
        r'%((?:(?P<ord>\d+)\$|\((?P<key>\w+)\))?(?P<fullvar>[+#\- 0]*(?:\d+)?'
        r'(?:\.\d+)?(hh\|h\|l\|ll|j|z|t|L)?(?P<type>[diufFeEgGxXaAoscpn%])))')

    def parse(self, content, **kwargs):
        """Extract the strings found under the `<resources` tag of `content`.

        Binary content is decoded as UTF-8 first. Everything before
        `<resources` is kept verbatim at the start of the template. The
        `string`, `string-array` and `plurals` tags are delegated to their
        `_handle_*` method; the comment seen right before such a tag is
        passed along as its developer comment. Tags for which
        `_should_ignore` is true are left alone and discard the pending
        comment.

        Returns:
            A ``(template, stringset)`` tuple.

        Raises:
            ValueError: if `content` holds no `<resources` tag.
        """
        stringset = []
        if isinstance(content, six.binary_type):
            content = content.decode("utf-8")  # convert to unicode

        resources_start = content.index("<resources")
        self.transcriber = Transcriber(content[resources_start:])
        source = self.transcriber.source
        self._order = 0

        pending_comment = ""
        wanted_tags = ("string-array", "string", "plurals", DumbXml.COMMENT)
        for tag, offset in DumbXml(source).find(wanted_tags):
            if self._should_ignore(tag):
                pending_comment = ""
                continue

            tag_name = tag.name
            if tag_name == DumbXml.COMMENT:
                # remember the comment for whichever tag comes next
                pending_comment = tag.inner
                self.transcriber.copy_until(offset + len(tag.content))
                continue

            if tag_name == "string":
                extracted = [
                    self._handle_string_tag(tag, offset, pending_comment)
                ]
            elif tag_name == "string-array":
                extracted = list(
                    self._handle_string_array_tag(tag, offset,
                                                  pending_comment)
                )
            elif tag_name == "plurals":
                extracted = [
                    self._handle_plurals_tag(tag, offset, pending_comment)
                ]
            else:
                continue

            # the comment has been consumed by this tag
            pending_comment = ""
            stringset.extend(
                found for found in extracted if found is not None
            )

        self.transcriber.copy_until(len(source))
        template = (content[:resources_start] +
                    self.transcriber.get_destination())
        self.transcriber = None

        return template, stringset

    def _handle_string_tag(self, tag, offset, comment):
        """Process one `<string>` tag located at `offset` in the source.

        When the tag's inner text is not blank, an `OpenString` is created
        (key: the `name` attribute, context: the `product` attribute or "")
        and the inner text is replaced in the template by the string's
        `template_replacement`. A blank tag is copied unchanged.

        Returns:
            The created `OpenString`, or None for a blank tag.
        """
        if tag.inner.strip() == "":
            string = None
        else:
            string = OpenString(
                tag.attrs['name'],
                tag.inner,
                context=tag.attrs.get('product', ""),
                order=self._order,
                developer_comment=comment,
            )
            self._order += 1

        # ... <string name="foo">Hello ....
        #                        ^
        inner_start = offset + tag.inner_offset
        self.transcriber.copy_until(inner_start)

        # ... ing name="foo">Hello world</stri...
        #                               ^
        if string is None:
            self.transcriber.copy_until(inner_start + len(tag.inner))
        else:
            self.transcriber.add(string.template_replacement)
            self.transcriber.skip(len(tag.inner))

        # ...ello World</string>
        #                       ^
        self.transcriber.copy_until(offset + len(tag.content))

        return string

    def _handle_string_array_tag(self, string_array_tag, string_array_offset,
                                 comment):
        """Process a `<string-array>` tag, yielding a string per filled item.

        For every `<item>` whose inner text is not blank, an `OpenString`
        keyed `name[index]` is created and yielded, and the item's inner text
        is replaced in the template by the string's `template_replacement`.
        Blank items are copied unchanged and yield nothing; they still take
        up an index.

        Args:
            string_array_tag: the `<string-array>` tag.
            string_array_offset: position of the tag in the source.
            comment: developer comment attached to every yielded string.
        """
        # ...ing-array>   <item>H...
        #              ^
        self.transcriber.copy_until(string_array_offset +
                                    string_array_tag.inner_offset)

        context = string_array_tag.attrs.get('product', "")
        for index, (item_tag,
                    item_offset) in enumerate(string_array_tag.find('item')):
            string = None
            if item_tag.inner.strip() != "":
                string = OpenString("{}[{}]".format(
                    string_array_tag.attrs['name'], index),
                                    item_tag.inner,
                                    context=context,
                                    order=self._order,
                                    developer_comment=comment)
                self._order += 1
                yield string

            # Absolute positions of the item and of its inner text
            item_start = string_array_offset + item_offset
            inner_start = item_start + item_tag.inner_offset

            # ... <item>Hello...
            #           ^
            self.transcriber.copy_until(inner_start)

            # ...ello world</item>...
            #              ^
            if string is not None:
                self.transcriber.add(string.template_replacement)
                self.transcriber.skip(len(item_tag.inner))
            else:
                self.transcriber.copy_until(inner_start +
                                            len(item_tag.inner))

            # orld</item>   <it...
            #            ^
            # The end of the item is measured from the item's start, not from
            # its inner text; going further could run past the end of the
            # enclosing tag when the item's opening tag is long
            self.transcriber.copy_until(item_start + len(item_tag.content))

        # </item>  </string-array>
        #                         ^
        self.transcriber.copy_until(string_array_offset +
                                    len(string_array_tag.content))

    def _handle_plurals_tag(self, plurals_tag, plurals_offset, comment):
        """Process a `<plurals>` tag located at `plurals_offset`.

        The inner text of every `<item>` is collected under the rule number
        of its `quantity` attribute. If all items are filled, one pluralized
        `OpenString` is created and the span from the first item through the
        last one is replaced in the template by the string's
        `template_replacement`. If any item is blank, or the tag has no items
        at all, nothing is extracted and the tag is copied unchanged.

        Returns:
            The created `OpenString`, or None when nothing was extracted.
        """
        # <plurals name="foo">   <item>Hello ...
        #                     ^
        self.transcriber.copy_until(plurals_offset + plurals_tag.inner_offset)

        first_item_offset = None
        last_item_tag = last_item_offset = None
        strings = {}
        for item_tag, item_offset in plurals_tag.find('item'):
            if item_tag.inner.strip() == "":
                strings = None
                break

            # compare against None so that an offset of 0 is kept
            if first_item_offset is None:
                first_item_offset = item_offset

            rule = self.get_rule_number(item_tag.attrs['quantity'])
            strings[rule] = item_tag.inner
            last_item_tag, last_item_offset = item_tag, item_offset

        string = None
        # `strings` is None after a blank item and empty when there were no
        # items; in both cases there is nothing to extract
        if strings:
            context = plurals_tag.attrs.get('product', "")
            string = OpenString(plurals_tag.attrs['name'],
                                strings,
                                pluralized=True,
                                context=context,
                                order=self._order,
                                developer_comment=comment)
            self._order += 1

            # <plurals name="foo">   <item>Hello ...
            #                        ^
            self.transcriber.copy_until(plurals_offset + first_item_offset)

            # ...</item>   </plurals>...
            #           ^
            self.transcriber.add(string.template_replacement)
            self.transcriber.skip(last_item_offset +
                                  len(last_item_tag.content) -
                                  first_item_offset)

        # ...</plurals> ...
        #              ^
        self.transcriber.copy_until(plurals_offset + len(plurals_tag.content))

        return string

    def _should_ignore(self, tag):
        """
        Tells whether a tag must be left out of processing.

        Returns True as soon as one of the tag's attributes is present and
        equal to the value registered for the same key in FILTER_ATTRIBUTES;
        returns False when no such attribute exists.
        """
        tag_attrs = tag.attrs
        for attr_name, unwanted in six.iteritems(self.FILTER_ATTRIBUTES):
            actual = tag_attrs.get(attr_name, None)
            if actual is None:
                # The tag does not carry this attribute at all
                continue
            if actual == unwanted:
                return True
        return False

    def compile(self, template, stringset, **kwargs):
        """ Builds the compiled file out of a template and a stringset
        Everything before the '<resources' tag is kept verbatim. The rest is
        processed in two passes: the first one hands every <string>,
        <string-array> and <plurals> tag that is not ignored to its dedicated
        compiler, the second one drops the <string-array> tags that still
        have inner content but no <item> left.
        args:
            template: the template content; it must contain '<resources'
                (str.index raises ValueError otherwise)
            stringset: iterable with the strings to put into the template
        returns:
            compiled: the compiled content
        """
        resources_start = template.index("<resources")
        self._stringset = list(stringset)
        self._stringset_index = 0

        self.transcriber = Transcriber(template[resources_start:])
        self.source = self.transcriber.source

        # First pass: dispatch each translatable tag to its compiler
        tag_compilers = {
            "string": self._compile_string,
            "string-array": self._compile_string_array,
            "plurals": self._compile_plurals,
        }
        translatable_tags = DumbXml(self.source).find(
            ("string", "string-array", "plurals"))
        for tag, offset in translatable_tags:
            if self._should_ignore(tag):
                continue
            compile_tag = tag_compilers.get(tag.name)
            if compile_tag is not None:
                compile_tag(tag, offset)
        self.transcriber.copy_until(len(self.source))

        # Second pass: run over the output of the first pass and clear the
        # <string-array>s that were left without any <item>
        first_pass_output = self.transcriber.get_destination()
        self.transcriber = Transcriber(first_pass_output)
        self.source = self.transcriber.source
        for array_tag, array_offset in DumbXml(self.source).find(
                "string-array"):
            if not array_tag.inner:
                continue
            if list(array_tag.find("item")):
                continue
            self.transcriber.copy_until(array_offset)
            self.transcriber.skip(len(array_tag.content))
        self.transcriber.copy_until(len(self.source))

        header = template[:resources_start]
        compiled = header + self.transcriber.get_destination()

        # Drop the per-compilation state
        self._stringset = None
        self._stringset_index = None
        self.transcriber = None

        return compiled

    def _compile_string(self, string_tag, string_offset):
        """ Handles the compilation of a single <string> tag
        When the tag's inner content is the template replacement of the next
        pending string, that content is swapped with the string's text and
        the string is consumed; otherwise the whole tag is skipped.
        args:
            string_tag: the parsed <string> tag
            string_offset: offset of the tag, in the coordinates used by
                self.transcriber
        """
        writer = self.transcriber
        try:
            candidate = self._stringset[self._stringset_index]
        except IndexError:
            # No strings left in the stringset
            candidate = None

        is_match = (candidate is not None and
                    candidate.template_replacement == string_tag.inner)
        if not is_match:
            # Nothing to put here: leave the tag out of the output
            writer.copy_until(string_offset)
            writer.skip(len(string_tag.content))
            return

        # Consume the string and replace the hash with its text
        self._stringset_index += 1

        writer.copy_until(string_offset + string_tag.inner_offset)
        writer.add(candidate.string)
        writer.skip(len(string_tag.inner))
        writer.copy_until(string_offset + len(string_tag.content))

    def _compile_string_array(self, string_array_tag, string_array_offset):
        """ Handles the compilation of a single <string-array> tag
        Each <item> whose inner content is the template replacement of the
        next pending string gets that string's text and consumes it; every
        other <item> is skipped.
        args:
            string_array_tag: the parsed <string-array> tag
            string_array_offset: offset of the tag, in the coordinates used
                by self.transcriber
        """
        writer = self.transcriber
        writer.copy_until(string_array_offset +
                          string_array_tag.inner_offset)

        for item_tag, item_offset in string_array_tag.find("item"):
            # Item offsets are relative to the start of the <string-array>
            item_start = string_array_offset + item_offset
            try:
                candidate = self._stringset[self._stringset_index]
            except IndexError:
                # No strings left in the stringset
                candidate = None

            if (candidate is None
                    or candidate.template_replacement != item_tag.inner):
                # Nothing to put here: leave the item out of the output
                writer.copy_until(item_start)
                writer.skip(len(item_tag.content))
                continue

            # Consume the string and replace the hash with its text
            self._stringset_index += 1

            writer.copy_until(item_start + item_tag.inner_offset)
            writer.add(candidate.string)
            writer.skip(len(item_tag.inner))
            writer.copy_until(item_start + len(item_tag.content))

        writer.copy_until(string_array_offset +
                          len(string_array_tag.content))

    def _compile_plurals(self, plurals_tag, plurals_offset):
        """ Handles the compilation of a single <plurals> tag
        When the stripped inner content of the tag is the template
        replacement of the next pending string, the hash is replaced with one
        rendering of self.plural_template per plural form and the string is
        consumed; otherwise the whole tag is skipped.

        If the hash sits alone on its line (only whitespace around it), every
        plural form is written on its own line with the same indentation and
        trailing whitespace. If not, the forms are written one after the
        other exactly where the hash was.
        args:
            plurals_tag: the parsed <plurals> tag
            plurals_offset: offset of the tag inside self.source
        """
        try:
            next_string = self._stringset[self._stringset_index]
        except IndexError:
            # No strings left in the stringset
            next_string = None

        if (next_string is None or next_string.template_replacement
                != plurals_tag.inner.strip()):
            # didn't find it, must remove by skipping it
            self.transcriber.copy_until(plurals_offset)
            self.transcriber.skip_until(plurals_offset +
                                        len(plurals_tag.content))
            return

        # found one to replace
        self._stringset_index += 1

        template_hash = next_string.template_replacement
        hash_length = len(template_hash)
        # The hash is guaranteed to be in `inner` because it equals
        # `inner.strip()`
        hash_position = (plurals_offset + plurals_tag.inner_offset +
                         plurals_tag.inner.index(template_hash))

        # if the hash is on its own on a line with only spaces, we have to
        # remember its indent (text between the previous newline and the
        # hash) and its tail (text between the hash and the next newline)
        is_multiline = True
        indent = tail = ''
        indent_length = tail_length = 0
        try:
            indent_length = self.source[hash_position::-1].index('\n') - 1
            indent = self.source[hash_position - indent_length:hash_position]
            end_of_hash = hash_position + hash_length
            tail_length = self.source[end_of_hash:].index('\n')
            tail = self.source[end_of_hash:end_of_hash + tail_length]
        except ValueError:
            # There is no newline before or after the hash
            is_multiline = False

        is_multiline = (is_multiline and self.SPACE_PAT.search(indent)
                        and self.SPACE_PAT.search(tail))

        plural_items = [
            self.plural_template.format(rule=self.get_rule_string(rule),
                                        string=value)
            for rule, value in next_string.string.items()
        ]

        if is_multiline:
            # write until the beginning of the hash's line, then replace the
            # whole line (indent, hash, tail and newline) with one line per
            # plural form
            self.transcriber.copy_until(hash_position - indent_length)
            for plural_item in plural_items:
                self.transcriber.add(indent + plural_item + tail + '\n')
            self.transcriber.skip(indent_length + hash_length +
                                  tail_length + 1)

        else:
            # string is not on its own, simply replace hash with all plural
            # forms
            self.transcriber.copy_until(hash_position)
            for plural_item in plural_items:
                self.transcriber.add(plural_item)
            # The output is positioned exactly at the hash, so only the hash
            # must be skipped; also skipping the indent and tail lengths
            # would swallow the text that follows the hash on the same line
            # (e.g. the closing </plurals>)
            self.transcriber.skip(hash_length)

        # finish up by copying until the end of </plurals>
        self.transcriber.copy_until(plurals_offset +
                                    len(plurals_tag.content))