Beispiel #1
0
    def _parse(self, is_source, lang_rules):
        resource = self.resource

        context = ""
        text = self.content

        name_start_char = u':A-Z_a-z\xC0-\xD6\xD8-\xF6\xF8-\u02FF' + \
            u'\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF'+\
            u'\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD'
        name_char = name_start_char + ur'\-\.0-9' + u'\xB7\u0300-\u036F\u203F-\u2040'
        name = u'[' + name_start_char + u'][' + name_char + u']*'

        re_entity = u'<!ENTITY\s+(' + name + u')\s+((?:\"[^\"]*\")|(?:\'[^\']*\'))\s*>'
        re_comment = u'\<!\s*--(.*?)(?:--\s*\>)'
        re_tag = re.compile("(%s|%s)" % (re_entity, re_comment), re.M|re.S|re.U)

        latest_comment = ""
        for (orig, key, value, comment) in re.findall(re_tag, text):
            if key:
                self.stringset.strings.append(GenericTranslation(key,
                    self._unescape(value[1:-1]),
                    rule=5, # no plural forms
                    context=context, comment=latest_comment,
                    pluralized=False, fuzzy=False,
                    obsolete=False))
                if is_source:
                    hashed_entity = orig.replace(value,
                        '"%(hash)s_tr"' % {'hash': hash_tag(key, context)} )
                    text = text.replace(orig, hashed_entity)
                latest_comment = ""

            if comment:
                latest_comment = comment
        return text
Beispiel #2
0
    def _parse(self, is_source, lang_rules):
        """Parse .properties content and fill the stringset with its entries.

        Comment lines directly above a property (with no blank line in
        between) are joined with newlines and stored as that entry's comment.

        :param is_source: when true, a template is built in which every
            non-blank value is replaced by a ``<hash>_tr`` placeholder; when
            false, keys without a matching ``SourceEntity`` are skipped.
        :param lang_rules: accepted but not used by this method.
        :returns: the template text (``u""`` when ``is_source`` is false).
        """
        resource = self.resource

        context = ""
        self._find_linesep(self.content)
        template = u""
        lines = self._iter_by_line(self.content)
        comment_lines = []
        for line in lines:
            line = self._prepare_line(line)
            # Skip empty lines and comments
            if not line or line.startswith(self.comment_chars):
                if is_source:
                    template += line + self.linesep
                if not line:
                    # A blank line ends the current comment block, so that
                    # leading licence headers and the like are not attached
                    # to the first property.
                    comment_lines = []
                else:
                    # this is a comment, add it to the block
                    comment_lines.append(line[1:])
                continue
            # An unescaped trailing backslash joins the next line to this
            # property.  endswith() is used instead of line[-1] so that a
            # continuation which collapses to an empty string cannot raise
            # IndexError.
            while line.endswith('\\') and not self._is_escaped(line, -1):
                try:
                    raw_next = next(lines)
                except StopIteration:
                    # Dangling continuation on the last line: drop the
                    # backslash instead of leaking StopIteration to callers.
                    line = line[:-1]
                    break
                nextline = self._prepare_line(raw_next)
                # This line will become part of the value
                line = line[:-1] + self._check_escaped_ws(nextline)
            if not line:
                # The continuation produced nothing; handle it like a blank
                # line.
                if is_source:
                    template += self.linesep
                comment_lines = []
                continue
            key, value, old_value = self._key_value_from_line(line)
            if is_source:
                if not (value and value.strip()):
                    template += line + self.linesep
                    # Keys with no values should not be shown to translator
                    continue
                key_len = len(key)
                template += line[:key_len] + re.sub(
                    re.escape(old_value),
                    "%(hash)s_tr" % {'hash': hash_tag(key, context)},
                    line[key_len:]
                ) + self.linesep
            elif not SourceEntity.objects.filter(resource=resource, string=key).exists():
                # ignore keys with no translation
                continue
            self.stringset.add(GenericTranslation(
                key, self._unescape(value), context=context,
                comment="\n".join(comment_lines),
            ))
            # reset comment block, it has already been written
            comment_lines = []
        if is_source:
            # Drop the line separator appended after the last line.
            template = template[:-len(self.linesep)]
        return template
Beispiel #3
0
    def parse_file(self, is_source=False, lang_rules=None):
        """
        Parse an INI file and create a stringset with all entries in the file.

        The file at ``self.filename`` is read as UTF-8 and split on ``'\\n'``.
        Every ``KEY=value`` line becomes a ``GenericTranslation`` in
        ``self.stringset``; ``self.suggestions`` is set to an empty
        ``StringSet``.

        :param is_source: when true, ``self.template`` is set to the UTF-8
            encoded file content with every non-empty value replaced by a
            ``<hash>_tr`` placeholder.
        :param lang_rules: accepted but not used by this method.
        """
        stringset = StringSet()
        suggestions = StringSet()

        with codecs.open(self.filename, "r", "utf-8") as fh:
            buf = fh.read()

        # We use empty context
        context = ""

        # The template is assembled line by line.  Substituting each line
        # back into the whole buffer with re.sub() could also rewrite other
        # lines that merely contain this one as a substring.
        template_lines = []

        for line in buf.split('\n'):
            # Skip empty lines and comments
            if not line or line.startswith(self.comment_chars):
                template_lines.append(line)
                continue

            try:
                source, trans = line.split('=', 1)
            except ValueError:
                # Maybe abort instead of skipping?
                logger.error('Could not parse line "%s". Skipping...', line)
                template_lines.append(line)
                continue

            # In versions >=1.6 translations are surrounded by double quotes,
            # so remove them.  Normally a translation starting with '"' is a
            # 1.6 file and must end with '"', since translations starting
            # with '"' are not allowed in 1.5.  Both ends are checked to be
            # safe.
            quoted = trans.startswith('"') and trans.endswith('"')
            if quoted:
                trans = trans[1:-1]

            if is_source and trans:
                placeholder = "%(hash)s_tr" % {
                    'hash': hash_tag(source, context)}
                if quoted:
                    placeholder = '"' + placeholder + '"'
                template_lines.append(source + '=' + placeholder)
            else:
                # An empty value stays verbatim: an empty search pattern
                # would match at every position and scatter placeholders
                # across the line.
                template_lines.append(line)

            stringset.strings.append(GenericTranslation(source,
                trans, rule=5, context=context,
                pluralized=False, fuzzy=False,
                obsolete=False))

        self.stringset = stringset
        self.suggestions = suggestions

        if is_source:
            self.template = str(u'\n'.join(template_lines).encode('utf-8'))
Beispiel #4
0
    def _parse(self, is_source, lang_rules):
        """
        Read ``KEY=value`` lines from ``self.content`` into the stringset.

        The Joomla INI flavour is detected through
        ``JoomlaIniVersion.create`` and kept in ``self.jformat``.  For source
        files a template is assembled in which every non-blank value is
        replaced by a ``<hash>_tr`` placeholder, and the last comment line
        seen before an entry is passed along as its comment.  For translation
        files, entries without a matching ``SourceEntity`` or with a blank
        translation are skipped.

        ``lang_rules`` is accepted but not used by this method.

        Returns the template text up to (not including) its last line
        separator.
        """
        content = self.content
        self.jformat = JoomlaIniVersion.create(self.content)
        self._find_linesep(content)

        pending_comment = ""
        template = ''
        for line in self._iter_by_line(content):
            is_comment = line.startswith(self.comment_chars)
            if not line or is_comment:
                # Blank and comment lines are only tracked for source files.
                if is_source:
                    template += line + self.linesep
                    if is_comment:
                        pending_comment = line[1:] + self.linesep
                    else:
                        pending_comment = ""
                continue

            if '=' not in line:
                # Maybe abort instead of skipping?
                logger.warning('Could not parse line "%s". Skipping...' % line)
                continue
            source, trans = line.split('=', 1)

            escaped_trans = self.jformat.get_translation(trans)
            if isinstance(self.jformat, JoomlaIniNew):
                # Drop the first and last character of the raw value.
                trans = trans[1:-1]
            context = ""  # We use empty context

            if is_source:
                if not trans.strip():
                    # Blank values are kept verbatim and not registered.
                    template += line + self.linesep
                    continue
                placeholder = "%(hash)s_tr" % {'hash': hash_tag(source, context)}
                tail = line[len(source):]
                template += source + re.sub(re.escape(trans), placeholder, tail)
                template += self.linesep
            else:
                known = SourceEntity.objects.filter(
                    resource=self.resource, string=source).exists()
                if not known or not escaped_trans.strip():
                    # ignore keys with no translation
                    context = ""
                    continue

            self._add_translation_string(
                source, self._unescape(escaped_trans),
                context=context, comment=pending_comment)
            pending_comment = ""
        return template[:template.rfind(self.linesep)]
Beispiel #5
0
 def _update_plural_hashes(self, translations, content):
     """Replace the plural forms of every plural entry with hash placeholders.

     For each entry of ``self.po`` that has a ``msgid_plural``,
     ``msgstr_plural`` is replaced by a dict mapping each index ``p`` (one
     per value returned by ``self.language.get_pluralrules_numbers()``) to
     ``"<hash>_pl_<p>"``.

     ``translations`` and ``content`` are accepted but not used here.

     Returns ``unicode(self.po)``.
     """
     for entry in self.po:
         if not entry.msgid_plural:
             continue
         # last rule excluding other(5)
         rules = self.language.get_pluralrules_numbers()
         entry_hash = hash_tag(
             entry.msgid, escape_context(entry.msgctxt) or ''
         )
         # One placeholder per plural rule index.
         entry.msgstr_plural = dict(
             (idx, "%s_pl_%d" % (entry_hash, idx))
             for idx in range(len(rules))
         )
     return unicode(self.po)
    def _parse(self, is_source, lang_rules):
        """Parse .properties content and fill the stringset with its entries.

        :param is_source: when true, a template is built in which every
            non-blank value is replaced by a ``<hash>_tr`` placeholder; when
            false, keys without a matching ``SourceEntity`` are skipped.
        :param lang_rules: accepted but not used by this method.
        :returns: the template text (``u""`` when ``is_source`` is false).
        """
        resource = self.resource

        context = ""
        self._find_linesep(self.content)
        template = u""
        lines = self._iter_by_line(self.content)
        for line in lines:
            line = self._prepare_line(line)
            # Skip empty lines and comments
            if not line or line.startswith(self.comment_chars):
                if is_source:
                    template += line + self.linesep
                continue
            # An unescaped trailing backslash joins the next line to this
            # property.  endswith() avoids the IndexError that line[-1]
            # raises once a continuation collapses to an empty string.
            while line.endswith('\\') and not self._is_escaped(line, -1):
                try:
                    raw_next = next(lines)
                except StopIteration:
                    # The content ends on a continuation backslash: drop it
                    # rather than let StopIteration escape from the parser.
                    line = line[:-1]
                    break
                nextline = self._prepare_line(raw_next)
                # This line will become part of the value
                line = line[:-1] + self._check_escaped_ws(nextline)
            if not line:
                # Nothing left after joining; treat it as a blank line.
                if is_source:
                    template += self.linesep
                continue
            key, value, old_value = self._key_value_from_line(line)
            if is_source:
                if not (value and value.strip()):
                    template += line + self.linesep
                    # Keys with no values should not be shown to translator
                    continue
                key_len = len(key)
                template += line[:key_len] + re.sub(
                    re.escape(old_value),
                    "%(hash)s_tr" % {'hash': hash_tag(key, context)},
                    line[key_len:]) + self.linesep
            elif not SourceEntity.objects.filter(resource=resource,
                                                 string=key).exists():
                # ignore keys with no translation
                continue
            self.stringset.add(
                GenericTranslation(key, self._unescape(value),
                                   context=context))
        if is_source:
            # Drop the line separator appended after the last line.
            template = template[:-len(self.linesep)]
        return template
Beispiel #7
0
    def _parse(self, is_source, lang_rules):
        """Parse .properties content and fill the stringset with its entries.

        :param is_source: when true, a template is built in which every
            non-blank value is replaced by a ``<hash>_tr`` placeholder; when
            false, keys without a matching ``SourceEntity`` are skipped.
        :param lang_rules: accepted but not used by this method.
        :returns: the template text (``u""`` when ``is_source`` is false).
        """
        resource = self.resource

        context = ""
        self._find_linesep(self.content)
        template = u""
        lines = self._iter_by_line(self.content)
        for line in lines:
            line = self._prepare_line(line)
            # Skip empty lines and comments
            if not line or line.startswith(self.comment_chars):
                if is_source:
                    template += line + self.linesep
                continue
            # A trailing, unescaped backslash means the next line belongs to
            # the same property.  Using endswith() keeps an empty joined line
            # from raising IndexError on line[-1].
            while line.endswith("\\") and not self._is_escaped(line, -1):
                try:
                    raw_next = next(lines)
                except StopIteration:
                    # No line follows the continuation marker; discard the
                    # marker so StopIteration does not reach the caller.
                    line = line[:-1]
                    break
                nextline = self._prepare_line(raw_next)
                # This line will become part of the value
                line = line[:-1] + self._check_escaped_ws(nextline)
            if not line:
                # The joined line is empty; handle it like a blank line.
                if is_source:
                    template += self.linesep
                continue
            key, value, old_value = self._key_value_from_line(line)
            if is_source:
                if not (value and value.strip()):
                    template += line + self.linesep
                    # Keys with no values should not be shown to translator
                    continue
                key_len = len(key)
                placeholder = "%(hash)s_tr" % {"hash": hash_tag(key, context)}
                template += (
                    line[:key_len]
                    + re.sub(re.escape(old_value), placeholder, line[key_len:])
                    + self.linesep
                )
            elif not SourceEntity.objects.filter(resource=resource, string=key).exists():
                # ignore keys with no translation
                continue
            self.stringset.add(GenericTranslation(key, self._unescape(value), context=context))
        if is_source:
            # Drop the line separator appended after the last line.
            template = template[: -len(self.linesep)]
        return template
Beispiel #8
0
 def compare_to_actual_file(self, handler, actual_file):
     """Compile ``handler.template`` and assert it equals a file's content.

     A compiler is created from ``handler.CompilerClass`` and run with its
     ``_get_source_strings`` and ``_tset`` attributes patched: the source
     strings are ``(index, hash)`` pairs and the translations map each index
     to the translation of the matching stringset entry.

     :param handler: parsed handler providing ``template``, ``stringset``,
         ``resource``, ``language``, ``CompilerClass`` and ``_escape``.
     :param actual_file: path of the file holding the expected output.
     """
     compiler = handler.CompilerClass(handler.resource)
     compiler._tdecorator = Decorator(escape_func=handler._escape)
     compiler._examine_content(handler.template)
     compiler.language = handler.language
     sources = [(idx, "%s" % hash_tag(s.source_entity, ""))
                for idx, s in enumerate(handler.stringset)]
     translations = dict((idx, s.translation)
                         for idx, s in enumerate(handler.stringset))
     with patch.object(compiler, '_get_source_strings') as smock:
         with patch.object(compiler, '_tset', create=True) as tmock:
             smock.return_value = sources
             tmock.return_value = translations
             compiler._compile(handler.template)
             template = compiler.compiled_template
     with open(actual_file, 'r') as f:
         actual_content = f.read()
     # assertEquals is a deprecated alias that newer unittest versions no
     # longer provide.
     self.assertEqual(template, actual_content)
Beispiel #9
0
    def _parse(self, is_source, lang_rules):
        """Split ``self.content`` into paragraphs and register each of them.

        Paragraphs are separated by two consecutive line separators.  A
        separator is not treated as a paragraph break when a ``{{`` opener
        precedes it; scanning then resumes at the next ``}}``.  Each
        non-empty, stripped paragraph is added to the stringset and replaced
        in the template by a ``<hash>_tr`` placeholder.

        ``is_source`` and ``lang_rules`` are accepted but not used here.

        Returns the template text.
        """
        self._find_linesep(self.content)
        par_splitter = self.linesep + self.linesep
        template_open = "{{"
        template_ends = "}}"

        content = self.content
        template = content
        context = ''

        prev_split_pos = 0
        prev_text_pos = 0
        while prev_text_pos != -1:
            par_pos = content.find(par_splitter, prev_split_pos)
            t_open_pos = content.find(template_open, prev_split_pos)
            if par_pos == -1:
                # No paragraph break left: the rest of the document is the
                # last paragraph.  Slicing up to par_pos here would mean
                # content[start:-1] and silently drop the final character.
                source = trans = content[prev_text_pos:].strip()
                prev_split_pos = prev_text_pos = -1
            elif t_open_pos == -1 or par_pos < t_open_pos:
                source = trans = content[prev_text_pos:par_pos].strip()
                # Skip the whole separator, whatever its length.
                prev_split_pos = prev_text_pos = par_pos + len(par_splitter)
            else:
                # A template opens before the next break: jump to its end so
                # that breaks before that point are ignored.
                prev_split_pos = content.find(
                    template_ends, prev_split_pos + 1)
                continue

            if not source:
                continue
            # NOTE(review): this substitutes every occurrence in the
            # template, including text inside placeholders inserted earlier.
            template = re.sub(
                re.escape(trans),
                "%(hash)s_tr" % {'hash': hash_tag(source, context)},
                template
            )
            self.stringset.add(GenericTranslation(
                    source, trans, context=context
            ))
        return template
Beispiel #10
0
    def _parse(self, is_source, lang_rules):
        """Register every paragraph of ``self.content`` as a string.

        Two consecutive line separators end a paragraph, unless a ``{{``
        opener comes first; in that case scanning continues from the next
        ``}}``.  Every non-empty, stripped paragraph is added to the
        stringset and swapped for a ``<hash>_tr`` placeholder in the
        template.

        ``is_source`` and ``lang_rules`` are accepted but not used here.

        Returns the template text.
        """
        self._find_linesep(self.content)
        par_splitter = self.linesep + self.linesep
        splitter_len = len(par_splitter)
        template_open = "{{"
        template_ends = "}}"

        template = self.content
        context = ''

        prev_split_pos = 0
        prev_text_pos = 0
        while prev_text_pos != -1:
            par_pos = self.content.find(par_splitter, prev_split_pos)
            t_open_pos = self.content.find(template_open, prev_split_pos)
            if par_pos == -1:
                # End of document.  The slice is left open-ended on purpose:
                # using par_pos (-1) as its end would cut off the last
                # character of the text.
                source = trans = self.content[prev_text_pos:].strip()
                prev_split_pos = prev_text_pos = -1
            elif t_open_pos == -1 or par_pos < t_open_pos:
                source = trans = self.content[prev_text_pos:par_pos].strip()
                # Advance past the full separator instead of assuming that
                # it is two characters long.
                prev_split_pos = prev_text_pos = par_pos + splitter_len
            else:
                # Inside a template: resume scanning at its closing marker.
                prev_split_pos = self.content.find(template_ends,
                                                   prev_split_pos + 1)
                continue

            if not source:
                continue
            # NOTE(review): every occurrence in the template is replaced,
            # which includes text inside previously inserted placeholders.
            template = re.sub(
                re.escape(trans),
                "%(hash)s_tr" % {'hash': hash_tag(source, context)}, template)
            self.stringset.add(
                GenericTranslation(source, trans, context=context))
        return template
Beispiel #11
0
    def _parse(self, is_source, lang_rules):
        resource = self.resource

        context = ""
        text = self.content

        name_start_char = u':A-Z_a-z\xC0-\xD6\xD8-\xF6\xF8-\u02FF' + \
            u'\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF'+\
            u'\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD'
        name_char = name_start_char + ur'\-\.0-9' + u'\xB7\u0300-\u036F\u203F-\u2040'
        name = u'[' + name_start_char + u'][' + name_char + u']*'

        re_entity = u'<!ENTITY\s+(' + name + u')\s+((?:\"[^\"]*\")|(?:\'[^\']*\'))\s*>'
        re_comment = u'\<!\s*--(.*?)(?:--\s*\>)'
        re_tag = re.compile("(%s|%s)" % (re_entity, re_comment),
                            re.M | re.S | re.U)

        latest_comment = ""
        for (orig, key, value, comment) in re.findall(re_tag, text):
            if key:
                self.stringset.add(
                    GenericTranslation(
                        key,
                        self._unescape(value[1:-1]),
                        rule=5,  # no plural forms
                        context=context,
                        comment=latest_comment,
                        pluralized=False,
                        fuzzy=False,
                        obsolete=False))
                if is_source:
                    hashed_entity = orig.replace(
                        value,
                        '"%(hash)s_tr"' % {'hash': hash_tag(key, context)})
                    text = text.replace(orig, hashed_entity)
                latest_comment = ""

            if comment:
                latest_comment = comment
        return text
Beispiel #12
0
 def compare_to_actual_file(self, handler, actual_file):
     """Assert that compiling ``handler.template`` reproduces a file.

     The compiler built from ``handler.CompilerClass`` runs with
     ``_get_source_strings`` and ``_tset`` patched, so that its inputs come
     from ``handler.stringset``: ``(index, hash)`` pairs as source strings
     and an index-to-translation dict as translations.

     :param handler: parsed handler providing ``template``, ``stringset``,
         ``resource``, ``language``, ``CompilerClass`` and ``_escape``.
     :param actual_file: path of the file holding the expected output.
     """
     compiler = handler.CompilerClass(handler.resource)
     compiler._tdecorator = Decorator(escape_func=handler._escape)
     compiler._examine_content(handler.template)
     compiler.language = handler.language

     sources = []
     translations = {}
     for idx, s in enumerate(handler.stringset):
         sources.append((idx, "%s" % hash_tag(s.source_entity, "")))
         translations[idx] = s.translation

     with patch.object(compiler, '_get_source_strings') as smock:
         with patch.object(compiler, '_tset', create=True) as tmock:
             smock.return_value = sources
             tmock.return_value = translations
             compiler._compile(handler.template)
             template = compiler.compiled_template
     with open(actual_file, 'r') as f:
         actual_content = f.read()
     # assertEqual replaces the deprecated assertEquals alias, which newer
     # unittest versions no longer provide.
     self.assertEqual(template, actual_content)
Beispiel #13
0
                            obsolete=obsolete)
                i += 1

                if is_source:
                    if sourceString is None:
                        continue
                    if message.attributes.has_key("numerus") and \
                        message.attributes['numerus'].value=='yes' and translation:
                        numerusforms = translation.getElementsByTagName(
                            'numerusform')
                        for n, f in enumerate(numerusforms):
                            f.appendChild(
                                doc.createTextNode(
                                    "%(hash)s_pl_%(key)s" % {
                                        'hash':
                                        hash_tag(sourceString,
                                                 context_name + comment),
                                        'key':
                                        n
                                    }))
                    else:
                        if not translation:
                            translation = doc.createElement("translation")

                        # Delete all child nodes. This is usefull for xml like
                        # strings (eg html) where the translation text is split
                        # in multiple nodes.
                        translation.childNodes = []

                        translation.appendChild(
                            doc.createTextNode(("%(hash)s_tr" % {
                                'hash':
Beispiel #14
0
    def _parse(self, is_source, lang_rules):
        """Parse an apple .strings file and create a stringset with
        all entries in the file.

        Entries have the form ``"key" = "value";`` or ``key = "value";``.
        Only block comments and whitespace may appear between entries;
        anything else raises ``StringsParseError``.

        When ``is_source`` is true, the returned buffer holds the content
        with every non-blank value replaced by a ``<hash>_tr`` placeholder.
        Otherwise entries with a blank value or without a matching
        ``SourceEntity`` are skipped and the returned buffer stays empty.

        ``lang_rules`` is accepted but not used by this method.

        See
        http://developer.apple.com/library/mac/#documentation/MacOSX/Conceptual/BPInternational/Articles/StringsFiles.html
        for details.
        """
        resource = self.resource
        context = ""
        f = self.content
        # One entry: a quoted key (backslash escapes allowed) or a bare
        # \w+ property name, then '=', a quoted value and a ';'.
        p = re.compile(
            r'(?P<line>(("(?P<key>[^"\\]*(?:\\.[^"\\]*)*)")|(?P<property>\w+))\s*=\s*"(?P<value>[^"\\]*(?:\\.[^"\\]*)*)"\s*;)',
            re.U,
        )
        # A /* ... */ comment together with the whitespace around it.
        c = re.compile(r"\s*/\*(.|\s)*?\*/\s*", re.U)
        # A run of whitespace.
        ws = re.compile(r"\s+", re.U)
        buf = u""
        # ``end`` is the offset up to which the content has been consumed.
        end = 0
        start = 0
        for i in p.finditer(f):
            start = i.start()
            end_ = i.end()
            line = i.group("line")
            key = i.group("key")
            if not key:
                key = i.group("property")
            value = i.group("value")
            # Everything between the previous entry and this one must be a
            # sequence of comments and/or whitespace.
            while end < start:
                m = c.match(f, end, start) or ws.match(f, end, start)
                if not m or m.start() != end:
                    raise StringsParseError("Invalid syntax.")
                if is_source:
                    # Keep comments and whitespace in the template.
                    buf += f[end : m.end()]
                end = m.end()
            end = end_
            if is_source:
                if not value.strip():
                    # Blank values are copied verbatim and not registered.
                    buf += line
                    continue
                else:
                    line = f[start:end]
                    # Text before the value, the placeholder in place of the
                    # value, then the text after the value.
                    buf += line[0 : i.start("value") - start]
                    buf += re.sub(
                        re.escape(value),
                        "%(hash)s_tr" % {"hash": hash_tag(key, context)},
                        line[i.start("value") - start : i.end("value") - start],
                    )
                    buf += line[i.end("value") - start :]
            elif not SourceEntity.objects.filter(resource=resource, string=key).exists() or not value.strip():
                # ignore keys with no translation
                continue
            self.stringset.strings.append(
                GenericTranslation(
                    key, self._unescape(value), rule=5, context=context, pluralized=False, fuzzy=False, obsolete=False
                )
            )
        # Whatever follows the last entry must also be comments/whitespace.
        while len(f[end:]):
            m = c.match(f, end) or ws.match(f, end)
            if not m or m.start() != end:
                raise StringsParseError("Invalid syntax.")
            if is_source:
                buf += f[end : m.end()]
            end = m.end()
            # NOTE(review): both patterns consume at least one character, so
            # this guard looks unreachable -- confirm before removing.
            if end == 0:
                break
        return buf
Beispiel #15
0
    def _parse(self, is_source, lang_rules):
        """Parse .properties content and fill the stringset with its entries.

        When a key occurs more than once, the entry registered for the
        earlier occurrence is removed from ``self.stringset.strings`` before
        the new one is added, so the last occurrence wins.

        :param is_source: when true, a template is built in which every
            non-empty value is replaced by a ``<hash>_tr`` placeholder; when
            false, keys without a matching ``SourceEntity`` are skipped.
        :param lang_rules: accepted but not used by this method.
        :returns: the template text (``u""`` when ``is_source`` is false).
        """
        resource = self.resource

        context = ""
        self._find_linesep(self.content)
        template = u""
        # key -> {rule: {'translation': ..., 'context': ...}} for the entry
        # currently registered under that key.
        key_dict = {}
        rule = 5
        lines = self._iter_by_line(self.content)
        for line in lines:
            line = self._prepare_line(line)
            # Skip empty lines and comments
            if not line or line.startswith(self.comment_chars):
                if is_source:
                    template += line + self.linesep
                continue
            # An unescaped trailing backslash joins the next line to this
            # property.  endswith() avoids the IndexError that line[-1]
            # raises once a continuation collapses to an empty string.
            while line.endswith('\\') and not self._is_escaped(line, -1):
                try:
                    raw_next = next(lines)
                except StopIteration:
                    # Dangling continuation on the last line: drop the
                    # backslash instead of leaking StopIteration to callers.
                    line = line[:-1]
                    break
                nextline = self._prepare_line(raw_next)
                # This line will become part of the value
                line = line[:-1] + self._prepare_line(nextline)
            if not line:
                # Nothing left after joining; treat it as a blank line.
                if is_source:
                    template += self.linesep
                continue
            key, value = self._split(line)

            self._visit_value(value)

            if is_source:
                if not value:
                    template += line + self.linesep
                    # Keys with no values should not be shown to translator
                    continue
                key_len = len(key)
                template += line[:key_len] + re.sub(
                    re.escape(value),
                    "%(hash)s_tr" % {'hash': hash_tag(key, context)},
                    line[key_len:]
                ) + self.linesep
            elif not SourceEntity.objects.filter(resource=resource, string=key).exists():
                # ignore keys with no translation
                continue

            translation = self._unescape(value)
            previous = key_dict.get(key, {}).get(rule)
            if previous:
                # The stored translation is already unescaped; unescaping it
                # again could build an entry that differs from the one that
                # was registered, making remove() fail.
                stale = GenericTranslation(
                    key, previous['translation'],
                    context=previous['context'])
                self.stringset.strings.remove(stale)
            # Always remember the latest entry, so that a further duplicate
            # removes this one and not an entry that is already gone.
            key_dict.setdefault(key, {})[rule] = {
                'translation': translation,
                'context': context,
            }

            self._add_translation_string(
                key, translation, context=context
            )
        return template
Beispiel #16
0
                    comment = None
                if entry.flags:
                    flags = ', '.join( f for f in entry.flags)
                else:
                    flags = None
                context=escape_context(entry.msgctxt) or ''
                self._add_translation_string(
                    entry.msgid, msgstr[1], context=context,
                    occurrences=self._serialize_occurrences(entry.occurrences),
                    rule=msgstr[0], pluralized=pluralized, comment=comment,
                    flags=flags
                )

            if is_source:
                entry.msgstr = "%(hash)s_tr" % {
                    'hash': hash_tag(entry.msgid, context)
                }

                if entry.msgid_plural:
                    for n, rule in enumerate(plural_keys):
                        entry.msgstr_plural['%s' % n] = (
                            "%(hash)s_pl_%(key)s" % {
                                'hash':hash_tag(entry.msgid, context),
                                'key':n
                            }
                        )
        return self._po

    def _generate_template(self, po):
        """Return the result of ``self.get_po_contents(po)`` as the template."""
        return self.get_po_contents(po)
Beispiel #17
0
                            occurrences = ";".join(occurrences),
                            pluralized=pluralized, fuzzy=fuzzy,
                            comment=extracomment, obsolete=obsolete)
                i += 1

                if is_source:
                    if sourceString is None:
                        continue
                    if message.attributes.has_key("numerus") and \
                        message.attributes['numerus'].value=='yes' and translation:
                            numerusforms = translation.getElementsByTagName('numerusform')
                            for n,f in enumerate(numerusforms):
                                f.appendChild(doc.createTextNode(
                                        "%(hash)s_pl_%(key)s" %
                                        {
                                            'hash': hash_tag(sourceString,
                                                context_name + comment),
                                            'key': n
                                        }
                                ))
                    else:
                        if not translation:
                            translation = doc.createElement("translation")

                        # Delete all child nodes. This is usefull for xml like
                        # strings (eg html) where the translation text is split
                        # in multiple nodes.
                        translation.childNodes = []

                        translation.appendChild(doc.createTextNode(
                                ("%(hash)s_tr" % {'hash': hash_tag(
                                    sourceString, context_name + comment)})
Beispiel #18
0
    def parse_file(self, is_source=False, lang_rules=None):
        """
        Parse a java .properties file and create a stringset with
        all entries in the file.

        Lines are read from ``self.filename`` and decoded with
        ``self.ENCODING``.  When ``is_source`` is true, a template buffer is
        built in which every non-empty value is replaced by a ``<hash>_tr``
        placeholder; otherwise keys without a matching ``SourceEntity`` are
        skipped.  ``lang_rules`` is accepted but not used here.

        Raises ``JavaParseError`` when the file cannot be decoded.

        NOTE(review): in the code visible here the collected ``stringset``,
        ``suggestions`` and ``buf`` are not stored anywhere -- confirm
        whether the tail of this method is missing.

        See
        http://download.oracle.com/javase/1.4.2/docs/api/java/util/PropertyResourceBundle.html,
        http://download.oracle.com/javase/1.4.2/docs/api/java/util/Properties.html#encoding and
        http://download.oracle.com/javase/1.4.2/docs/api/java/util/Properties.html#load(java.io.InputStream)
        for details.
        """
        resource = self.resource
        stringset = StringSet()
        suggestions = StringSet()

        context = ""
        fh = open(self.filename, "r")
        try:
            self.find_linesep(fh)
            buf = u""
            for line in fh:
                line = line.decode(self.ENCODING)
                line = self._prepare_line(line)
                # Skip empty lines and comments
                if not line or line.startswith(self.COMMENT_CHARS):
                    if is_source:
                        buf += line + self._linesep
                    continue
                # An unescaped trailing backslash joins the next line to
                # this property.  endswith() avoids the IndexError that
                # line[-1] raises once a continuation collapses to "".
                while line.endswith('\\') and not self._is_escaped(line, -1):
                    try:
                        raw_next = next(fh)
                    except StopIteration:
                        # Dangling continuation on the last line: drop the
                        # backslash instead of leaking StopIteration.
                        line = line[:-1]
                        break
                    # Continuation lines must be decoded like the first
                    # line; joining raw bytes to unicode would fall back to
                    # an implicit ASCII decode.
                    nextline = self._prepare_line(
                        raw_next.decode(self.ENCODING))
                    # This line will become part of the value
                    line = line[:-1] + self._prepare_line(nextline)
                if not line:
                    # Nothing left after joining; treat it as a blank line.
                    if is_source:
                        buf += self._linesep
                    continue
                key, value = self._split(line)

                if is_source:
                    if not value:
                        buf += line + self._linesep
                        # Keys with no values should not be shown to translator
                        continue
                    key_len = len(key)
                    buf += line[:key_len] + re.sub(
                        re.escape(value),
                        "%(hash)s_tr" % {'hash': hash_tag(key, context)},
                        line[key_len:]
                    ) + self._linesep
                elif not SourceEntity.objects.filter(resource=resource, string=key).exists():
                    # ignore keys with no translation
                    continue

                stringset.strings.append(GenericTranslation(key,
                    self._unescape(value), rule=5, context=context,
                    pluralized=False, fuzzy=False,
                    obsolete=False))
        except UnicodeDecodeError as e:
            # ``e.message`` is empty for UnicodeDecodeError, so str(e) is
            # used to keep the codec and byte position in the error.
            raise JavaParseError(str(e))
        finally:
            fh.close()
Beispiel #19
0
    def parse_file(self, is_source=False, lang_rules=None):
        """
        Parse an apple .strings file and create a stringset with
        all entries in the file.

        See
        http://developer.apple.com/library/mac/#documentation/MacOSX/Conceptual/BPInternational/Articles/StringsFiles.html
        for details.
        """
        resource = self.resource
        stringset = StringSet()
        suggestions = StringSet()

        context = ""
        fh = open(self.filename, "r")
        p = re.compile(r'(?P<line>(("(?P<key>[^"\\]*(?:\\.[^"\\]*)*)")|(?P<property>\w+))\s*=\s*"(?P<value>[^"\\]*(?:\\.[^"\\]*)*)"\s*;)', re.U)
        c = re.compile(r'\s*/\*(.|\s)*?\*/\s*', re.U)
        ws = re.compile(r'\s+', re.U)
        try:
            f = fh.read()
            if chardet.detect(f)['encoding'].startswith('UTF-16'):
                f = f.decode('utf-16')
            else:
                f = f.decode(self.ENCODING)
            buf = u""
            end=0
            start = 0
            for i in p.finditer(f):
                start = i.start()
                end_ = i.end()
                line = i.group('line')
                key = i.group('key')
                if not key:
                    key = i.group('property')
                value = i.group('value')
                while end < start:
                    m = c.match(f, end, start) or ws.match(f, end, start)
                    if not m or m.start() != end:
                        raise StringsParseError("Invalid syntax.")
                    if is_source:
                        buf += f[end:m.end()]
                    end = m.end()
                end = end_
                if is_source:
                    if not value.strip():
                        buf += line
                        continue
                    else:
                        line = f[start:end]
                        value = f[i.start('value'):i.end('value')]
                        buf += re.sub(
                            re.escape(value),
                            "%(hash)s_tr" % {'hash': hash_tag(key, context)},
                            line
                        )
                elif not SourceEntity.objects.filter(resource=resource, string=key).exists() or not value.strip():
                    # ignore keys with no translation
                    continue
                stringset.strings.append(GenericTranslation(key,
                    self._unescape(value), rule=5, context=context,
                    pluralized=False, fuzzy=False,
                    obsolete=False)) 
            while len(f[end:]):
                m = c.match(f, end) or ws.match(f, end)
                if not m or m.start() != end:
                    raise StringsParseError("Invalid syntax.")
                if is_source:
                    buf += f[end:m.end()]
                end = m.end()
                if end == 0:
                    break

        except UnicodeDecodeError, e:
            raise StringsParseError(e.message)
Beispiel #20
0
                    msgstr[1],
                    context=escape_context(entry.msgctxt) or "",
                    occurrences=", ".join([":".join([i for i in t]) for t in entry.occurrences]),
                    rule=msgstr[0],
                    pluralized=pluralized,
                )

                stringset.strings.append(translation)

            if entry.comment:
                translation.comment = entry.comment
            if entry.flags:
                translation.flags = ", ".join(f for f in entry.flags)

            if is_source:
                entry.msgstr = "%(hash)s_tr" % {"hash": hash_tag(translation.source_entity, translation.context)}

                if entry.msgid_plural:
                    for n, rule in enumerate(plural_keys):
                        entry.msgstr_plural["%s" % n] = "%(hash)s_pl_%(key)s" % {
                            "hash": hash_tag(translation.source_entity, translation.context),
                            "key": n,
                        }

        if is_source:
            self.template = self.get_po_contents(pofile)

        self.stringset = stringset
        self.suggestions = suggestions
        return pofile
    def _parse(self, is_source, lang_rules):
        """Collect the entries of an apple .strings document.

        Reads ``self.content``, adds one GenericTranslation per accepted
        entry to ``self.stringset`` and returns the template text: for a
        source file this is the content with every non-blank value replaced
        by a ``<hash>_tr`` placeholder (a leading BOM is preserved), for a
        translation file it is an empty string.

        Text between entries has to be whitespace or comments, otherwise
        StringsParseError is raised.

        See
        http://developer.apple.com/library/mac/#documentation/MacOSX/Conceptual/BPInternational/Articles/StringsFiles.html
        for details.
        """
        resource = self.resource
        context = ""
        text = self.content
        bom = ""
        if text.startswith(u'\ufeff'):
            bom = u'\ufeff'
            text = text.lstrip(u'\ufeff')

        # Block comment that may sit directly above an entry; its body is
        # stored as the entry's comment.
        cp = r'(?:/\*(?P<comment>(?:[^*]|(?:\*+[^*/]))*\**)\*/)'
        entry_re = re.compile(r'(?:%s[ \t]*[\n]|[\r\n]|[\r]){0,1}(?P<line>(("(?P<key>[^"\\]*(?:\\.[^"\\]*)*)")|(?P<property>\w+))\s*=\s*"(?P<value>[^"\\]*(?:\\.[^"\\]*)*)"\s*;)'%cp, re.DOTALL|re.U)
        # Line or block comment found between entries.
        comment_re = re.compile(r'//[^\n]*\n|/\*(?:.|[\r\n])*?\*/', re.U)
        blank_re = re.compile(r'\s+', re.U)

        def skip_filler(pos, limit):
            # Walk over comments/whitespace in text[pos:limit] and hand back
            # the text that was skipped; anything else is a syntax error.
            first = pos
            while pos < limit:
                hit = (comment_re.match(text, pos, limit) or
                       blank_re.match(text, pos, limit))
                if not hit or hit.start() != pos:
                    raise StringsParseError("Invalid syntax: %s" %
                            text[pos:limit])
                pos = hit.end()
            return text[first:pos]

        template = u""
        cursor = 0
        for entry in entry_re.finditer(text):
            line_start = entry.start('line')
            # Filler between the previous entry and this one.
            skipped = skip_filler(cursor, line_start)
            if is_source:
                template += skipped
            cursor = entry.end()

            key = self._unescape_key(
                entry.group('key') or entry.group('property')
            )
            value = entry.group('value')
            comment = entry.group('comment') or ''

            if is_source:
                if not value.strip():
                    # Blank values stay in the template as they are and are
                    # not offered for translation.
                    template += entry.group('line')
                    continue
                value_start, value_end = entry.span('value')
                template += text[line_start:value_start]
                template += re.sub(
                    re.escape(value),
                    "%(hash)s_tr" % {'hash': hash_tag(key, context)},
                    text[value_start:value_end]
                )
                template += text[value_end:cursor]
            elif not SourceEntity.objects.filter(resource=resource, string=key).exists() or not value.strip():
                # ignore keys with no translation
                continue

            self.stringset.add(GenericTranslation(
                key, self._unescape(value), rule=5, context=context,
                pluralized=False, fuzzy=False, comment=comment,
                obsolete=False
            ))

        # Anything after the last entry must be filler too.
        tail = skip_filler(cursor, len(text))
        if is_source:
            template += tail
            template = bom + template
        return template
Beispiel #22
0
 def parse_tag_trans_unit(self, trans_unit_node, is_source=False, context=None, source_string = None, rule = None):
     """Turn one <trans-unit> node into a GenericTranslation.

     The result is appended to ``self.stringset_.strings``; nothing is
     returned.

     For a source file the <source> text is used as the translation and a
     <target> node holding a hash placeholder is inserted right after
     <source>.  For a translation file the text of the first <target> is
     used, and units without a translation are skipped.

     :param trans_unit_node: DOM node of the <trans-unit> element.
     :param is_source: whether the document is the source-language file.
     :param context: list of context values collected so far.  When a list
         is passed it is extended in place with the unit's own
         <context-group> values (non-plural units only).  Defaults to a
         fresh list per call.
     :param source_string: source entity of the plural group; when given,
         the unit is recorded as pluralized under this string.
     :param rule: plural rule of this unit inside a plural group.
     """
     # A shared `context=[]` default would be mutated by extend() below and
     # leak context from one call into the next.
     if context is None:
         context = []

     source = ""
     source_node = trans_unit_node.getElementsByTagName("source")[0]
     if len(source_node.childNodes)>1:
         # Mixed content: keep the inline markup verbatim.
         for i in source_node.childNodes:
             source += i.toxml()
     else:
         # NOTE(review): an empty <source/> has no firstChild and fails
         # here -- confirm whether such units can occur.
         source = source_node.firstChild.data
     pluralized = bool(source_string)
     for node in trans_unit_node.childNodes:
         if node.nodeType == node.ELEMENT_NODE and node.localName == "context-group" and not source_string and not rule:
             context.extend(self.parse_tag_context_group(node, is_source))
         # TODO prop-group, note, count-group, alt-trans
     # TODO seq-source
     context = escape_context(context)
     if is_source:
         translation = source
         if pluralized:
             source = source_string
         # Whitespace-only sources are not translatable.
         if translation and not translation.strip():
             return
         target = self.doc.createElement("target")
         target.childNodes = []
         if source_string and rule:
             placeholder = "%(hash)s_pl_%(rule)s" % {'hash': hash_tag(
                 source_string, context), 'rule':rule}
         else:
             placeholder = "%(hash)s_tr" % {'hash': hash_tag(
                 source, context)}
         target.appendChild(self.doc.createTextNode(placeholder))
         # Reuse the node in front of <source> (presumably indentation
         # whitespace) so <target> lines up with it; compact documents have
         # no such node.
         indent_node = None
         if source_node.previousSibling is not None:
             indent_node = source_node.previousSibling.cloneNode(True)
         if source_node.nextSibling:
             trans_unit_node.insertBefore(target, source_node.nextSibling)
             if indent_node is not None:
                 # nextSibling is now <target>, so this lands between
                 # <source> and <target>.
                 trans_unit_node.insertBefore(indent_node, source_node.nextSibling)
         else:
             if indent_node is not None:
                 trans_unit_node.appendChild(indent_node)
             trans_unit_node.appendChild(target)
     else:
         if pluralized:
             source = source_string
         translation = u""
         target_list = trans_unit_node.getElementsByTagName("target")
         if target_list:
             if len(target_list[0].childNodes)>1:
                 translation = self._getText(target_list[0].childNodes)
             elif target_list[0].firstChild:
                 translation = target_list[0].firstChild.data
         if not translation:
             return
         # TODO - do something with inline elements
     if not pluralized:
         # 5 is the rule used for strings without plural forms.
         rule = 5
     self.stringset_.strings.append(GenericTranslation(source,
            translation, rule=rule,
            context=context, pluralized=pluralized, fuzzy=False,
            obsolete=False))