def getTranslationMessageData(self, translationmessage):
     # Convert a TranslationMessage to TranslationMessageData object,
     # which is used during import.
     potmsgset = translationmessage.potmsgset
     message_data = TranslationMessageData()
     message_data.context = potmsgset.context
     message_data.msgid_singular = potmsgset.singular_text
     message_data.msgid_plural = potmsgset.plural_text
     translations = translationmessage.translations
     for plural_form, translation in enumerate(translations):
         message_data.addTranslation(plural_form, translation)
     return message_data
 def getTranslationMessageData(self, translationmessage):
     # Convert a TranslationMessage to TranslationMessageData object,
     # which is used during import.
     potmsgset = translationmessage.potmsgset
     message_data = TranslationMessageData()
     message_data.context = potmsgset.context
     message_data.msgid_singular = potmsgset.singular_text
     message_data.msgid_plural = potmsgset.plural_text
     translations = translationmessage.translations
     for plural_form, translation in enumerate(translations):
         message_data.addTranslation(plural_form, translation)
     return message_data
Example #3
0
    def new_general_entity(self, name, value):
        """See `xmldtd.WFCDTD`."""
        if not self.started:
            return

        message = TranslationMessageData()
        message.msgid_singular = name
        # CarlosPerelloMarin 20070326: xmldtd parser does an inline
        # parsing which means that the content is all in a single line so we
        # don't have a way to show the line number with the source reference.
        message.file_references_list = ["%s(%s)" % (self.filename, name)]
        message.addTranslation(TranslationConstants.SINGULAR_FORM, value)
        message.singular_text = value
        message.context = self.chrome_path
        message.source_comment = self.last_comment
        self.messages.append(message)
        self.started += 1
        self.last_comment = None
    def new_general_entity(self, name, value):
        """See `xmldtd.WFCDTD`."""
        if not self.started:
            return

        message = TranslationMessageData()
        message.msgid_singular = name
        # CarlosPerelloMarin 20070326: xmldtd parser does an inline
        # parsing which means that the content is all in a single line so we
        # don't have a way to show the line number with the source reference.
        message.file_references_list = ["%s(%s)" % (self.filename, name)]
        message.addTranslation(TranslationConstants.SINGULAR_FORM, value)
        message.singular_text = value
        message.context = self.chrome_path
        message.source_comment = self.last_comment
        self.messages.append(message)
        self.started += 1
        self.last_comment = None
Example #5
0
    def _fetchDBRows(self, simulate_timeout=False):
        msgstr_joins = [
            "LEFT OUTER JOIN POTranslation AS pt%d "
            "ON pt%d.id = TranslationMessage.msgstr%d" % (form, form, form)
            for form in xrange(TranslationConstants.MAX_PLURAL_FORMS)
        ]

        translations = [
            "pt%d.translation AS translation%d" % (form, form)
            for form in xrange(TranslationConstants.MAX_PLURAL_FORMS)
        ]

        substitutions = {
            'translation_columns': ', '.join(translations),
            'translation_joins': '\n'.join(msgstr_joins),
            'language': quote(self.pofile.language),
            'potemplate': quote(self.pofile.potemplate),
            'flag': self._getFlagName(),
        }

        sql = """
            SELECT
                POMsgId.msgid AS msgid,
                POMsgID_Plural.msgid AS msgid_plural,
                context,
                date_reviewed,
                %(translation_columns)s
            FROM POTMsgSet
            JOIN TranslationTemplateItem ON
                TranslationTemplateItem.potmsgset = POTMsgSet.id AND
                TranslationTemplateItem.potemplate = %(potemplate)s
            JOIN TranslationMessage ON
                POTMsgSet.id=TranslationMessage.potmsgset AND (
                    TranslationMessage.potemplate = %(potemplate)s OR
                    TranslationMessage.potemplate IS NULL) AND
                TranslationMessage.language = %(language)s
            %(translation_joins)s
            JOIN POMsgID ON
                POMsgID.id = POTMsgSet.msgid_singular
            LEFT OUTER JOIN POMsgID AS POMsgID_Plural ON
                POMsgID_Plural.id = POTMsgSet.msgid_plural
            WHERE
                %(flag)s IS TRUE
            ORDER BY
                TranslationTemplateItem.sequence,
                TranslationMessage.potemplate NULLS LAST
          """ % substitutions

        cur = cursor()
        try:
            # XXX JeroenVermeulen 2010-11-24 bug=680802: We set a
            # timeout to work around bug 408718, but the query is
            # simpler now.  See if we still need this.

            # We have to commit what we've got so far or we'll lose
            # it when we hit TimeoutError.
            transaction.commit()

            if simulate_timeout:
                # This is used in tests.
                timeout = '1ms'
                query = "SELECT pg_sleep(2)"
            else:
                timeout = 1000 * int(config.poimport.statement_timeout)
                query = sql
            cur.execute("SET statement_timeout to %s" % quote(timeout))
            cur.execute(query)
        except TimeoutError:
            # XXX JeroenVermeulen 2010-11-24 bug=680802: Log this so we
            # know whether it still happens.
            transaction.abort()
            return

        rows = cur.fetchall()

        assert TranslationConstants.MAX_PLURAL_FORMS == 6, (
            "Change this code to support %d plural forms" %
            TranslationConstants.MAX_PLURAL_FORMS)
        for row in rows:
            msgid, msgid_plural, context, date = row[:4]
            # The last part of the row is msgstr0 .. msgstr5. Store them
            # in a dict indexed by the number of the plural form.
            msgstrs = dict(enumerate(row[4:]))

            key = (msgid, msgid_plural, context)
            if key in self.current_messages:
                message = self.current_messages[key]
            else:
                message = TranslationMessageData()
                self.current_messages[key] = message

                message.context = context
                message.msgid_singular = msgid
                message.msgid_plural = msgid_plural

            for plural in xrange(TranslationConstants.MAX_PLURAL_FORMS):
                msgstr = msgstrs.get(plural, None)
                if (msgstr is not None
                        and ((len(message.translations) > plural
                              and message.translations[plural] is None) or
                             (len(message.translations) <= plural))):
                    message.addTranslation(plural, msgstr)
    def parse(self, content):
        """Parse given content as a property file.

        Once the parse is done, self.messages has a list of the available
        `ITranslationMessageData`s.
        """

        # .properties files are supposed to be unicode-escaped, but we know
        # that there are some .xpi language packs that instead, use UTF-8.
        # That's against the specification, but Mozilla applications accept
        # it anyway, so we try to support it too.
        # To do this support, we read the text as being in UTF-8
        # because unicode-escaped looks like ASCII files.
        try:
            content = content.decode('utf-8')
        except UnicodeDecodeError:
            raise TranslationFormatInvalidInputError(
                'Content is not valid unicode-escaped text')

        line_num = 0
        is_multi_line_comment = False
        last_comment = None
        last_comment_line_num = 0
        ignore_comment = False
        is_message = False
        translation = u''
        for line in content.splitlines():
            # Now, to "normalize" all to the same encoding, we encode to
            # unicode-escape first, and then decode it to unicode
            # XXX: Danilo 2006-08-01: we _might_ get performance
            # improvements if we reimplement this to work directly,
            # though, it will be hard to beat C-based de/encoder.
            # This call unescapes everything so we don't need to care about
            # quotes escaping.
            try:
                string = line.encode('raw-unicode_escape')
                line = string.decode('unicode_escape')
            except UnicodeDecodeError as exception:
                raise TranslationFormatInvalidInputError(
                    filename=self.filename,
                    line_number=line_num,
                    message=str(exception))

            line_num += 1
            if not is_multi_line_comment:
                # Remove any white space before the useful data, like
                # ' # foo'.
                line = line.lstrip()
                if len(line) == 0:
                    # It's an empty line. Reset any previous comment we have.
                    last_comment = None
                    last_comment_line_num = 0
                    ignore_comment = False
                elif line.startswith(u'#') or line.startswith(u'//'):
                    # It's a whole line comment.
                    ignore_comment = False
                    line = line[1:].strip()
                    if last_comment:
                        last_comment += line
                    elif len(line) > 0:
                        last_comment = line

                    if last_comment and not last_comment.endswith('\n'):
                        # Comments must end always with a new line.
                        last_comment += '\n'

                    last_comment_line_num = line_num
                    continue

            # Unescaped URLs are a common mistake: the "//" starts an
            # end-of-line comment.  To work around that, treat "://" as
            # a special case.
            just_saw_colon = False

            while line:
                if is_multi_line_comment:
                    if line.startswith(u'*/'):
                        # The comment ended, we jump the closing tag and
                        # continue with the parsing.
                        line = line[2:]
                        is_multi_line_comment = False
                        last_comment_line_num = line_num
                        if ignore_comment:
                            last_comment = None
                            ignore_comment = False

                        # Comments must end always with a new line.
                        last_comment += '\n'
                    elif line.startswith(self.license_block_text):
                        # It's a comment with a licence notice, this
                        # comment can be ignored.
                        ignore_comment = True
                        # Jump the whole tag
                        line = line[len(self.license_block_text):]
                    else:
                        # Store the character.
                        if last_comment is None:
                            last_comment = line[0]
                        elif last_comment_line_num == line_num:
                            last_comment += line[0]
                        else:
                            last_comment = u'%s\n%s' % (last_comment, line[0])
                            last_comment_line_num = line_num
                        # Jump the processed char.
                        line = line[1:]
                    continue
                elif line.startswith(u'/*'):
                    # It's a multi line comment
                    is_multi_line_comment = True
                    ignore_comment = False
                    last_comment_line_num = line_num
                    # Jump the comment starting tag
                    line = line[2:]
                    continue
                elif line.startswith(u'//') and not just_saw_colon:
                    # End-of-line comment.
                    last_comment = '%s\n' % line[2:].strip()
                    last_comment_line_num = line_num
                    # On to next line.
                    break
                elif is_message:
                    # Store the char and continue.
                    head_char = line[0]
                    translation += head_char
                    line = line[1:]
                    just_saw_colon = (head_char == ':')
                    continue
                elif u'=' in line:
                    # Looks like a message string.
                    (key, value) = line.split('=', 1)
                    # Remove leading and trailing white spaces.
                    key = key.strip()

                    if valid_property_msgid(key):
                        is_message = True
                        # Jump the msgid, control chars and leading white
                        # space.
                        line = value.lstrip()
                        continue
                    else:
                        raise TranslationFormatSyntaxError(
                            line_number=line_num,
                            message=u"invalid msgid: '%s'" % key)
                else:
                    # Got a line that is not a valid message nor a valid
                    # comment. Ignore it because main en-US.xpi catalog from
                    # Firefox has such line/error. We follow the 'be strict
                    # with what you export, be permisive with what you import'
                    # policy.
                    break
            if is_message:
                # We just parsed a message, so we need to add it to the list
                # of messages.
                if ignore_comment or last_comment_line_num < line_num - 1:
                    # We must ignore the comment or either the comment is not
                    # the last thing before this message or is not in the same
                    # line as this message.
                    last_comment = None
                    ignore_comment = False

                message = TranslationMessageData()
                message.msgid_singular = key
                message.context = self.chrome_path
                message.file_references_list = [
                    "%s:%d(%s)" % (self.filename, line_num, key)
                ]
                value = translation.strip()
                message.addTranslation(TranslationConstants.SINGULAR_FORM,
                                       value)
                message.singular_text = value
                message.source_comment = last_comment
                self.messages.append(message)

                # Reset status vars.
                last_comment = None
                last_comment_line_num = 0
                is_message = False
                translation = u''
    def parse(self, content):
        """Parse given content as a property file.

        Once the parse is done, self.messages has a list of the available
        `ITranslationMessageData`s.
        """

        # .properties files are supposed to be unicode-escaped, but we know
        # that there are some .xpi language packs that instead, use UTF-8.
        # That's against the specification, but Mozilla applications accept
        # it anyway, so we try to support it too.
        # To do this support, we read the text as being in UTF-8
        # because unicode-escaped looks like ASCII files.
        try:
            content = content.decode('utf-8')
        except UnicodeDecodeError:
            raise TranslationFormatInvalidInputError, (
                'Content is not valid unicode-escaped text')

        line_num = 0
        is_multi_line_comment = False
        last_comment = None
        last_comment_line_num = 0
        ignore_comment = False
        is_message = False
        translation = u''
        for line in content.splitlines():
            # Now, to "normalize" all to the same encoding, we encode to
            # unicode-escape first, and then decode it to unicode
            # XXX: Danilo 2006-08-01: we _might_ get performance
            # improvements if we reimplement this to work directly,
            # though, it will be hard to beat C-based de/encoder.
            # This call unescapes everything so we don't need to care about
            # quotes escaping.
            try:
                string = line.encode('raw-unicode_escape')
                line = string.decode('unicode_escape')
            except UnicodeDecodeError as exception:
                raise TranslationFormatInvalidInputError(
                    filename=self.filename, line_number=line_num,
                    message=str(exception))

            line_num += 1
            if not is_multi_line_comment:
                # Remove any white space before the useful data, like
                # ' # foo'.
                line = line.lstrip()
                if len(line) == 0:
                    # It's an empty line. Reset any previous comment we have.
                    last_comment = None
                    last_comment_line_num = 0
                    ignore_comment = False
                elif line.startswith(u'#') or line.startswith(u'//'):
                    # It's a whole line comment.
                    ignore_comment = False
                    line = line[1:].strip()
                    if last_comment:
                        last_comment += line
                    elif len(line) > 0:
                        last_comment = line

                    if last_comment and not last_comment.endswith('\n'):
                        # Comments must end always with a new line.
                        last_comment += '\n'

                    last_comment_line_num = line_num
                    continue

            # Unescaped URLs are a common mistake: the "//" starts an
            # end-of-line comment.  To work around that, treat "://" as
            # a special case.
            just_saw_colon = False

            while line:
                if is_multi_line_comment:
                    if line.startswith(u'*/'):
                        # The comment ended, we jump the closing tag and
                        # continue with the parsing.
                        line = line[2:]
                        is_multi_line_comment = False
                        last_comment_line_num = line_num
                        if ignore_comment:
                            last_comment = None
                            ignore_comment = False

                        # Comments must end always with a new line.
                        last_comment += '\n'
                    elif line.startswith(self.license_block_text):
                        # It's a comment with a licence notice, this
                        # comment can be ignored.
                        ignore_comment = True
                        # Jump the whole tag
                        line = line[len(self.license_block_text):]
                    else:
                        # Store the character.
                        if last_comment is None:
                            last_comment = line[0]
                        elif last_comment_line_num == line_num:
                            last_comment += line[0]
                        else:
                            last_comment = u'%s\n%s' % (last_comment, line[0])
                            last_comment_line_num = line_num
                        # Jump the processed char.
                        line = line[1:]
                    continue
                elif line.startswith(u'/*'):
                    # It's a multi line comment
                    is_multi_line_comment = True
                    ignore_comment = False
                    last_comment_line_num = line_num
                    # Jump the comment starting tag
                    line = line[2:]
                    continue
                elif line.startswith(u'//') and not just_saw_colon:
                    # End-of-line comment.
                    last_comment = '%s\n' % line[2:].strip()
                    last_comment_line_num = line_num
                    # On to next line.
                    break
                elif is_message:
                    # Store the char and continue.
                    head_char = line[0]
                    translation += head_char
                    line = line[1:]
                    just_saw_colon = (head_char == ':')
                    continue
                elif u'=' in line:
                    # Looks like a message string.
                    (key, value) = line.split('=', 1)
                    # Remove leading and trailing white spaces.
                    key = key.strip()

                    if valid_property_msgid(key):
                        is_message = True
                        # Jump the msgid, control chars and leading white
                        # space.
                        line = value.lstrip()
                        continue
                    else:
                        raise TranslationFormatSyntaxError(
                            line_number=line_num,
                            message=u"invalid msgid: '%s'" % key)
                else:
                    # Got a line that is not a valid message nor a valid
                    # comment. Ignore it because main en-US.xpi catalog from
                    # Firefox has such line/error. We follow the 'be strict
                    # with what you export, be permisive with what you import'
                    # policy.
                    break
            if is_message:
                # We just parsed a message, so we need to add it to the list
                # of messages.
                if ignore_comment or last_comment_line_num < line_num - 1:
                    # We must ignore the comment or either the comment is not
                    # the last thing before this message or is not in the same
                    # line as this message.
                    last_comment = None
                    ignore_comment = False

                message = TranslationMessageData()
                message.msgid_singular = key
                message.context = self.chrome_path
                message.file_references_list = [
                    "%s:%d(%s)" % (self.filename, line_num, key)]
                value = translation.strip()
                message.addTranslation(
                    TranslationConstants.SINGULAR_FORM, value)
                message.singular_text = value
                message.source_comment = last_comment
                self.messages.append(message)

                # Reset status vars.
                last_comment = None
                last_comment_line_num = 0
                is_message = False
                translation = u''
    def _fetchDBRows(self, simulate_timeout=False):
        msgstr_joins = [
            "LEFT OUTER JOIN POTranslation AS pt%d "
            "ON pt%d.id = TranslationMessage.msgstr%d" % (form, form, form)
            for form in xrange(TranslationConstants.MAX_PLURAL_FORMS)]

        translations = [
            "pt%d.translation AS translation%d" % (form, form)
            for form in xrange(TranslationConstants.MAX_PLURAL_FORMS)]

        substitutions = {
            'translation_columns': ', '.join(translations),
            'translation_joins': '\n'.join(msgstr_joins),
            'language': quote(self.pofile.language),
            'potemplate': quote(self.pofile.potemplate),
            'flag': self._getFlagName(),
        }

        sql = """
            SELECT
                POMsgId.msgid AS msgid,
                POMsgID_Plural.msgid AS msgid_plural,
                context,
                date_reviewed,
                %(translation_columns)s
            FROM POTMsgSet
            JOIN TranslationTemplateItem ON
                TranslationTemplateItem.potmsgset = POTMsgSet.id AND
                TranslationTemplateItem.potemplate = %(potemplate)s
            JOIN TranslationMessage ON
                POTMsgSet.id=TranslationMessage.potmsgset AND (
                    TranslationMessage.potemplate = %(potemplate)s OR
                    TranslationMessage.potemplate IS NULL) AND
                TranslationMessage.language = %(language)s
            %(translation_joins)s
            JOIN POMsgID ON
                POMsgID.id = POTMsgSet.msgid_singular
            LEFT OUTER JOIN POMsgID AS POMsgID_Plural ON
                POMsgID_Plural.id = POTMsgSet.msgid_plural
            WHERE
                %(flag)s IS TRUE
            ORDER BY
                TranslationTemplateItem.sequence,
                TranslationMessage.potemplate NULLS LAST
          """ % substitutions

        cur = cursor()
        try:
            # XXX JeroenVermeulen 2010-11-24 bug=680802: We set a
            # timeout to work around bug 408718, but the query is
            # simpler now.  See if we still need this.

            # We have to commit what we've got so far or we'll lose
            # it when we hit TimeoutError.
            transaction.commit()

            if simulate_timeout:
                # This is used in tests.
                timeout = '1ms'
                query = "SELECT pg_sleep(2)"
            else:
                timeout = 1000 * int(config.poimport.statement_timeout)
                query = sql
            cur.execute("SET statement_timeout to %s" % quote(timeout))
            cur.execute(query)
        except TimeoutError:
            # XXX JeroenVermeulen 2010-11-24 bug=680802: Log this so we
            # know whether it still happens.
            transaction.abort()
            return

        rows = cur.fetchall()

        assert TranslationConstants.MAX_PLURAL_FORMS == 6, (
            "Change this code to support %d plural forms"
            % TranslationConstants.MAX_PLURAL_FORMS)
        for row in rows:
            msgid, msgid_plural, context, date = row[:4]
            # The last part of the row is msgstr0 .. msgstr5. Store them
            # in a dict indexed by the number of the plural form.
            msgstrs = dict(enumerate(row[4:]))

            key = (msgid, msgid_plural, context)
            if key in self.current_messages:
                message = self.current_messages[key]
            else:
                message = TranslationMessageData()
                self.current_messages[key] = message

                message.context = context
                message.msgid_singular = msgid
                message.msgid_plural = msgid_plural

            for plural in xrange(TranslationConstants.MAX_PLURAL_FORMS):
                msgstr = msgstrs.get(plural, None)
                if (msgstr is not None and
                    ((len(message.translations) > plural and
                      message.translations[plural] is None) or
                     (len(message.translations) <= plural))):
                    message.addTranslation(plural, msgstr)